yacc&lexでJSONパーサを作る
yacc&lexの練習のために作ったJSONパーサです。
一通り、JSONのBNF*1には従ったつもりです。
また、ソースには以前「C言語で文字列を簡単にかつ少し高速に操作する - ほんまの走り書き技術メモ」で紹介したMyStringを使用しています。
object
    {}
    { members }
members
    pair
    pair , members
pair
    string : value
array
    []
    [ elements ]
elements
    value
    value , elements
value
    string
    number
    object
    array
    true
    false
    null
string
    ""
    " chars "
chars
    char
    char chars
char
    any-Unicode-character-except-"-or-\-or-control-character
    \"
    \\
    \/
    \b
    \f
    \n
    \r
    \t
    \u four-hex-digits
number
    int
    int frac
    int exp
    int frac exp
int
    digit
    digit1-9 digits
    - digit
    - digit1-9 digits
frac
    . digits
exp
    e digits
digits
    digit
    digit digits
e
    e
    e+
    e-
    E
    E+
    E-
使用例
この後ソースばかりなので、最初に使用方法を紹介しておきます。
json_parserという関数に入力ファイルのポインタを与えるだけで、パースしてくれます。
json_printは見やすい形で、JSONを出力してくれます。(少し手抜きですが。。)
最小構成
#include <stdio.h> #include "json.h" int main(int argc, char **argv) { FILE *fp = stdin; JSONValue *json; if (argc >= 2) { if (!(fp = fopen(argv[1], "r"))) { fprintf(stderr, "File [%s] is not found!\n", argv[1]); return 1; } } if ((json = json_parser(fp))) { json_print(stdout, json); } else { printf("Json error!\n"); } return 0; }
値の取得
値の取得には以下の2つの関数を使用します。
json_value_at_indexはJSONの配列からindex番目の要素を取り出す関数です。
json_value_at_keyはJSONのオブジェクトからkeyに一致する要素を取り出す関数です。
/* Returns the index-th (0-based) element of a JSON array value, or NULL when
 * value is not an array or has no element at that index. */
JSONValue *json_value_at_index(JSONValue *value, int index);
/* Returns the value of the member whose key equals key (compared with
 * strcmp), or NULL when value is NULL, is not an object, or has no such key. */
JSONValue *json_value_at_key(JSONValue *value, char *key);
{ "object":[ {"[{:0":"\"\n\\", ",0":"\"'\""}, {"[{:1":"\"\n\\", ",1":"\"'\""}, {"[{:2":"\"\n\\", ",2":"\"'\""}, {"[{:3":"\"\n\\", ",3":"\"'\""}, {"[{:4":"\"\n\\", ",4":"\"'\""}, {"[{:5":"\"\n\\", ",5":"\"'\""} ], "num":[ 0, 123, 123,456, -987, -0.987654, 123E2, 123E+3, 123E-4, 123.456e2, 123.456e+3, 123.456e-4, -123E2, -123E+3, -123E-4, -123.456e2, -123.456e+3, -123.456e-4, 0.123, 0.123, 0.123e1, 0.123E+1, 0.123E-1 ], "unicode":{ "utf8":"\u30c6\u30b9\u30c8Unicode\u5909\u63db\u3067\u3059\u3002", "utf16":"\uD950\uDF21", "sura":"\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/" } }
例えばこの中から、
/num[5]⇒-0.987654
を取り出す場合には、
double d_val = json_value_at_index(json_value_at_key(json, "num"), 5)->d_value;
と、書くことで取り出すことが出来ます。
おっと、大切なことを言い忘れていましたが、JSONをパースするとJSONValueという構造体が返されます。
値は共用体になっており、typeで判断してtypeに合った方法で取り出す必要があります。
ですから、上のソースは、
/*
 * Type-checked version of the lookup /num[5]:
 * every step verifies the value type before touching the union, and checks
 * for NULL because both lookup helpers return NULL when nothing is found.
 */
JSONValue *num, *val;
double d_val = 0.0;     /* stays 0.0 when the path does not exist */

if (json->type == JSON_VALUE_TYPE_OBJECT) {
    num = json_value_at_key(json, "num");
    if (num && num->type == JSON_VALUE_TYPE_ARRAY) {
        val = json_value_at_index(num, 5);
        if (val && val->type == JSON_VALUE_TYPE_DOUBLE) {
            d_val = val->d_value;
        }
    }
}
と、なります。まぁDOMの操作とさほど違いはありませんね(汗
各typeとJSONValueの構造体はこんな感じです。
/* Tag stored in JSONValue.type; it tells which union member (if any) is valid. */
enum {
    JSON_VALUE_TYPE_INTEGER = 0,    /* i_value is valid */
    JSON_VALUE_TYPE_DOUBLE,         /* d_value is valid */
    JSON_VALUE_TYPE_STRING,         /* s_value is valid */
    JSON_VALUE_TYPE_OBJECT,         /* o_value is valid */
    JSON_VALUE_TYPE_ARRAY,          /* a_value is valid */
    JSON_VALUE_TYPE_TRUE,           /* literal true, no payload */
    JSON_VALUE_TYPE_FALSE,          /* literal false, no payload */
    JSON_VALUE_TYPE_NULL,           /* literal null, no payload */
    ALL_JSON_VALUE_TYPE_NUM,        /* number of value types, not a type itself */
};
/* One parsed JSON value: a type tag plus the payload selected by that tag. */
struct JSONValue {
    int type;               /* one of the JSON_VALUE_TYPE_* constants */
    union {
        int i_value;        /* JSON_VALUE_TYPE_INTEGER */
        double d_value;     /* JSON_VALUE_TYPE_DOUBLE */
        char *s_value;      /* JSON_VALUE_TYPE_STRING */
        JSONObject *o_value;    /* JSON_VALUE_TYPE_OBJECT */
        JSONArray *a_value;     /* JSON_VALUE_TYPE_ARRAY */
    };
};
ソース
以下全てソースです。コピペすれば(多分)コンパイルできると思います。
lex.l
%{
/*
 * lex.l - flex scanner for the JSON parser.
 *
 * Returns the punctuation characters as themselves, LITERAL for numbers and
 * true/false/null, and lSTRING for strings. String contents are accumulated
 * in share_string while one of the exclusive STRING start conditions is
 * active; \uXXXX escapes are converted to UTF-8 here.
 */
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include "MyString.h"
#include "json.h"
#include "parser.tab.h"

extern int yyerror(const char *s);

//#undef YY_INPUT
//#define YY_INPUT(buf, result, max_size) (result = json_yyinput(buf, max_size))

/*
 * Token value that the grammar never uses. Returning it makes the parser
 * stop with a syntax error instead of silently skipping bad input.
 */
#define LEX_INVALID_TOKEN 1

/* Buffer for the string literal currently being scanned (NULL outside one). */
static MyString *share_string;
/* High surrogate seen by the previous \uXXXX escape, awaiting its partner. */
static unichar ucs2_hi;

int my_yyinput(char *buf, int max_size);
static void append_utf8(MyString *dst, unsigned long cp);
static int lex_fail(const char *msg);

//extern "C"{
int yywrap(void)
{
    return 1;
}

/*
 * Alternative input routine for YY_INPUT (currently not enabled above):
 * copies up to max_size bytes from yyin into buf and returns the count.
 */
int my_yyinput(char *buf, int max_size)
{
    int len;
    int c;      /* int, not char: EOF must stay distinguishable from byte 0xFF */
    extern FILE *yyin;

    if (feof(yyin)) {
        return 0;
    }
    for (len = 0; len < max_size; len++) {
        if ((c = fgetc(yyin)) == EOF) {
            break;
        }
        buf[len] = (char)c;
    }
    return len;
}
//}

/* Append code point cp to dst encoded as 1 to 4 UTF-8 bytes. */
static void append_utf8(MyString *dst, unsigned long cp)
{
    if (cp < 0x80UL) {
        my_chrcat(dst, (int)cp);
    } else if (cp < 0x800UL) {
        my_chrcat(dst, (int)(0xc0 | (cp >> 6)));
        my_chrcat(dst, (int)(0x80 | (cp & 0x3f)));
    } else if (cp < 0x10000UL) {
        my_chrcat(dst, (int)(0xe0 | (cp >> 12)));
        my_chrcat(dst, (int)(0x80 | ((cp >> 6) & 0x3f)));
        my_chrcat(dst, (int)(0x80 | (cp & 0x3f)));
    } else {
        my_chrcat(dst, (int)(0xf0 | ((cp >> 18) & 0x07)));
        my_chrcat(dst, (int)(0x80 | ((cp >> 12) & 0x3f)));
        my_chrcat(dst, (int)(0x80 | ((cp >> 6) & 0x3f)));
        my_chrcat(dst, (int)(0x80 | (cp & 0x3f)));
    }
}

/*
 * Report msg through yyerror, discard any half-built string and return the
 * token that forces the parser to fail. The caller must switch back to
 * INITIAL first so that a later parse does not start inside a string.
 */
static int lex_fail(const char *msg)
{
    yyerror(msg);
    if (share_string) {
        free_string(share_string);
        share_string = NULL;
    }
    return LEX_INVALID_TOKEN;
}
%}

%x STRING
%x STRING_UCS_2

space       [ \t\r\n]
digit       [0-9]
integer     -?(0|[1-9]{digit}*)
number      {integer}\.{digit}+
exponent    [eE][+-]?{digit}+
alpha       [a-zA-Z]
hex         [0-9a-fA-F]

%%

<INITIAL>"}"        return '}';
<INITIAL>"{"        return '{';
<INITIAL>"]"        return ']';
<INITIAL>"["        return '[';
<INITIAL>","        return ',';
<INITIAL>":"        return ':';
<INITIAL>{space}+   ;

<INITIAL>"true"     { yylval.jvalue = json_new_true(); return LITERAL; }
<INITIAL>"false"    { yylval.jvalue = json_new_false(); return LITERAL; }
<INITIAL>"null"     { yylval.jvalue = json_new_null(); return LITERAL; }

<INITIAL>{integer} {
    /* Integers that do not fit in int are kept as doubles instead of overflowing. */
    long v;
    errno = 0;
    v = strtol(yytext, NULL, 10);
    if (errno == ERANGE || v < INT_MIN || v > INT_MAX) {
        yylval.jvalue = json_new_double(strtod(yytext, NULL));
    } else {
        yylval.jvalue = json_new_integer((int)v);
    }
    return LITERAL;
}
<INITIAL>{number} {
    yylval.jvalue = json_new_double(strtod(yytext, NULL));
    return LITERAL;
}
<INITIAL>({integer}|{number}){exponent} {
    yylval.jvalue = json_new_double(strtod(yytext, NULL));
    return LITERAL;
}

<INITIAL>\" {
    share_string = new_string();
    BEGIN STRING;
}
<INITIAL>.          { return lex_fail("Syntax Error!!"); }

<STRING>\" {
    BEGIN INITIAL;
    yylval.jvalue = json_new_string(share_string->str);
    free_string(share_string);
    share_string = NULL;
    return lSTRING;
}
<STRING>\\\"        my_chrcat(share_string, '"');
<STRING>\\\\        my_chrcat(share_string, '\\');
<STRING>\\\/        my_chrcat(share_string, '/');
<STRING>\\b         my_chrcat(share_string, '\b');
<STRING>\\n|\n      my_chrcat(share_string, '\n');
<STRING>\\r|\r      my_chrcat(share_string, '\r');
<STRING>\\t         my_chrcat(share_string, '\t');
<STRING>\\f         my_chrcat(share_string, '\f');
<STRING>\\v         my_chrcat(share_string, '\v');
<STRING>\\u{hex}{4} {
    unsigned int cp = 0;
    sscanf(yytext + 2, "%4x", &cp);
    if (cp >= 0xd800 && cp <= 0xdbff) {
        /* High surrogate: the low half must follow as another \uXXXX. */
        ucs2_hi = cp;
        BEGIN STRING_UCS_2;
    } else if (cp >= 0xdc00 && cp <= 0xdfff) {
        /* A low surrogate without a preceding high surrogate is invalid. */
        BEGIN INITIAL;
        return lex_fail("UCS-2 String Error");
    } else {
        append_utf8(share_string, cp);
    }
}
<STRING>\\.         my_chrcat(share_string, yytext[1]);
<STRING>.           my_chrcat(share_string, yytext[0]);

<STRING_UCS_2>\\u{hex}{4} {
    unsigned int lo = 0;
    sscanf(yytext + 2, "%4x", &lo);
    if (lo < 0xdc00 || lo > 0xdfff) {
        BEGIN INITIAL;
        return lex_fail("UCS-2 String Error");
    }
    /* Combine the surrogate pair: 10 bits from each half, offset by 0x10000. */
    append_utf8(share_string,
                0x10000UL + ((((unsigned long)ucs2_hi) - 0xd800UL) << 10)
                          + ((unsigned long)lo - 0xdc00UL));
    BEGIN STRING;
}
<STRING_UCS_2>.|\n {
    /* Anything other than a \uXXXX low surrogate leaves the pair incomplete. */
    BEGIN INITIAL;
    return lex_fail("UCS-2 String Error");
}

<STRING,STRING_UCS_2><<EOF>> {
    BEGIN INITIAL;
    return lex_fail("Unterminated string!");
}

%%
parser.y
%{
/*
 * parser.y - bison grammar for JSON.
 *
 * The semantic actions build the tree with the json_new_* / json_value_add_*
 * constructors from json.c; the finished root is left in the global `json`.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "json.h"

int yyerror(const char *s);

/* Root of the most recently parsed document (NULL for empty input). */
JSONValue *json;
%}

%union {
    JSONObject  *jobject;
    JSONMember  *jmember;
    JSONArray   *jarray;
    JSONElement *jelement;
    JSONValue   *jvalue;
}

%token <jvalue> lSTRING LITERAL

%type <jobject>  object
%type <jmember>  member
%type <jarray>   array
%type <jelement> element
%type <jvalue>   value

%start input

%%

input
    : /* empty */               { json = NULL; /* do not keep a root from an earlier parse */ }
    | value                     { json = $1; }
    ;

object
    : '{' '}'                   { $$ = json_new_object(NULL); }
    | '{' member '}'            { $$ = json_new_object($2); }
    ;

member
    : lSTRING ':' value         { $$ = json_new_member($1, $3); }
    | member ',' lSTRING ':' value
                                { $$ = json_value_add_member($1, $3, $5); }
    ;

array
    : '[' ']'                   { $$ = json_new_array(NULL); }
    | '[' element ']'           { $$ = json_new_array($2); }
    ;

element
    : value                     { $$ = json_new_element($1); }
    | element ',' value         { $$ = json_value_add_element($1, $3); }
    ;

value
    : LITERAL
    | lSTRING
    | object                    { $$ = json_new_object_value($1); }
    | array                     { $$ = json_new_array_value($1); }
    ;

%%

/* Error callback used by both the parser and the lexer; always returns 0. */
int yyerror(const char *s)
{
    /* Diagnostics go to stderr so they never mix with printed JSON. */
    fprintf(stderr, "Error: %s\n", s);
    return 0;
}
json.h
#ifndef __JSON_H__ #define __JSON_H__ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <ctype.h> #include <assert.h> #include "MyString.h" enum { JSON_VALUE_TYPE_INTEGER = 0, JSON_VALUE_TYPE_DOUBLE, JSON_VALUE_TYPE_STRING, JSON_VALUE_TYPE_OBJECT, JSON_VALUE_TYPE_ARRAY, JSON_VALUE_TYPE_TRUE, JSON_VALUE_TYPE_FALSE, JSON_VALUE_TYPE_NULL, ALL_JSON_VALUE_TYPE_NUM, }; #define JSON_VALUE_NAME_INTEGER "Integer" #define JSON_VALUE_NAME_DOUBLE "Double" #define JSON_VALUE_NAME_String "String" #define JSON_VALUE_NAME_Object "Object" #define JSON_VALUE_NAME_Array "Array" #define JSON_VALUE_NAME_TRUE "true" #define JSON_VALUE_NAME_FALSE "false" #define JSON_VALUE_NAME_NULL "null" #define JSON_TOKEN_EOF (-1) #define JSON_TOKEN_ERROR (0) #define JSON_TOKEN_LC '{' #define JSON_TOKEN_RC '}' #define JSON_TOKEN_COMMA ',' #define JSON_TOKEN_COLON ':' #define JSON_TOKEN_LB '[' #define JSON_TOKEN_RB ']' #define JSON_TOKEN_VALUE 'v' #define IS_HEX_CHAR(c) (isdigit(c) || (('a' <= c && 'f' >= c) || ('A' <= c && 'F' >= c))) #define HEX_CHAR2INT(c) (isdigit(c) ? c - '0' : ('a' <= c && 'f' >= c) || ('A' <= c && 'F' >= c) ? 
tolower(c) - 'a' + 10 : -1) /* Debug */ #define JSON_DEBUG_MODE 0 #define JSON_DEBUG_OUT stderr #if JSON_DEBUG_MODE #define JSON_DEBUG(str) fprintf(JSON_DEBUG_OUT, "%s(%d) : " str "\n", __FILE__, __LINE__) #else #define JSON_DEBUG(str) #endif #define JSON_INDENT_STR " " /*static char json_value_name[ALL_JSON_VALUE_TYPE_NUM][8] = { JSON_VALUE_NAME_INTEGER, JSON_VALUE_NAME_DOUBLE, JSON_VALUE_NAME_String, JSON_VALUE_NAME_Object, JSON_VALUE_NAME_Array, JSON_VALUE_NAME_TRUE, JSON_VALUE_NAME_FALSE, JSON_VALUE_NAME_NULL, };*/ typedef struct JSONObject JSONObject; typedef struct JSONMember JSONMember; typedef struct JSONArray JSONArray; typedef struct JSONElement JSONElement; typedef struct JSONValue JSONValue; typedef struct JSONToken JSONToken; struct JSONObject { JSONMember *members; }; struct JSONMember { JSONValue *key; JSONValue *value; JSONMember *next; }; struct JSONArray { JSONElement *elements; }; struct JSONElement { JSONValue *value; JSONElement *next; }; struct JSONValue { int type; union { int i_value; double d_value; char *s_value; JSONObject *o_value; JSONArray *a_value; }; }; struct JSONToken { char *str; char *next; int token; JSONValue *value; }; JSONValue *json_parser(FILE *fp); int yylex(); int yyparse(); JSONValue *json_new_value(); JSONValue *json_new_integer(int i); JSONValue *json_new_double(double d); JSONValue *json_new_string(char *str); JSONValue *json_new_nstring(char *str, int size); JSONValue *json_new_object_value(JSONObject *object); JSONValue *json_new_array_value(JSONArray *array); JSONValue *json_new_true(); JSONValue *json_new_false(); JSONValue *json_new_null(); void json_free_value(JSONValue *value); void json_print_value(FILE *fp, JSONValue *value, int depth); JSONElement *json_new_element(JSONValue *value); void json_free_element(JSONElement *element); JSONElement *json_value_add_element(JSONElement *element, JSONValue *value); void json_print_element(FILE *fp, JSONElement *element, int depth); JSONArray *json_new_array(JSONElement 
*element); void json_free_array(JSONArray *array); void json_print_array(FILE *fp, JSONArray *array, int depth); JSONMember *json_new_member(JSONValue *key, JSONValue *value); void json_free_member(JSONMember *member); JSONMember *json_value_add_member(JSONMember *member, JSONValue *key, JSONValue *value); JSONMember *json_member_add_member(JSONMember *member, JSONMember *value); void json_print_member(FILE *fp, JSONMember *member,int depth); JSONObject *json_new_object(JSONMember *member); void json_free_object(JSONObject *object); void json_print_object(FILE *fp, JSONObject *object, int depth); JSONValue *json_value_at_index(JSONValue *value, int index); JSONValue *json_value_at_key(JSONValue *value, char *key); MyString *get_ustring(char *p); unichar unicode2utf8(unichar uc); void json_print(FILE *fp, JSONValue *value); void json_print_indent(FILE *fp, int depth); #endif
json.c
/*
 * json.c - tree construction, destruction, printing and lookup for the
 * JSON parser.
 */
#include "json.h"

/*
 * Allocate size bytes or abort. Every constructor below writes through the
 * returned pointer immediately, so a NULL result could never be handled.
 */
static void *json_xmalloc(size_t size)
{
    void *p = malloc(size);

    if (!p) {
        fprintf(stderr, "json: out of memory\n");
        abort();
    }
    return p;
}

/* Map a character to the letter of its two-character JSON escape, or 0. */
static char json_short_escape(char c)
{
    switch (c) {
    case '\\': return '\\';
    case '/':  return '/';
    case '"':  return '"';
    case '\b': return 'b';
    case '\n': return 'n';
    case '\r': return 'r';
    case '\t': return 't';
    case '\f': return 'f';
    default:   return '\0';
    }
}

/*
 * Parse the JSON document read from fp.
 * Returns the root value (owned by the caller, release it with
 * json_free_value) or NULL when parsing fails or the input is empty.
 */
JSONValue *json_parser(FILE *fp)
{
    extern FILE *yyin;
    extern JSONValue *json;

    json = NULL;        /* never hand back the root of an earlier parse */
    yyin = fp;
    if (yyparse()) {
        /* The root may already be set when trailing garbage caused the error. */
        json_free_value(json);
        json = NULL;
        return NULL;
    }
    return json;
}

/* Allocate a value; it is tagged as null until a constructor fills it in. */
JSONValue *json_new_value()
{
    JSONValue *value = json_xmalloc(sizeof *value);

    value->type = JSON_VALUE_TYPE_NULL;
    return value;
}

/* Create an integer value. */
JSONValue *json_new_integer(int i)
{
    JSONValue *value = json_new_value();

    value->type = JSON_VALUE_TYPE_INTEGER;
    value->i_value = i;
    return value;
}

/* Create a floating-point value. */
JSONValue *json_new_double(double d)
{
    JSONValue *value = json_new_value();

    value->type = JSON_VALUE_TYPE_DOUBLE;
    value->d_value = d;
    return value;
}

/* Create a string value holding a copy of the NUL-terminated str. */
JSONValue *json_new_string(char *str)
{
    return json_new_nstring(str, strlen(str));
}

/* Create a string value holding a copy of at most size characters of str. */
JSONValue *json_new_nstring(char *str, int size)
{
    JSONValue *value = json_new_value();

    if (size < 0) {
        size = 0;
    }
    value->type = JSON_VALUE_TYPE_STRING;
    value->s_value = json_xmalloc((size_t)size + 1);
    /* strncpy stops at an earlier NUL in str; the terminator is set explicitly. */
    strncpy(value->s_value, str, (size_t)size);
    value->s_value[size] = '\0';
    return value;
}

/* Wrap object in a value; returns NULL when object is NULL. */
JSONValue *json_new_object_value(JSONObject *object)
{
    JSONValue *value;

    if (!object) {
        return NULL;
    }
    value = json_new_value();
    value->type = JSON_VALUE_TYPE_OBJECT;
    value->o_value = object;
    return value;
}

/* Wrap array in a value; returns NULL when array is NULL. */
JSONValue *json_new_array_value(JSONArray *array)
{
    JSONValue *value;

    if (!array) {
        return NULL;
    }
    value = json_new_value();
    value->type = JSON_VALUE_TYPE_ARRAY;
    value->a_value = array;
    return value;
}

/* Create the literal true. */
JSONValue *json_new_true()
{
    JSONValue *value = json_new_value();

    value->type = JSON_VALUE_TYPE_TRUE;
    return value;
}

/* Create the literal false. */
JSONValue *json_new_false()
{
    JSONValue *value = json_new_value();

    value->type = JSON_VALUE_TYPE_FALSE;
    return value;
}

/* Create the literal null. */
JSONValue *json_new_null()
{
    JSONValue *value = json_new_value();

    value->type = JSON_VALUE_TYPE_NULL;
    return value;
}

/* Release value and everything it owns; NULL is ignored. */
void json_free_value(JSONValue *value)
{
    if (!value) {
        return;
    }
    if (value->type == JSON_VALUE_TYPE_STRING) {
        free(value->s_value);
    } else if (value->type == JSON_VALUE_TYPE_OBJECT) {
        json_free_object(value->o_value);
    } else if (value->type == JSON_VALUE_TYPE_ARRAY) {
        json_free_array(value->a_value);
    }
    free(value);
}

/* Write value to fp; depth is the indentation level passed on to objects and arrays. */
void json_print_value(FILE *fp, JSONValue *value, int depth)
{
    MyString *str;

    switch (value->type) {
    case JSON_VALUE_TYPE_INTEGER:
        fprintf(fp, "%d", value->i_value);
        break;
    case JSON_VALUE_TYPE_DOUBLE:
        fprintf(fp, "%f", value->d_value);
        break;
    case JSON_VALUE_TYPE_STRING:
        str = get_ustring(value->s_value);
        fprintf(fp, "\"%s\"", str->str);
        free_string(str);
        break;
    case JSON_VALUE_TYPE_OBJECT:
        json_print_object(fp, value->o_value, depth);
        break;
    case JSON_VALUE_TYPE_ARRAY:
        json_print_array(fp, value->a_value, depth);
        break;
    case JSON_VALUE_TYPE_TRUE:
        fprintf(fp, "true");
        break;
    case JSON_VALUE_TYPE_FALSE:
        fprintf(fp, "false");
        break;
    case JSON_VALUE_TYPE_NULL:
        fprintf(fp, "null");
        break;
    default:
        break;
    }
}

/* Create a single-node element list holding value. */
JSONElement *json_new_element(JSONValue *value)
{
    JSONElement *element = json_xmalloc(sizeof *element);

    element->value = value;
    element->next = NULL;
    return element;
}

/*
 * Release a whole element list including the values it holds.
 * Iterative, so a long array does not cost one stack frame per element.
 */
void json_free_element(JSONElement *element)
{
    while (element) {
        JSONElement *next = element->next;

        json_free_value(element->value);
        free(element);
        element = next;
    }
}

/* Append value to the list and return the list head (a new list when element is NULL). */
JSONElement *json_value_add_element(JSONElement *element, JSONValue *value)
{
    JSONElement *e;

    if (!element) {
        return json_new_element(value);
    }
    for (e = element; e->next; e = e->next) {}
    e->next = json_new_element(value);
    return element;
}

/* Print each element on its own indented line, separated by commas. */
void json_print_element(FILE *fp, JSONElement *element, int depth)
{
    for (; element; element = element->next) {
        json_print_indent(fp, depth);
        json_print_value(fp, element->value, depth);
        if (element->next) {
            fprintf(fp, ", ");
        }
        fprintf(fp, "\n");
    }
}

/* Create an array that takes over the element list (NULL for an empty array). */
JSONArray *json_new_array(JSONElement *element)
{
    JSONArray *array = json_xmalloc(sizeof *array);

    array->elements = element;
    return array;
}

/* Release array and its elements; NULL is ignored. */
void json_free_array(JSONArray *array)
{
    if (!array) {
        return;
    }
    json_free_element(array->elements);
    free(array);
}

/* Print array as "[", its elements one level deeper, and "]". */
void json_print_array(FILE *fp, JSONArray *array, int depth)
{
    json_print_indent(fp, depth);
    fprintf(fp, "[\n");
    json_print_element(fp, array->elements, depth + 1);
    json_print_indent(fp, depth);
    fprintf(fp, "]");
}

/* Create a single-node member list holding key and value. */
JSONMember *json_new_member(JSONValue *key, JSONValue *value)
{
    JSONMember *member = json_xmalloc(sizeof *member);

    member->key = key;
    member->value = value;
    member->next = NULL;
    return member;
}

/*
 * Release a whole member list including keys and values.
 * Iterative, so a large object does not cost one stack frame per member.
 */
void json_free_member(JSONMember *member)
{
    while (member) {
        JSONMember *next = member->next;

        json_free_value(member->key);
        json_free_value(member->value);
        free(member);
        member = next;
    }
}

/* Append a new key/value member to the list and return the list head. */
JSONMember *json_value_add_member(JSONMember *member, JSONValue *key, JSONValue *value)
{
    return json_member_add_member(member, json_new_member(key, value));
}

/* Append the node (or list) value to member and return the list head. */
JSONMember *json_member_add_member(JSONMember *member, JSONMember *value)
{
    JSONMember *m;

    if (!member) {
        return value;
    }
    for (m = member; m->next; m = m->next) {}
    m->next = value;
    return member;
}

/* Print each member as `key : value` on its own indented line. */
void json_print_member(FILE *fp, JSONMember *member, int depth)
{
    for (; member; member = member->next) {
        json_print_indent(fp, depth);
        json_print_value(fp, member->key, depth);
        fprintf(fp, " : ");
        json_print_value(fp, member->value, depth);
        if (member->next) {
            fprintf(fp, ", ");
        }
        fprintf(fp, "\n");
    }
}

/* Create an object that takes over the member list (NULL for an empty object). */
JSONObject *json_new_object(JSONMember *member)
{
    JSONObject *object = json_xmalloc(sizeof *object);

    object->members = member;
    return object;
}

/* Release object and its members; NULL is ignored. */
void json_free_object(JSONObject *object)
{
    if (!object) {
        return;
    }
    json_free_member(object->members);
    free(object);
}

/* Print object as "{", its members one level deeper, and "}". */
void json_print_object(FILE *fp, JSONObject *object, int depth)
{
    json_print_indent(fp, depth);
    fprintf(fp, "{\n");
    json_print_member(fp, object->members, depth + 1);
    json_print_indent(fp, depth);
    fprintf(fp, "}");
}

/*
 * Return the element at 0-based index of an array value.
 * Returns NULL when value is NULL or not an array, or when index is
 * negative or past the end. The NULL check lets the result of
 * json_value_at_key() be passed in directly.
 */
JSONValue *json_value_at_index(JSONValue *value, int index)
{
    JSONElement *e;

    if (!value || value->type != JSON_VALUE_TYPE_ARRAY || !value->a_value || index < 0) {
        return NULL;
    }
    for (e = value->a_value->elements; e && index > 0; index--, e = e->next) {}
    return e ? e->value : NULL;
}

/*
 * Return the value of the first member of an object value whose key equals key.
 * Returns NULL when value is NULL or not an object, key is NULL, or no
 * member matches.
 */
JSONValue *json_value_at_key(JSONValue *value, char *key)
{
    JSONMember *m;

    if (!value || value->type != JSON_VALUE_TYPE_OBJECT || !value->o_value || !key) {
        return NULL;
    }
    for (m = value->o_value->members; m; m = m->next) {
        assert(m->key->type == JSON_VALUE_TYPE_STRING);
        if (strcmp(m->key->s_value, key) == 0) {
            return m->value;
        }
    }
    return NULL;
}

/*
 * Return a new MyString containing p with JSON escaping applied.
 * Backslash, slash, double quote and \b \n \r \t \f use their two-character
 * escapes; any other control character below 0x20 is written as \u00XX so
 * the output stays valid JSON. The caller releases it with free_string().
 */
MyString *get_ustring(char *p)
{
    MyString *str = new_string();

    for (; *p; p++) {
        char c = *p;
        char esc = json_short_escape(c);

        if (esc) {
            my_chrcat(str, '\\');
            my_chrcat(str, esc);
        } else if ((unsigned char)c < 0x20) {
            char buf[8];
            int i;

            snprintf(buf, sizeof buf, "\\u%04x", (unsigned int)(unsigned char)c);
            for (i = 0; buf[i]; i++) {
                my_chrcat(str, buf[i]);
            }
        } else {
            my_chrcat(str, c);
        }
    }
    return str;
}

/*
 * Pack the three-byte UTF-8 form of uc into bits 23..0 of the result
 * (lead byte in bits 23..16). The three-byte form is only the correct
 * encoding for U+0800..U+FFFF; other code points need a different length.
 */
unichar unicode2utf8(unichar uc)
{
    return (0xe0 | ((uc >> 12) & 0x0f)) << 16
         | (0x80 | ((uc >> 6) & 0x3f)) << 8
         | (0x80 | (uc & 0x3f));
}

/* Print value at indentation level 0, followed by a newline. */
void json_print(FILE *fp, JSONValue *value)
{
    json_print_value(fp, value, 0);
    fprintf(fp, "\n");
}

/* Write JSON_INDENT_STR depth times. */
void json_print_indent(FILE *fp, int depth)
{
    int i;

    for (i = 0; i < depth; i++) {
        fprintf(fp, JSON_INDENT_STR);
    }
}
Makefile
# Build json.exe from the hand-written sources plus the bison/flex output.
CC = gcc
YACC = bison
LEX = flex
FLAGS = -O2 -Wall
LINK_LIB =
OBJS = main.o json.o MyString.o parser.tab.o lex.yy.o
GENERAT_SRC = parser.tab.c parser.tab.h lex.yy.c

.PHONY : clean

json.exe : $(OBJS)
	$(CC) $(FLAGS) -o $@ $^ $(LINK_LIB)

.c.o :
	$(CC) $(FLAGS) -c $<

# bison -d writes parser.tab.c and parser.tab.h in one run.
parser.tab.c : parser.y
	$(YACC) -d -b parser $<

# The header is a by-product of the rule above; the no-op recipe only tells
# make that it is up to date once parser.tab.c has been generated.
parser.tab.h : parser.tab.c
	@:

lex.yy.c : lex.l
	$(LEX) $<

# json.h includes MyString.h, so every user of json.h depends on both.
main.o : json.h MyString.h
json.o : json.h MyString.h
MyString.o : MyString.h
parser.tab.o : parser.tab.c parser.tab.h json.h MyString.h
# The scanner includes parser.tab.h, so bison must have run before it is compiled.
lex.yy.o : lex.yy.c parser.tab.h json.h MyString.h

clean :
	-rm -f *.exe $(OBJS) $(GENERAT_SRC) *.stackdump *~ \#*