虽然没有使用标题里的两个著名工具,但我们还是可以参考一下。
在制作过程中,为了了解这两个工具,偶编写了yacc和lex的脚本,并安装了Bison和Flex。
先看yacc的脚本:
利用该脚本生成的语法分析程序会将指定的源代码文件读取分析后打印,偶用它来测试自己编写的语法规则。
%{
/*
 * Bison grammar for a small C-like language.
 *
 * Every grammar symbol carries a heap-allocated string (see %union). Each
 * action rebuilds the text of the construct it recognised, and the start
 * rule prints the reconstructed program. The tool is a one-shot grammar
 * test harness: the strings are never freed.
 */
#include <stdio.h>
#include <string.h>

extern FILE *yyin;
extern "C"
{
    int yylex(void);
}

# define YYLTYPE_IS_DECLARED 1
# define YYLTYPE_IS_TRIVIAL 1
typedef struct YYLTYPE
{
    int first_line;
    int first_column;
    int last_line;
    int last_column;
} YYLTYPE;

void yyerror(const char* err);

/* Scratch buffer shared by every format() expansion. */
static char yyac_string_buffer[65535];

/* Visual C++ before 2015 only provides _snprintf, which does not terminate
 * the buffer on truncation; format() therefore terminates it explicitly. */
#if defined(_MSC_VER) && _MSC_VER < 1900
#  define YYAC_SNPRINTF _snprintf
#else
#  define YYAC_SNPRINTF snprintf
#endif

/*
 * format(strptr, fmt, ...): printf-style formatting into a freshly
 * strdup()'d string stored in strptr.
 *
 * - Output longer than the scratch buffer is truncated instead of
 *   overflowing it.
 * - Wrapped in do/while(0) so that it behaves as a single statement.
 * - The first variadic argument must be a literal format string; token
 *   text is always passed through "%s" so that a '%' in the parsed source
 *   is never interpreted as a conversion.
 */
#define format(strptr, ...)                                              \
    do {                                                                 \
        YYAC_SNPRINTF(yyac_string_buffer, sizeof yyac_string_buffer,     \
                      __VA_ARGS__);                                      \
        yyac_string_buffer[sizeof yyac_string_buffer - 1] = '\0';        \
        (strptr) = strdup(yyac_string_buffer);                           \
    } while (0)
%}

%union
{
    char *string;
}

%error-verbose
%debug
%glr-parser

/* TOKENS: Space & Comments */
%token <string> SPACE
%token <string> SINGLE_LINE_COMMENTS
%token <string> MULTI_LINE_COMMENTS

/* TOKENS: Type Define */
%token <string> TK_CONST
%token <string> TK_VOID_TYPE
%token <string> TK_CHAR_TYPE
%token <string> TK_SHORT_TYPE
%token <string> TK_INT_TYPE
%token <string> TK_FLOAT_TYPE
%token <string> TK_SIGNED_TYPE
%token <string> TK_UNSIGNED_TYPE

/* TOKENS: Value & Identifier */
%token <string> TK_ID
%token <string> TK_CHAR
%token <string> TK_INTEGER
%token <string> TK_FLOAT
%token <string> TK_HEX
%token <string> TK_STRING

/* TOKENS: Expression Operator */
%token <string> TK_ASSIGN TK_ADD_ASSIGN TK_SUB_ASSIGN TK_MUL_ASSIGN TK_DIV_ASSIGN TK_MOD_ASSIGN TK_AND_ASSIGN TK_OR_ASSIGN TK_XOR_ASSIGN TK_LSHIFT_ASSIGN TK_RSHIFT_ASSIGN
%token <string> TK_LOGIC_AND TK_LOGIC_OR
%token <string> TK_LESS TK_LESS_EQ TK_GREATER TK_GREATER_EQ TK_EQUAL TK_NOT_EQUAL
%token <string> TK_ADD TK_SUB TK_XOR TK_AND TK_OR TK_LSHIFT TK_RSHIFT
%token <string> TK_MUL TK_DIV TK_MOD
%token <string> TK_NOT TK_BITWISE_NOT TK_SELF_ADD TK_SELF_SUB

/* TOKENS: Control Flow */
%token <string> TK_IF TK_ELSE
%token <string> TK_FOR
%token <string> TK_DO TK_WHILE
%token <string> TK_SWITCH TK_CASE TK_DEFAULT
%token <string> TK_BREAK TK_CONTINUE TK_RETURN

/* Type & Value */
%type <string> DataType
%type <string> StringValue
%type <string> Value
%type <string> Expression

/* Variable */
%type <string> VarDeclID
%type <string> VarDeclIDList
%type <string> VariableDecl

/* Statement */
%type <string> Statement
%type <string> StatementSet
%type <string> Block

/* Control Flow */
%type <string> ControlFlow
%type <string> If DoWhile While Return Break Continue
%type <string> For For_Init For_Condition For_Iterator
%type <string> Switch CaseBranch CaseValue Case Default CaseSet

/* Function */
%type <string> FunctionCalling
%type <string> FunctionArgumentDefine
%type <string> FunctionForwardingDecl
%type <string> FunctionParameterSet
%type <string> FunctionDecl
%type <string> FunctionDefine

/* Program */
%type <string> ProgramStatement
%type <string> ProgramStatementSet
%type <string> Program

/* Operator precedence, lowest first. */

/* Dangling else: an "if" without "else" binds weaker than TK_ELSE, so the
 * parser shifts the else and attaches it to the nearest "if". */
%nonassoc LOWER_THAN_ELSE
%nonassoc TK_ELSE

/* Assignment is right-associative: a = b = c means a = (b = c). */
%right TK_ASSIGN TK_ADD_ASSIGN TK_SUB_ASSIGN TK_MUL_ASSIGN TK_DIV_ASSIGN TK_MOD_ASSIGN TK_AND_ASSIGN TK_OR_ASSIGN TK_XOR_ASSIGN TK_LSHIFT_ASSIGN TK_RSHIFT_ASSIGN
%left TK_LOGIC_AND TK_LOGIC_OR
%left TK_LESS TK_LESS_EQ TK_GREATER TK_GREATER_EQ TK_EQUAL TK_NOT_EQUAL
%left TK_ADD TK_SUB TK_XOR TK_AND TK_OR TK_LSHIFT TK_RSHIFT
%left TK_MUL TK_DIV TK_MOD
%left SELF_OP
%left UNARY

%start Program

%%

/* Keyword tokens carry no semantic value, so type names are spelled out
 * literally instead of being read from $1 / $2. */
DataType
    : TK_VOID_TYPE                      { format($$, "void"); }
    | TK_CHAR_TYPE                      { format($$, "char"); }
    | TK_SHORT_TYPE                     { format($$, "short"); }
    | TK_INT_TYPE                       { format($$, "int"); }
    | TK_FLOAT_TYPE                     { format($$, "float"); }
    | TK_SIGNED_TYPE                    { format($$, "signed"); }
    | TK_UNSIGNED_TYPE                  { format($$, "unsigned"); }
    | TK_SIGNED_TYPE TK_CHAR_TYPE       { format($$, "signed char"); }
    | TK_SIGNED_TYPE TK_SHORT_TYPE      { format($$, "signed short"); }
    | TK_SIGNED_TYPE TK_INT_TYPE        { format($$, "signed int"); }
    | TK_UNSIGNED_TYPE TK_CHAR_TYPE     { format($$, "unsigned char"); }
    | TK_UNSIGNED_TYPE TK_SHORT_TYPE    { format($$, "unsigned short"); }
    | TK_UNSIGNED_TYPE TK_INT_TYPE      { format($$, "unsigned int"); }
    ;

StringValue
    : TK_STRING                         { format($$, "%s", $1); }
    | StringValue TK_STRING             { format($$, "%s\n%s", $1, $2); }
    ;

Value
    : TK_CHAR                           { format($$, "'%s'", $1); }
    | StringValue                       { format($$, "\"%s\"", $1); }
    | TK_INTEGER                        { format($$, "%s", $1); }
    | TK_HEX                            { format($$, "%s", $1); }
    | TK_FLOAT                          { format($$, "%s", $1); }
    ;

Expression
    : TK_ID                                     { format($$, "%s", $1); }
    | Value                                     { format($$, "%s", $1); }
    /* Operators */
    /* Arithmetic */
    | Expression TK_ADD Expression              { format($$, "%s + %s", $1, $3); }
    | Expression TK_SUB Expression              { format($$, "%s - %s", $1, $3); }
    | Expression TK_MUL Expression              { format($$, "%s * %s", $1, $3); }
    | Expression TK_DIV Expression              { format($$, "%s / %s", $1, $3); }
    | Expression TK_MOD Expression              { format($$, "%s %% %s", $1, $3); }
    | Expression TK_XOR Expression              { format($$, "%s ^ %s", $1, $3); }
    /* Self Add/Sub */
    | Expression TK_SELF_ADD %prec SELF_OP      { format($$, "%s++", $1); }
    | Expression TK_SELF_SUB %prec SELF_OP      { format($$, "%s--", $1); }
    | TK_SELF_ADD Expression %prec SELF_OP      { format($$, "++%s", $2); }
    | TK_SELF_SUB Expression %prec SELF_OP      { format($$, "--%s", $2); }
    /* Unary */
    | TK_ADD Expression %prec UNARY             { format($$, "+ %s", $2); }
    | TK_SUB Expression %prec UNARY             { format($$, "- %s", $2); }
    | TK_NOT Expression %prec UNARY             { format($$, "! %s", $2); }
    | TK_BITWISE_NOT Expression %prec UNARY     { format($$, "~ %s", $2); }
    /* Logic */
    | Expression TK_GREATER Expression          { format($$, "%s > %s", $1, $3); }
    | Expression TK_GREATER_EQ Expression       { format($$, "%s >= %s", $1, $3); }
    | Expression TK_LESS Expression             { format($$, "%s < %s", $1, $3); }
    | Expression TK_LESS_EQ Expression          { format($$, "%s <= %s", $1, $3); }
    | Expression TK_EQUAL Expression            { format($$, "%s == %s", $1, $3); }
    | Expression TK_NOT_EQUAL Expression        { format($$, "%s != %s", $1, $3); }
    | Expression TK_LOGIC_AND Expression        { format($$, "%s && %s", $1, $3); }
    | Expression TK_LOGIC_OR Expression         { format($$, "%s || %s", $1, $3); }
    /* Bit */
    | Expression TK_AND Expression              { format($$, "%s & %s", $1, $3); }
    | Expression TK_OR Expression               { format($$, "%s | %s", $1, $3); }
    | Expression TK_LSHIFT Expression           { format($$, "%s << %s", $1, $3); }
    | Expression TK_RSHIFT Expression           { format($$, "%s >> %s", $1, $3); }
    /* Assign */
    | Expression TK_ASSIGN Expression           { format($$, "%s = %s", $1, $3); }
    | Expression TK_ADD_ASSIGN Expression       { format($$, "%s += %s", $1, $3); }
    | Expression TK_SUB_ASSIGN Expression       { format($$, "%s -= %s", $1, $3); }
    | Expression TK_MUL_ASSIGN Expression       { format($$, "%s *= %s", $1, $3); }
    | Expression TK_DIV_ASSIGN Expression       { format($$, "%s /= %s", $1, $3); }
    /* "%%" emits a literal '%'; a bare "%=" is not a valid conversion. */
    | Expression TK_MOD_ASSIGN Expression       { format($$, "%s %%= %s", $1, $3); }
    | Expression TK_AND_ASSIGN Expression       { format($$, "%s &= %s", $1, $3); }
    | Expression TK_OR_ASSIGN Expression        { format($$, "%s |= %s", $1, $3); }
    | Expression TK_XOR_ASSIGN Expression       { format($$, "%s ^= %s", $1, $3); }
    | Expression TK_LSHIFT_ASSIGN Expression    { format($$, "%s <<= %s", $1, $3); }
    | Expression TK_RSHIFT_ASSIGN Expression    { format($$, "%s >>= %s", $1, $3); }
    /* Parentheses */
    | '(' Expression ')'                        { format($$, "( %s )", $2); }
    | FunctionCalling                           { format($$, "%s", $1); }
    ;

VarDeclID
    : TK_ID                             { format($$, "%s", $1); }
    | TK_ID TK_ASSIGN Expression        { format($$, "%s=%s", $1, $3); }
    ;

VarDeclIDList
    : VarDeclID                         { format($$, "%s", $1); }
    | VarDeclIDList ',' VarDeclID       { format($$, "%s, %s", $1, $3); }
    ;

VariableDecl
    : DataType VarDeclIDList            { format($$, "%s %s", $1, $2); }
    ;

Statement
    : /* Dummy */ ';'                   { format($$, "/* empty */;\n"); }
    | VariableDecl ';'                  { format($$, "%s;\n", $1); }
    | Expression ';'                    { format($$, "%s;\n", $1); }
    | Block                             { format($$, "%s", $1); }
    | ControlFlow                       { format($$, "%s", $1); }
    ;

StatementSet
    : Statement                         { format($$, "%s", $1); }
    | StatementSet Statement            { format($$, "%s%s", $1, $2); }
    ;

Block
    : '{' '}'                           { format($$, "{\n/* Empty Block */\n}\n"); }
    | '{' StatementSet '}'              { format($$, "{\n%s\n}\n", $2); }
    ;

If
    : TK_IF '(' Expression ')' Statement %prec LOWER_THAN_ELSE
        { format($$, "if(%s)\n\t%s", $3, $5); }
    | TK_IF '(' Expression ')' Statement TK_ELSE Statement
        { format($$, "if(%s)\n\t%selse\n\t%s", $3, $5, $7); }
    ;

For_Init
    : /* Empty */                       { format($$, "/* empty */"); }
    | VariableDecl                      { format($$, "%s", $1); }
    | Expression                        { format($$, "%s", $1); }
    ;

For_Condition
    : /* Empty */                       { format($$, "/* empty */"); }
    | Expression                        { format($$, "%s", $1); }
    ;

For_Iterator
    : /* Empty */                       { format($$, "/* empty */"); }
    | Expression                        { format($$, "%s", $1); }
    ;

/* The three clauses of a for header are separated by ';', not ','. */
For
    : TK_FOR '(' For_Init ';' For_Condition ';' For_Iterator ')' Statement
        { format($$, "for(%s; %s; %s)\n%s\n", $3, $5, $7, $9); }
    ;

/* The braces may hold any non-empty sequence of statements. */
DoWhile
    : TK_DO '{' StatementSet '}' TK_WHILE '(' Expression ')' ';'
        { format($$, "do\n{\n%s\n}while(%s);\n", $3, $7); }
    ;

While
    : TK_WHILE '(' Expression ')' Statement
        { format($$, "while(%s)\n%s\n", $3, $5); }
    ;

CaseValue
    : TK_CHAR                           { format($$, "%s", $1); }
    | TK_INTEGER                        { format($$, "%s", $1); }
    | TK_HEX                            { format($$, "%s", $1); }
    ;

CaseBranch
    : TK_CASE CaseValue ':' /* Empty */     { format($$, "case %s: /* Empty */", $2); }
    | TK_CASE CaseValue ':' StatementSet    { format($$, "case %s:\n%s", $2, $4); }
    ;

Default
    : TK_DEFAULT ':' StatementSet       { format($$, "default:\n%s\n", $3); }
    ;

Case
    : CaseBranch                        { format($$, "%s", $1); }
    | Default                           { format($$, "%s", $1); }
    ;

CaseSet
    : Case                              { format($$, "%s", $1); }
    | CaseSet Case                      { format($$, "%s\n%s", $1, $2); }
    ;

Switch
    : TK_SWITCH '(' Expression ')' '{' CaseSet '}'
        { format($$, "switch(%s)\n{\n%s\n} // switch\n", $3, $6); }
    ;

Break
    : TK_BREAK ';'                      { format($$, "break ;\n"); }
    ;

Continue
    : TK_CONTINUE ';'                   { format($$, "continue ;\n"); }
    ;

Return
    : TK_RETURN ';'                     { format($$, "return ;\n"); }
    | TK_RETURN Expression ';'          { format($$, "return %s;\n", $2); }
    ;

ControlFlow
    : If                                { format($$, "%s", $1); }
    | For                               { format($$, "%s", $1); }
    | DoWhile                           { format($$, "%s", $1); }
    | While                             { format($$, "%s", $1); }
    | Switch                            { format($$, "%s", $1); }
    | Break                             { format($$, "%s", $1); }
    | Continue                          { format($$, "%s", $1); }
    | Return                            { format($$, "%s", $1); }
    ;

/* Function */

/* Parameter list of a declaration; the last parameter gets no trailing comma. */
FunctionArgumentDefine
    : DataType TK_ID                                { format($$, "%s %s", $1, $2); }
    | DataType TK_ID ',' FunctionArgumentDefine     { format($$, "%s %s, %s", $1, $2, $4); }
    ;

FunctionDecl
    : DataType TK_ID '(' ')'                        { format($$, "%s %s()", $1, $2); }
    | DataType TK_ID '(' FunctionArgumentDefine ')' { format($$, "%s %s(%s)", $1, $2, $4); }
    ;

/* Keep the terminating ';' of a prototype in the printed text. */
FunctionForwardingDecl
    : FunctionDecl ';'                  { format($$, "%s;", $1); }
    ;

FunctionDefine
    : FunctionDecl Block                { format($$, "%s\n%s", $1, $2); }
    ;

FunctionParameterSet
    : Expression                            { format($$, "%s", $1); }
    | Expression ',' FunctionParameterSet   { format($$, "%s, %s", $1, $3); }
    ;

FunctionCalling
    : TK_ID '(' ')'                         { format($$, "%s()", $1); }
    | TK_ID '(' FunctionParameterSet ')'    { format($$, "%s(%s)", $1, $3); }
    ;

/* Keep the terminating ';' of a top-level variable declaration. */
ProgramStatement
    : VariableDecl ';'                  { format($$, "%s;\n", $1); }
    | FunctionForwardingDecl            { format($$, "%s\n", $1); }
    | FunctionDefine                    { format($$, "%s\n", $1); }
    ;

ProgramStatementSet
    : ProgramStatement                          { format($$, "%s", $1); }
    | ProgramStatementSet ProgramStatement      { format($$, "%s%s", $1, $2); }
    ;

/* Start rule: print the reconstructed program text. */
Program
    : ProgramStatementSet               { $$ = $1; printf("%s", $1); }
    ;

%%

/* Called by the generated parser to report a syntax error. */
void yyerror(const char* err)
{
    printf("\nError:%s\n", err);
}

/*
 * Opens the hard-coded test source file and parses it once.
 * Returns 1 when the file cannot be opened, otherwise the result of
 * yyparse() (0 on success).
 */
int main(void)
{
    printf("Running..\n");
    printf("Loading file...\n");

    const char path[] = "z:\\Syntax.lex.test\\Syntax.Test.Code.txt";
    yyin = fopen(path, "r");
    if (!yyin)
    {
        printf("Can't open code file");
        return 1;
    }

    printf("Parsing...\n");
    /* A single call parses the whole input; a failed parse is not resumed
     * from the middle of the file. */
    int status = yyparse();
    printf("\nDone.");
    fclose(yyin);
    return status;
}
注意,上述yacc脚本指定了用于测试的代码的路径,你要根据自己的实际情况修改。
yacc和lex总是成对出现。
以下是lex脚本,该脚本对代码进行分词:
%{
/*
 * Flex scanner for the small C-like language parsed by the companion Bison
 * grammar.
 *
 * Tokens that carry text (identifiers, numbers, character and string
 * constants, and the type keywords whose spelling the grammar may print)
 * hand the parser a strdup()'d copy in yylval.string. Other tokens carry no
 * value. Unrecognised single characters are returned as their own character
 * code.
 */
#include "syntax.yacc.hh"
#include <stdio.h>
#include <string.h>

#undef yywrap
#define yywrap() 1

extern "C"
{
    int yylex(void);
}

/* Reports a lexical error on standard output. */
void error(const char* err)
{
    printf("Error:%s\n", err);
}

/* Capacity of the buffer that collects character and string constants. */
#define LEX_STRING_BUFFER_SIZE 4096

/*
 * Appends one character to the literal buffer declared at the top of
 * yylex(). One byte is always kept free for the terminating NUL; a constant
 * that does not fit stops the scan instead of writing past the buffer.
 */
#define LEX_APPEND(ch)                                                      \
    do {                                                                    \
        if (lex_string_buf_ptr >=                                           \
            lex_string_buffer + LEX_STRING_BUFFER_SIZE - 1)                 \
        {                                                                   \
            error("String constant too long");                              \
            yyterminate();                                                  \
        }                                                                   \
        *lex_string_buf_ptr++ = (char)(ch);                                 \
    } while (0)
%}

DIGIT [0-9]

%x comment chr str

%%
    char lex_string_buffer[LEX_STRING_BUFFER_SIZE];
    char *lex_string_buf_ptr = lex_string_buffer;

[ \t\r\n]+      ;

const           { return TK_CONST; }
void            { return TK_VOID_TYPE; }
char            { yylval.string = strdup(yytext); return TK_CHAR_TYPE; }
short           { yylval.string = strdup(yytext); return TK_SHORT_TYPE; }
int             { yylval.string = strdup(yytext); return TK_INT_TYPE; }
float           { return TK_FLOAT_TYPE; }
signed          { yylval.string = strdup(yytext); return TK_SIGNED_TYPE; }
unsigned        { yylval.string = strdup(yytext); return TK_UNSIGNED_TYPE; }

if              { return TK_IF; }
else            { return TK_ELSE; }
for             { return TK_FOR; }
do              { return TK_DO; }
while           { return TK_WHILE; }
switch          { return TK_SWITCH; }
case            { return TK_CASE; }
default         { return TK_DEFAULT; }
break           { return TK_BREAK; }
continue        { return TK_CONTINUE; }
return          { return TK_RETURN; }

&&              { return TK_LOGIC_AND; }
\|\|            { return TK_LOGIC_OR; }
&               { return TK_AND; }
\|              { return TK_OR; }
\>\>            { return TK_RSHIFT; }
\<\<            { return TK_LSHIFT; }

==              { return TK_EQUAL; }
=               { return TK_ASSIGN; }
!=              { return TK_NOT_EQUAL; }
\>=             { return TK_GREATER_EQ; }
\>              { return TK_GREATER; }
\<=             { return TK_LESS_EQ; }
\<              { return TK_LESS; }

\+\+            { return TK_SELF_ADD; }
--              { return TK_SELF_SUB; }

\+=             { return TK_ADD_ASSIGN; }
-=              { return TK_SUB_ASSIGN; }
\*=             { return TK_MUL_ASSIGN; }
\/=             { return TK_DIV_ASSIGN; }
\%=             { return TK_MOD_ASSIGN; }
&=              { return TK_AND_ASSIGN; }
\|=             { return TK_OR_ASSIGN; }
\^=             { return TK_XOR_ASSIGN; }
\<\<=           { return TK_LSHIFT_ASSIGN; }
\>\>=           { return TK_RSHIFT_ASSIGN; }

\+              { return TK_ADD; }
-               { return TK_SUB; }
\*              { return TK_MUL; }
\/              { return TK_DIV; }
\%              { return TK_MOD; }
\^              { return TK_XOR; }
!               { return TK_NOT; }
~               { return TK_BITWISE_NOT; }

{DIGIT}+                { yylval.string = strdup(yytext); return TK_INTEGER; }
0[xX][a-fA-F0-9]+       { yylval.string = strdup(yytext); return TK_HEX; }
({DIGIT}+\.{DIGIT}*)|({DIGIT}+\.?{DIGIT}*[fF])|({DIGIT}+\.?{DIGIT}*[eE][+-]{DIGIT}+)    { yylval.string = strdup(yytext); return TK_FLOAT; }

[a-zA-Z\$_][a-zA-Z\$_0-9]*      { yylval.string = strdup(yytext); return TK_ID; }

"//"[^\n]*              { /* Single line comment; the newline itself is consumed by the whitespace rule, so a comment on the last line without a trailing newline is skipped as well. */ }

"/*"                    BEGIN(comment);
<comment>[^*\n]*        ;
<comment>"*"+[^*/\n]*   ;
<comment>\n             ;
<comment>"*"+"/"        { BEGIN(INITIAL); /* Multi-line comments */ }

'                       { lex_string_buf_ptr = lex_string_buffer; BEGIN(chr); }
<chr>'                  {
                            /* closing quote: hand the collected text to the parser */
                            BEGIN(INITIAL);
                            *lex_string_buf_ptr = '\0';
                            yylval.string = strdup(lex_string_buffer);
                            return TK_CHAR;
                        }
<chr>\\r                { LEX_APPEND('\r'); }
<chr>\\n                { LEX_APPEND('\n'); }
<chr>\\t                { LEX_APPEND('\t'); }
<chr>\\b                { LEX_APPEND('\b'); }
<chr>\\f                { LEX_APPEND('\f'); }
<chr>\\[0-7]{1,3}       {
                            /* octal escape: parse the digits from the matched text */
                            int result = 0;
                            (void) sscanf(yytext + 1, "%o", &result);
                            if (result > 0xff)
                            {
                                error("Invalid escape value: out of bounds");
                                yyterminate();
                            }
                            LEX_APPEND(result);
                        }
<chr>\\(.|\n)           { /* any other escape, e.g. \' or \\ : keep the escaped character */ LEX_APPEND(yytext[1]); }
<chr>[^\\\t\n']         { LEX_APPEND(yytext[0]); }

\"                      { lex_string_buf_ptr = lex_string_buffer; BEGIN(str); }
<str>\"                 {
                            /* saw closing quote - all done */
                            BEGIN(INITIAL);
                            *lex_string_buf_ptr = '\0';
                            /* return string constant token type and value to parser */
                            yylval.string = strdup(lex_string_buffer);
                            return TK_STRING;
                        }
<str>\n                 {
                            /* error - unterminated string constant */
                            error("Unterminated string");
                            yyterminate();
                        }
<str>\\[0-7]{1,3}       {
                            /* octal escape sequence */
                            int result = 0;
                            (void) sscanf(yytext + 1, "%o", &result);
                            if (result > 0xff)
                            {
                                /* error, constant is out-of-bounds */
                                error("Char value out of bound.");
                                yyterminate();
                            }
                            LEX_APPEND(result);
                        }
<str>\\[0-9]+           {
                            /* bad escape sequence; something like '\48' or '\0777777' */
                            error("Invalid escape sequence");
                            yyterminate();
                        }
<str>\\n                { LEX_APPEND('\n'); }
<str>\\t                { LEX_APPEND('\t'); }
<str>\\r                { LEX_APPEND('\r'); }
<str>\\b                { LEX_APPEND('\b'); }
<str>\\f                { LEX_APPEND('\f'); }
<str>\\(.|\n)           { LEX_APPEND(yytext[1]); }
<str>[^\\\n\"]+         {
                            const char *yptr = yytext;
                            while (*yptr)
                                LEX_APPEND(*yptr++);
                        }

.                       return yytext[0];

%%
lex脚本的部分规则参考了(其实是直接拷贝)Flex安装包所附带指南中的示例代码。
一并给出Bison和Flex的工作脚本吧,该脚本是BAT文件:
@echo off
rem Generates the parser with Bison and the scanner with Flex, compiles both
rem with cl.exe and runs the resulting executable. Adjust the *_ROOT paths
rem below to match the local installation.

set IN_ROOT=z:\SharpC\Grammar\Common\
set OUT_ROOT=z:\Syntax.Lex.Test\
set GNU_ROOT=C:\gnuwin32\
set GNU_INC=%GNU_ROOT%include
set GNU_LIB=%GNU_ROOT%lib
set GNU_FLEX_LIB=%GNU_LIB%\libfl.a
set GNU_YACC_LIB=%GNU_LIB%\liby.a
set YACC_SRC=%IN_ROOT%Syntax.yacc
set YACC_TARGET_CC=%OUT_ROOT%Syntax.yacc.cc
set YACC_TARGET_HH=%OUT_ROOT%Syntax.yacc.hh
set YACC_TARGET_RPT=%OUT_ROOT%Syntax.report.yacc.txt
set FLEX_SRC=%IN_ROOT%Syntax.Lex
set FLEX_TARGET=%OUT_ROOT%Syntax.lex.cc
set BIN_TARGET=%OUT_ROOT%Debug\Syntax.Lex.exe

cls

@echo ============= Clean ===============
if exist "%YACC_TARGET_CC%" del "%YACC_TARGET_CC%"
if exist "%YACC_TARGET_HH%" del "%YACC_TARGET_HH%"
if exist "%FLEX_TARGET%" del "%FLEX_TARGET%"
if exist "%BIN_TARGET%" del "%BIN_TARGET%"

rem "if errorlevel 1" is true for any exit code of 1 or more. The former
rem "if not errorlevel 0" could never be true for such codes, so a failing
rem step was not detected.

@echo ============= Syntax ===============
bison -d -v "%YACC_SRC%" -o"%YACC_TARGET_CC%" --report=state --report-file="%YACC_TARGET_RPT%"
if errorlevel 1 goto failed

@echo ============= Lex ===============
flex -o"%FLEX_TARGET%" "%FLEX_SRC%"
if errorlevel 1 goto failed

@echo ============= Build ===============
rem The linker does not create the output directory.
if not exist "%OUT_ROOT%Debug\" mkdir "%OUT_ROOT%Debug"
rem Compile the generated parser together with the generated scanner.
cl.exe "%YACC_TARGET_CC%" "%FLEX_TARGET%" /I"%GNU_INC%" /link "%GNU_FLEX_LIB%" "%GNU_YACC_LIB%" /OUT:"%BIN_TARGET%"
if errorlevel 1 goto failed
if not exist "%BIN_TARGET%" goto failed

@echo ============= Run ===============
"%BIN_TARGET%"
goto exit

:failed
@echo =================================
@echo Failed.

:exit
忘记说了,整个工作在Windows+VSE 2012下完成。工作脚本也应该在VSE的环境命令窗口中运行。不晓得VSE和其环境命令窗口为何物的同学,请假装没有看到这个系列文章吧。