中间代码生成器
一、实验目的
掌握中间代码生成器的构造原理和编程方法。
二、实验内容
用自顶向下方法或Yacc进行语法分析的基础上,编写一个中间代码生成程序。(见教材附录 A.1,p394)
program → block
block → { decls stmts }
decls → decls decl | e
decl → type id ;
type → type [num] //数组可以不做
| basic //四种基本数据类型 int | float | char | bool
stmts → stmts stmt | e
stmt → id= expr ;
| if ( bool ) stmt
| if ( bool) stmt else stmt
| while (bool) stmt
| do stmt while (bool ) ;
| break ; //break可以不做
| block
bool → bool || join | join
join → join && equality | equality
equality → equality == rel | equality != rel | rel
rel → expr < expr
| expr <= expr
| expr > expr
| expr >= expr
| expr
expr → expr + term
| expr - term
| term
term → term * unary
| term / unary
| unary
unary → !unary | - unary | factor
factor → ( expr ) | id| num
。
三、实验要求
1.个人完成,提交实验报告。
2.实验的结果为:生成中间代码 (三地址码)
{
a=10;
x = b - c;
y = a * x;
z = x * d;
abcd = a + x +y;
if(a>10)
a=a+1;
a=a+2;
}
实验结果生成的代码片段
0: a = 10
1: t0 = b - c
2: x = t0
3: t1 = a * x
4: y = t1
5: t2 = x * d
6: z = t2
7: t3 = a + x
8: t4 = t3 + y
9: abcd = t4
10: if a > 10 goto 12
11: goto 14
12: t5 = a + 1
13: a = t5
14: t6 = a + 2
15: a = t6
个人对于本次中间代码生成器-编译实验的心得:
#define YYSTYPE node:定义符号栈属性结点类型
typedef struct node
{
instrlist *truelist, *falselist, *nextlist;//相当于有一个 正确链表与 错误链表 每个链表结点都有个值
char addr[256];// 地址字段 存放具体结点的内容比如 a 8 10 等
char lexeme[256];//字符串指端
char oper[3];//操作符字段 具体存放操作符对应的字符串
int instr; //是否要回填指示 具体内容是行号
}node;
int main()
{
list = newcodelist();
//申请一个内存空间给list
// int linecnt, capacity; 行数 与 最大行数
// int temp_index;
// char **code; 总代码
freopen("test.in", "rt+", stdin);
freopen("test.out", "wt+", stdout);
//打开文件
yyparse();
print(list);
fclose(stdin);
//关闭文件
fclose(stdout);
//关闭文件
return 0;
}
int gen_if(codelist *dst, node left, char* op, node right)
{
sprintf(tmp, "if %s %s %s goto", left.addr, op, right.addr);//第一个字符串 操作符 第二个字符串
Gen(dst, tmp); //将tmp的内容存放到 总字符串 list的code字段当中
return 0;
}
int Gen(codelist *dst, char *str) //输入 第一个参数为 list 第二个参数是 总字符串 tmp[1024]
{ //目标是 将tmp的内容存放到 总字符串 list的code字段当中
if (dst->linecnt >= dst->capacity)
{
dst->capacity += 1024;
dst->code = (char**)realloc(dst->code, sizeof(char*)*dst->capacity);
if (dst->code == NULL)
{
printf("short of memeory\n");
return 0;
}
}
dst->code[dst->linecnt] = (char*)malloc(strlen(str)+20); //申请一个内存空间存放中间字符串
strcpy(dst->code[dst->linecnt], str); //字符串复制
dst->linecnt++; //行号加1
return 0;
}
int backpatch(codelist *dst, instrlist *list, int instrno) //总代码 dst 错误列表list 错误的指向行数
{
if (list!=NULL) //如果错误的列表不为空
{
listele *p=list->first;
char tmp[20];
sprintf(tmp, " %d", instrno); 取出一个
while (p!=NULL)
{
if (p->instrno<dst->linecnt)
strcat(dst->code[p->instrno], tmp); //错误列表:1 2 3 4 假设都指向instrno=6 那么将6转化为字符串tmp
//再添加到每个字符串的末尾
p=p->next;
}
}
return 0;
}
// int linecnt, capacity; 行数 与 最大行数
// int temp_index;
// char **code;
重点是对课上的回填技术,还有
栈中的可能属性类别也就是node结点的属性种类,还有的是全局变量list使用的结点除了行号,最大行号,还有就是三地址码存放的chor **类型的code。
jia.l
%{
#include "jia.tab.h"
%}
delim [ \t\n\r]
ws {delim}+
letter [A-Za-z]
digit [0-9]
id {letter}({letter}|{digit})*
integer {digit}+
exponent E[+-]?{integer}
number {integer}{exponent}?
real integer(\.integer)?{exponent}?
%option noyywrap
%%
if { return( IF ); }
else { return( ELSE ); }
while { return( WHILE ); }
do { return( DO ); }
for { return( FOR ); }
switch { return( SWITCH ); }
case { return( CASE ); }
default { return( DEFAULT ); }
break { return( BREAK ); }
true { return( TRUE ); }
false { return( FALSE ); }
int { return( INT ); }
long { return( LONG ); }
char { return( CHAR ); }
bool { return( BOOL ); }
float { return( FLOAT ); }
double { return( DOUBLE ); }
"&&" { return( AND ); }
"||" { return( OR ); }
"!" { return( '!'); }
"<"|"<="|">"|">="|"!="|"==" { filloperator(&yylval, yytext); return( REL); }
"++" { return( INC ); }
"--" { return( DEC ); }
"+" { return( '+' ); }
"-" { return( '-' ); }
"*" { return( '*' ); }
"/" { return( '/' ); }
"=" { return( '=' ); }
"{" { return( '{' ); }
"}" { return( '}' ); }
"[" { return( '[' ); }
"]" { return( ']' ); }
"(" { return( '(' ); }
")" { return( ')' ); }
";" { return( ';' ); }
{ws} { }
{id} { filllexeme(&yylval, yytext); return( ID ); }
{number} { filllexeme(&yylval, yytext); return( NUMBER ); }
{real} { filllexeme(&yylval, yytext); return( REAL ); }
%%
jia.y
%{
#include "xu.h"
#define YYSTYPE node
#include "jia.tab.h"
int yyerror();
int yyerror(char* msg);
extern int yylex();
codelist* list;
%}
%token BASIC NUMBER REAL ID TRUE FALSE
%token INT LONG CHAR BOOL FLOAT DOUBLE
%token REL
%token IF ELSE WHILE DO BREAK FOR SWITCH CASE DEFAULT
%token OR AND
%left OR
%left AND
%right '!'
%left '+' '-'
%left '*' '/'
%right UMINUS
%right INC DEC
%%
program : block { }
;
block : '{' decls statementlist '}' { }
;
decls : decls decl { }
| { }
;
decl : type ID ';' { }
;
type : type '[' NUMBER ']' { }
| BASIC { }
;
statementlist :
statementlist M statement { backpatch(list, $1.nextlist, $2.instr);
$$.nextlist = $3.nextlist; }
| statement { $$.nextlist = $1.nextlist; }
;
statement :
IF '(' boolean ')' M statement ELSE N M statement { backpatch(list, $3.truelist, $5.instr);
backpatch(list, $3.falselist, $9.instr);
$6.nextlist = merge($6.nextlist, $8.nextlist);
$$.nextlist = merge($6.nextlist, $10.nextlist); }
| IF '(' boolean ')' M statement { backpatch(list, $3.truelist, $5.instr);
$$.nextlist = merge($3.falselist, $6.nextlist); }
| WHILE M '(' boolean ')' M statement { backpatch(list, $7.nextlist, $2.instr);
backpatch(list, $4.truelist, $6.instr);
$$.nextlist = $4.falselist;
gen_goto(list, $2.instr); }
| DO M statement M WHILE '(' boolean ')' M ';' { backpatch(list, $3.nextlist, $4.instr);
backpatch(list, $7.truelist, $9.instr);
$$.nextlist = $7.falselist;
gen_goto(list, $2.instr); }
| FOR '(' assignment ';' M boolean ';' M assignment ')' N M statement { backpatch(list, $6.truelist, $12.instr);
backpatch(list, $11.nextlist, $5.instr);
backpatch(list, $13.nextlist, $8.instr);
$$.nextlist = $6.falselist;
gen_goto(list, $8.instr); }
| BREAK ';' { }
| '{' statementlist '}' { $$.nextlist = $2.nextlist; }
| assignment ';' { $$.nextlist = NULL; }
;
assignment : ID '=' boolean { copyaddr(&$1, $1.lexeme); gen_assignment(list, $1, $3); }
;
loc : loc '[' boolean ']' { }
| ID { copyaddr(&$$, $1.lexeme); }
;
boolean : boolean OR M boolean { backpatch(list, $1.falselist, $3.instr);
$$.truelist = merge($1.truelist, $4.truelist);
$$.falselist = $4.falselist; }
| boolean AND M boolean { backpatch(list, $1.truelist, $3.instr);
$$.truelist = $4.truelist;
$$.falselist = merge($1.falselist, $4.falselist); }
| '!' boolean { $$.truelist = $1.falselist;
$$.falselist = $1.truelist; }
| '(' boolean ')' { $$.truelist = $1.truelist;
$$.falselist = $1.falselist; }
| expression REL expression { $$.truelist = new_instrlist(nextinstr(list));
$$.falselist = new_instrlist(nextinstr(list)+1);
gen_if(list, $1, $2.oper, $3);
gen_goto_blank(list); }
| TRUE { copyaddr(&$$, "TRUE");
gen_goto_blank(list); }
| FALSE { copyaddr(&$$, "FALSE");
gen_goto_blank(list); }
| expression { copyaddr_fromnode(&$$, $1); }
;
M : { $$.instr = nextinstr(list); }
;
N : { $$.nextlist = new_instrlist(nextinstr(list));
gen_goto_blank(list); }
;
expression : expression '+' expression { new_temp(&$$, get_temp_index(list)); gen_3addr(list, $$, $1, "+", $3); }
| expression '-' expression { new_temp(&$$, get_temp_index(list)); gen_3addr(list, $$, $1, "-", $3); }
| expression '*' expression { new_temp(&$$, get_temp_index(list)); gen_3addr(list, $$, $1, "*", $3); }
| expression '/' expression { new_temp(&$$, get_temp_index(list)); gen_3addr(list, $$, $1, "/", $3); }
| '-' expression %prec UMINUS { new_temp(&$$, get_temp_index(list)); gen_2addr(list, $$, "-", $2); }
| loc { copyaddr_fromnode(&$$, $1); }
| NUMBER { copyaddr(&$$, $1.lexeme); }
| REAL { copyaddr(&$$, $1.lexeme); }
;
%%
int yyerror(char* msg)
{
printf("\nERROR with message: %s\n", msg);
return 0;
}
int main()
{
list = newcodelist();
//申请一个内存空间给list
// int linecnt, capacity;
// int temp_index;
// char **code;
freopen("test.in", "rt+", stdin);
freopen("test.out", "wt+", stdout);
//打开文件
yyparse();
print(list);
fclose(stdin);
//关闭文件
fclose(stdout);
//关闭文件
return 0;
}
xu.h
#ifndef CP_H
#define CP_H
#include <stdio.h>
#include <string.h>
#include <malloc.h>
typedef struct listele
{
int instrno;
struct listele *next;
}listele;
listele* new_listele(int no)
{
listele* p = (listele*)malloc(sizeof(listele));
p->instrno = no;
p->next = NULL;
return p;
}
typedef struct instrlist
{
listele *first,*last;
}instrlist;
instrlist* new_instrlist(int instrno)
{
instrlist* p = (instrlist*)malloc(sizeof(instrlist));
p->first = p->last = new_listele(instrno);
return p;
}
instrlist* merge(instrlist *list1, instrlist *list2)
{
instrlist *p;
if (list1 == NULL) p = list2;
else
{
if (list2!=NULL)
{
if (list1->last == NULL)
{
list1->first = list2->first;
list1->last =list2->last;
}else list1->last->next = list2->first;
list2->first = list2->last = NULL;
free(list2);
}
p = list1;
}
return p;
}
typedef struct node
{
instrlist *truelist, *falselist, *nextlist;
char addr[256];
char lexeme[256];
char oper[3];
int instr;
}node;
int filloperator(node *dst, char *src)
{
strcpy(dst->oper, src);
return 0;
}
int filllexeme(node *dst, char *yytext)
{
strcpy(dst->lexeme, yytext);
return 0;
}
int copyaddr(node *dst, char *src)
{
strcpy(dst->addr, src);
return 0;
}
int new_temp(node *dst, int index)
{
sprintf(dst->addr, "t%d", index);
return 0;
}
int copyaddr_fromnode(node *dst, node src)
{
strcpy(dst->addr, src.addr);
return 0;
}
typedef struct codelist
{
int linecnt, capacity;
int temp_index;
char **code;
}codelist;
codelist* newcodelist()
{
codelist* p = (codelist*)malloc(sizeof(codelist));
p->linecnt = 0;
p->capacity = 1024;
p->temp_index = 0;
p->code = (char**)malloc(sizeof(char*)*1024);
return p;
}
int get_temp_index(codelist* dst)
{
return dst->temp_index++;
}
int nextinstr(codelist *dst) { return dst->linecnt; }
int Gen(codelist *dst, char *str)
{
if (dst->linecnt >= dst->capacity)
{
dst->capacity += 1024;
dst->code = (char**)realloc(dst->code, sizeof(char*)*dst->capacity);
if (dst->code == NULL)
{
printf("short of memeory\n");
return 0;
}
}
dst->code[dst->linecnt] = (char*)malloc(strlen(str)+20);
strcpy(dst->code[dst->linecnt], str);
dst->linecnt++;
return 0;
}
char tmp[1024];
int gen_goto_blank(codelist *dst)
{
sprintf(tmp, "goto");
Gen(dst, tmp);
return 0;
}
int gen_goto(codelist *dst, int instrno)
{
sprintf(tmp, "goto %d", instrno);
Gen(dst, tmp);
return 0;
}
int gen_if(codelist *dst, node left, char* op, node right)
{
sprintf(tmp, "if %s %s %s goto", left.addr, op, right.addr);
Gen(dst, tmp);
return 0;
}
int gen_1addr(codelist *dst, node left, char* op)
{
sprintf(tmp, "%s %s", left.addr, op);
Gen(dst, tmp);
return 0;
}
int gen_2addr(codelist *dst, node left, char* op, node right)
{
sprintf(tmp, "%s = %s %s", left.addr, op, right.addr);
Gen(dst, tmp);
return 0;
}
int gen_3addr(codelist *dst, node left, node op1, char* op, node op2)
{
sprintf(tmp, "%s = %s %s %s", left.addr, op1.addr, op, op2.addr);
Gen(dst, tmp);
return 0;
}
int gen_assignment(codelist *dst, node left, node right)
{
gen_2addr(dst, left, "", right);
return 0;
}
int backpatch(codelist *dst, instrlist *list, int instrno)
{
if (list!=NULL)
{
listele *p=list->first;
char tmp[20];
sprintf(tmp, " %d", instrno);
while (p!=NULL)
{
if (p->instrno<dst->linecnt)
strcat(dst->code[p->instrno], tmp);
p=p->next;
}
}
return 0;
}
int print(codelist* dst)
{
int i;
for (i=0; i < dst->linecnt; i++)
printf("%5d: %s\n", i, dst->code[i]);
return 0;
}
#endif
test.in
{
a=10;
x = b - c;
y = a * x;
z = x * d;
abcd = a + x +y;
if(a>10)
a=a+1;
a=a+2;
while(a>0)
a=a-1;
b=b+1;
}