首页 > 代码库 > 一个简单编译器前端的实现
一个简单编译器前端的实现
小记:
其实这个程序是编译原理这门课的综合实验,前段时间我申请免试又失败了,原因是有缺课,平时分不够,早上赖床现在尝到苦果我也是醉了……没办法,逼上梁山,只好攻克这个大boss以拿下免试资格。
选了一个最简单的文法,分析了1个多星期,终于决定开始要写的时候时间已经很紧了。
去实验室通宵了一晚,在宿舍熬了一晚,睡了3个小时就起来去验收了。还好是通过了,没白费劲。
不得不说,编译原理就是烧脑,知识点都比较抽象,如果数据结构和算法的基础打得不牢的话,实现起来会感到吃力。
再次感觉到了基础的重要性,这也是一个收获吧。
总的来说,相比较之前实现形式化的des加密算法,抽象的编译器更有难度,写完也更能让你感觉到提高。
ps:程序仅供参考,时间有限,没做太多测试。
功能:
给定一个简单语言的文法描述,本程序是该语言的编译器前端。
输入一个符合该文法规则的源文件,输出三地址形式的中间代码。
具体功能有词法分析,语法分析,分析流程显示,错误提示等。
程序运行结果:
源程序:
分析过程:
生成中间代码(三地址形式):
分析过程:
文法:
文法(消除左递归后)Program->BEGIN Stmt-List ENDStmt-List->Stmt Stmt-List‘Stmt-List‘->Stmt Stmt-List‘ | .Stmt->Assign-stmtAssign-stmt->ID = ExprExpr->Term Expr‘Expr‘->Add-Op Term Expr‘ | .Term->Factor Term‘Term‘->Multiple-Op Factor Term‘ | .Factor->( Expr ) | ID | NUMAdd-Op->+ | -Multiple-Op->* | /
FIRST集First(Program)={BEGIN} First(Stmt-List)={ID}First(Stmt-List‘)={ ID,ε}First(Stmt)={ ID}First(Assign-stmt)={ ID}First(Expr)={(,ID,NUM}First(Expr‘)={+,-,ε}First(Term)={(,ID,NUM}First(Term‘)={*,/,ε}First(Factor)={(,ID,NUM}First(Add-Op)={+,-}First(Multiple-Op)={*,/}
FOLLOW集Follow(Program)={$} Follow(Stmt-List)={END}Follow(Stmt-List‘)={ END}Follow(Stmt)={ ID,END}Follow(Assign-stmt)={ ID,END }Follow(Expr)={ ID,END ,)}Follow(Expr‘)={ ID,END , )}Follow(Term)={+,-, ID,END , )}Follow(Term‘)={ +,-, ID,END , )}Follow(Factor)={ *,/,+,-, ID,END , )}Follow(Add-Op)={(,ID,NUM}Follow(Multiple-Op)={ (,ID,NUM }
预测分析表:
代码:
1 //词法分析中‘ ‘代表空白字符,包括换行符,空格,制表符 2 //源程序格式:1、一行只能有一条语句;2、程序中可以有注释 3 #include <iostream> 4 #include <string.h> 5 #include <stdio.h> 6 #include <stack> 7 #include <stdlib.h> 8 using namespace std; 9 10 char src[1000010]; //存储源代码 11 char TokenList_kind[1010][100]; //词法单元流 之 类别 12 int TokenList_value[1010]; //词法单元流 之 值 13 char WordList[1010][100]; //符号表 -- 由token_value指向 14 int LineList[1010]; //记录每一个token对应的行数 -- 由token_value指向 15 int TokenListPoint; //词法单元流指针 16 int WordListPoint; //符号表指针 17 18 int WordsPoint; //语法分析时的词法单元流指针 19 20 int tmpcnt; //三地址语句中寄存器的编号 21 22 int ExpTable[11][8] = { //用 表驱动法 读取表达式 ,状态转换表 23 2, -1, 1, -1, -1, -1, -1, 9, 24 2, -1, -1, -1, -1, -1, -1, 9, 25 2, -1, -1, 3, -1, -1, -1, 9, 26 4, 5, -1, -1, -1, -1, -1, 9, 27 4, -1, 8, -1, 6, -1, 10, 9, 28 -1, 5, 8, -1, 6, -1, 10, 9, 29 4, 5, -1, -1, -1, 7, -1, 9, 30 4, 5, -1, -1, -1, -1, -1, 9, 31 -1, -1, -1, -1, -1, -1, -1, -1, 32 -1, -1, -1, -1, -1, -1, -1, -1, 33 -1, -1, 8, -1, 6, -1, 10, 9 34 }; 35 36 /* 37 文法 38 1 Program->BEGIN Stmt-List END 39 2 Stmt-List->Stmt Stmt-List‘ 40 3 Stmt-List‘->Stmt Stmt-List‘ | . 41 4 Stmt->Assign-stmt 42 5 Assign-stmt->ID = Expr 43 6 Expr->Term Expr‘ 44 7 Expr‘->Add-Op Term Expr‘ | . 45 8 Term->Factor Term‘ 46 9 Term‘->Multiple-Op Factor Term‘ | . 47 10 Factor->( Expr ) | ID | NUM 48 11 Add-Op->+ | - 49 12 Multiple-Op->* | / 50 51 预测分析表 52 非终结符号 输入符号 53 BEGIN ‘+’ ‘-’ ‘*’ ‘/’ ( ) ‘=’ ID NUM END $ 54 Program 1 55 Stmt-List 2 56 Stmt-List‘ 3 . 57 Stmt 4 58 Assign-stmt 5 59 Expr 6 6 6 60 Expr‘ 7 7 . . . 61 Term 8 8 8 62 Term‘ . . 9 9 . . . 63 Factor 10_1 10_210_3 64 Add-Op 11_1 11_2 65 Multiple-Op 12_1 12_2 66 */ 67 68 char PreTab[12][12][100]={ //存储预测分析表 69 {"BEGIN Stmt-List END","","","","","","","","","","",""}, 70 {"","","","","","","","","Stmt Stmt-List‘","","",""}, 71 {"","","","","","","","","Stmt Stmt-List‘","",".",""}, 72 {"","","","","","","","","Assign-stmt","","",""}, 73 {"","","","","","","","","ID = Expr","","",""}, 74 {"","","","","","Term Expr‘","","","Term Expr‘","Term Expr‘","",""}, 75 {"","Add-Op Term Expr‘","Add-Op Term Expr‘","","","",".","",".","",".",""}, 76 {"","","","","","Factor Term‘","","","Factor Term‘","Factor Term‘","",""}, 77 {"",".",".","Multiple-Op Factor Term‘","Multiple-Op Factor Term‘","",".","",".","",".",""}, 78 {"","","","","","( Expr )","","","ID","NUM","",""}, 79 {"","+","-","","","","","","","","",""}, 80 {"","","","*","/","","","","","","",""} 81 }; 82 83 char Product[23][50]={ //记录产生式出现的字符串 84 "Program", //0 85 "Stmt-List", //1 86 "Stmt-List‘", //2 87 "Stmt", //3 88 "Assign-stmt", //4 89 "Expr", //5 90 "Expr‘", //6 91 "Term", //7 92 "Term‘", //8 93 "Factor", //9 94 "Add-Op", //10 95 "Multiple-Op", //11 96 97 "BEGIN", //12 98 "+", //13 99 "-", //14 100 "*", //15 101 "/", //16 102 "(", //17 103 ")", //18 104 "=", //19 105 "ID", //20 106 "NUM", //21 107 "END" //22 108 }; 109 110 111 //语法树节点 112 struct Node{ 113 Node(char na[]){ //构造函数 114 strcpy(name,na); 115 wp = 0; 116 brother = NULL; 117 next = NULL; 118 } 119 char name[50]; 120 int wp; //终结符在token流中的位置 121 Node* brother; //指向下一个兄弟节点 122 Node* next; //指向当前节点的孩子节点 123 }*root,*now,*p; 124 125 void Init() 126 { 127 memset(src,0,sizeof(src)); //初始化源代码字符数组 128 memset(TokenList_kind,0,sizeof(TokenList_kind)); //初始化词法单元流 129 memset(TokenList_value,0,sizeof(TokenList_value)); //初始化词法单元流 130 memset(WordList,0,sizeof(WordList)); //初始化符号表 131 TokenListPoint = 1; 132 WordListPoint = 1; 133 134 WordsPoint = 1; 135 136 //初始化指针 137 root = NULL; 138 now = NULL; 139 p = NULL; 140 tmpcnt = 1; 141 } 142 143 //--------------- 词法分析 start --------------- 144 void AddToken(char kind[],char value[],int line) //将<类别,值>放入token流中 145 { 146 strcpy(TokenList_kind[TokenListPoint],kind); 147 TokenList_value[TokenListPoint++] = WordListPoint; 148 149 strcpy(WordList[WordListPoint],value); 150 LineList[WordListPoint++] = line; 151 } 152 153 void strsub(char str1[],int start,int len,char str2[]) //截取str1的子字符串给str2 154 { 155 int i=0,j; 156 for(j=0;j<len;j++){ 157 str2[i++] = str1[start+j]; 158 } 159 str2[i] = ‘\0‘; 160 } 161 162 void InputString() //从文件中读取源代码,同时过滤注释,一行只能放置一条语句 163 { 164 //文件读入 165 FILE* fs = fopen(".\\src.txt","rb"); //源代码 166 if(fs==NULL){ 167 printf("源代码文件 src.txt 不存在\n"); 168 return ; 169 } 170 171 char s[10010]={0}; 172 //fscanf(fs,"%s",src); 173 while(fgets(s,10010,fs)){ 174 if(sscanf(s,"%s",s)==-1){ //没有读到字符串,将s清空 175 s[0]=‘\0‘; 176 continue; 177 } 178 //过滤注释 179 if(s[0]==‘/‘ && s[1]==‘/‘) 180 continue; 181 int i; 182 for(i=0;s[i];i++) 183 if(s[i]==‘/‘ && s[i+1]==‘/‘) 184 break; 185 s[i]=‘\0‘; 186 187 strcat(src,s); //连接到源代码字符串后 188 strcat(src," "); //连接一个空白符 189 } 190 191 int len = strlen(src); 192 len--; 193 src[len] = ‘\0‘; 194 } 195 196 bool getBEGIN(int &i,int &line) //读取“BEGIN” 197 { 198 char tmp[6]; 199 strsub(src,i,5,tmp); 200 if(strcmp(tmp,"BEGIN")==0){ 201 i = i+5; 202 AddToken("BEGIN","BEGIN",line); //添加token,<类别,值> 203 return true; 204 } 205 else 206 return false; 207 } 208 bool getBLANK(int &i) //读取“空白符”==>‘ ‘ 209 { 210 if(src[i]==‘ ‘){ 211 i++; 212 return true; 213 } 214 else 215 return false; 216 } 217 bool getEND(int &i,int &line) //读取“END” 218 { 219 char tmp[4]; 220 strsub(src,i,3,tmp); 221 if(strcmp(tmp,"END")==0){ 222 i = i+3; 223 AddToken("END","END",line); //添加token,<类别,值> 224 return true; 225 } 226 else 227 return false; 228 } 229 bool getExp(int &i,int &line) //读取表达式,遇到‘ ‘结束 230 { 231 int status=0; 232 char tmp[10010]={0}; 233 char t[2]={0}; 234 int j=0; 235 stack <char> ss; 236 while(status!=8){ 237 if(status==-1) //跳到错误的状态号,返回false 238 return false; 239 if(status==9) //跳到了错误状态,返回false 240 return false; 241 242 //根据src[i]确定下一个状态,直到抵达结束状态 8 243 if( (‘a‘<=src[i] && src[i]<=‘z‘) || (‘A‘<=src[i] && src[i]<=‘Z‘) ){ 244 //读取到字母 245 status = ExpTable[status][0]; 246 tmp[j++] = src[i++]; 247 } 248 else if(‘0‘<=src[i] && src[i]<=‘9‘){ 249 //读取到数字 250 status = ExpTable[status][1]; 251 tmp[j++] = src[i++]; 252 } 253 else{ 254 switch(src[i]){ 255 case ‘ ‘: //空白符 256 status = ExpTable[status][2]; 257 if(tmp[0]!=‘\0‘){ 258 //如果是后来读到空白符,说明表达式该结束了 259 //将读取的最后一个单词放入token 260 if(j!=0){ //说明之前是一个单词或数字,否则是一个),)已经将最后一个单词放入token流中了,所以不需要处理它 261 if( (‘a‘<=tmp[0] && tmp[0]<=‘z‘) || (‘A‘<=tmp[0] && tmp[0]<=‘Z‘) ) 262 AddToken("ID",tmp,line); //将这个字符代表的单词放入token流中 263 else if( ‘0‘<=tmp[0] && tmp[0]<=‘9‘ ) 264 AddToken("NUM",tmp,line); //将这个字符代表的单词放入token流中 265 j=0; 266 } 267 } 268 line++; 269 i++; 270 break; 271 case ‘=‘: // = 272 status = ExpTable[status][3]; 273 274 tmp[j] = ‘\0‘; 275 AddToken("ID",tmp,line); //=前面一定是标识符,放入token流 276 j=0; 277 AddToken("=","=",line); //将当前的=也放入token流 278 279 i++; 280 break; 281 case ‘+‘: //OP 282 case ‘-‘: 283 case ‘*‘: 284 case ‘/‘: 285 status = ExpTable[status][4]; 286 287 tmp[j] = ‘\0‘; 288 if( (‘a‘<=tmp[0] && tmp[0]<=‘z‘) || (‘A‘<=tmp[0] && tmp[0]<=‘Z‘) ) //如果运算符前是字母,则说明是标识符 289 AddToken("ID",tmp,line); 290 else if( ‘0‘<=tmp[0] && tmp[0]<=‘9‘ ) 291 AddToken("NUM",tmp,line); 292 j=0; 293 t[0] = src[i]; 294 t[1] = ‘\0‘; 295 AddToken(t,t,line); //将运算符放入token流 296 297 i++; 298 break; 299 case ‘(‘: // ( 300 status = ExpTable[status][5]; 301 302 AddToken("(","(",line); //将(放入token流 303 ss.push(‘(‘); 304 305 i++; 306 break; 307 case ‘)‘: // ) 308 status = ExpTable[status][6]; 309 310 tmp[j] = ‘\0‘; 311 if( (‘a‘<=tmp[0] && tmp[0]<=‘z‘) || (‘A‘<=tmp[0] && tmp[0]<=‘Z‘) ) //如果)前是字母,则说明是标识符,即ID 312 AddToken("ID",tmp,line); 313 else if( ‘0‘<=tmp[0] && tmp[0]<=‘9‘ ) 314 AddToken("NUM",tmp,line); 315 j=0; 316 317 AddToken(")",")",line); //将(放入token流 318 319 if(ss.empty()) //括号不匹配 320 return false; 321 else 322 ss.pop(); 323 324 i++; 325 break; 326 default: //其它 327 status = ExpTable[status][7]; 328 i++; 329 break; 330 } 331 } 332 } 333 334 if(status==8){ //正确跳出 335 /* 336 if(ss.empty()) //括号匹配 337 return true; 338 else 339 return false; 340 */ 341 return true; 342 } 343 else 344 return false; 345 } 346 347 bool Lex() //进行词法分析 348 { 349 printf("词法分析......\n"); 350 int i=0; 351 int line = 1; 352 InputString(); 353 try{ 354 getBEGIN(i,line)?1:throw 1; 355 getBLANK(i)?line++:throw 2; 356 while(1){ //读取表达式 357 if(src[i]==‘\0‘) //还没检测到END,程序就结束了 358 throw 3; 359 char tmp[4]; 360 strsub(src,i,3,tmp); //截取字符串 361 if(strcmp(tmp,"END")==0){ 362 //如果检测到END,跳出循环 363 break; 364 } 365 366 //读取表达式 367 getExp(i,line)?1:throw 4; 368 } 369 getEND(i,line)?1:throw 3; 370 if(src[i]!=‘\0‘) //如果END结束标志之后还有输入符号,说明出错误了 371 throw 5; 372 } 373 catch(int err){ 374 printf("【词法错误】\n"); 375 //计算行号 376 int errline = 1; 377 int j; 378 for(j=0;j<=i;j++) 379 if(src[j]==‘ ‘) 380 errline++; 381 382 switch(err){ 383 case 1: 384 printf("ERROR 1 (第%d行) : 没有读取到开始标识 BEGIN!\n",errline); 385 printf("具体位置:\t%s%s\n",WordList[WordListPoint-2],WordList[WordListPoint-1]); 386 printf("\t\t"); 387 for(j=0;j<strlen(WordList[WordListPoint-2]);j++) 388 printf(" "); 389 printf("^\n"); 390 printf("\n"); 391 return false; 392 case 2: 393 printf("ERROR 2 (第%d行) : 没有读取到空白符!\n",errline); 394 printf("具体位置:\t%s%s\n",WordList[WordListPoint-2],WordList[WordListPoint-1]); 395 printf("\t\t"); 396 for(j=0;j<strlen(WordList[WordListPoint-2]);j++) 397 printf(" "); 398 printf("^\n"); 399 printf("\n"); 400 return false; 401 case 3: 402 printf("ERROR 3 (第%d行) : 没有读取到结束标识 END!\n",errline); 403 printf("具体位置:\t%s%s\n",WordList[WordListPoint-2],WordList[WordListPoint-1]); 404 printf("\t\t"); 405 for(j=0;j<strlen(WordList[WordListPoint-2]);j++) 406 printf(" "); 407 printf("^\n"); 408 printf("\n"); 409 printf("\n"); 410 return false; 411 case 4: 412 printf("ERROR 4 (第%d行) : 表达式错误。例如:标识符有误!\n",errline); 413 printf("具体位置:\t%s%s\n",WordList[WordListPoint-2],WordList[WordListPoint-1]); 414 printf("\t\t"); 415 for(j=0;j<strlen(WordList[WordListPoint-2]);j++) 416 printf(" "); 417 printf("^\n"); 418 printf("\n"); 419 return false; 420 case 5: 421 printf("ERROR 5 (第%d行) : BEGIN...END 代码段格式错误!\n",errline); 422 printf("具体位置:\t%s%s\n",WordList[WordListPoint-2],WordList[WordListPoint-1]); 423 printf("\t\t"); 424 for(j=0;j<strlen(WordList[WordListPoint-2]);j++) 425 printf(" "); 426 printf("^\n"); 427 printf("\n"); 428 return false; 429 default:break; 430 } 431 } 432 printf("没有词法错误\n"); 433 printf("\n"); 434 return true; 435 } 436 437 void printTokenList() //输出token流 438 { 439 int i=1; 440 printf("单词流:\n"); 441 printf("<\t类别\t,值\t>\n"); 442 for(;i<TokenListPoint;i++){ 443 printf("<\t%s\t,%s\t>\n",TokenList_kind[i],WordList[TokenList_value[i]]); 444 } 445 printf("\n"); 446 } 447 //--------------- 词法分析 end --------------- 448 449 450 //--------------- 语法分析 start --------------- 451 //生成语法分析树 452 453 void OutStack(stack <char*> z) //输出栈中内容的前两个 454 { 455 char t[200]={0}; 456 int j=0; 457 while(!z.empty()){ 458 if(j==2) 459 break; 460 if(j==1) 461 strcat(t,","); 462 strcat(t,z.top()); 463 z.pop(); 464 j++; 465 } 466 strcat(t,"$"); 467 printf("%23s",t); 468 } 469 470 void OutRightStr() //输出当前token流中前两个 471 { 472 char t[200]={0}; 473 int i,j=0; 474 for(i = WordsPoint;i<WordListPoint;i++){ 475 if(j==2) 476 break; 477 char tt[200]={0}; 478 sprintf(tt,"<%s,%s>",TokenList_kind[i],WordList[TokenList_value[i]]); 479 strcat(t,tt); 480 j++; 481 } 482 printf("%23s$",t); 483 } 484 485 void OutAction(char action[]) //输出动作 486 { 487 printf(" %s\n",action); 488 } 489 490 bool seltab(int row,char f[]) 491 { 492 if(strcmp(TokenList_kind[WordsPoint],"BEGIN")==0){ 493 strcpy(f,PreTab[row][0]); 494 } 495 else if(strcmp(TokenList_kind[WordsPoint],"+")==0){ 496 strcpy(f,PreTab[row][1]); 497 } 498 else if(strcmp(TokenList_kind[WordsPoint],"-")==0){ 499 strcpy(f,PreTab[row][2]); 500 } 501 else if(strcmp(TokenList_kind[WordsPoint],"*")==0){ 502 strcpy(f,PreTab[row][3]); 503 } 504 else if(strcmp(TokenList_kind[WordsPoint],"/")==0){ 505 strcpy(f,PreTab[row][4]); 506 } 507 else if(strcmp(TokenList_kind[WordsPoint],"(")==0){ 508 strcpy(f,PreTab[row][5]); 509 } 510 else if(strcmp(TokenList_kind[WordsPoint],")")==0){ 511 strcpy(f,PreTab[row][6]); 512 } 513 else if(strcmp(TokenList_kind[WordsPoint],"=")==0){ 514 strcpy(f,PreTab[row][7]); 515 } 516 else if(strcmp(TokenList_kind[WordsPoint],"ID")==0){ 517 strcpy(f,PreTab[row][8]); 518 } 519 else if(strcmp(TokenList_kind[WordsPoint],"NUM")==0){ 520 strcpy(f,PreTab[row][9]); 521 } 522 else if(strcmp(TokenList_kind[WordsPoint],"END")==0){ 523 strcpy(f,PreTab[row][10]); 524 } 525 else if(strcmp(TokenList_kind[WordsPoint],"$")==0){ 526 strcpy(f,PreTab[row][11]); 527 } 528 else{ 529 return false; 530 } 531 if(f[0]==‘\0‘) //确定的表位置为空 532 return false; 533 return true; 534 } 535 536 bool Parse() //语法分析阶段。输入是token流,输出分析结果 537 { 538 stack <char*> z; 539 stack <Node*> nz; 540 z.push(*Product); 541 542 root = new Node("Program"); 543 now = root; 544 545 printf("词法分析......\n"); 546 printf("【分析流程】\n"); 547 printf("%22s%24s%22s\n","栈","输入","动作"); 548 char action[100]={0}; 549 try{ 550 while(!z.empty() || WordsPoint<WordListPoint){ //栈和输入串中只剩结束符号(栈空 or 指向‘\0‘)时退出循环 551 552 //输出当前分析结果 553 OutStack(z); //输出栈中内容 554 OutRightStr(); //输出剩余输入字符串 555 OutAction(action); //输出动作 556 557 memset(action,0,sizeof(action)); 558 559 //预处理。例:匹配e。预处理:将e出栈,输入指针后移。 560 561 //还没结束栈就空了 562 if(z.empty()) 563 throw 1; //非法跳出 564 565 if(strcmp(z.top(),TokenList_kind[WordsPoint])==0){ //栈顶元素==指针指向字符,匹配成功 566 //语义动作 - 构造语法树 567 while(!now->brother){ //回到有下一个兄弟节点的节点为止 568 if(nz.empty()){ 569 if(now==root) //如果回到了根节点 570 goto label; 571 else 572 throw 1; 573 } 574 now = nz.top(); 575 nz.pop(); 576 } 577 now = now->brother; 578 579 label: 580 //准备输出字符串 action 581 strcat(action,"匹配"); 582 strcat(action,z.top()); 583 584 //出栈、指针后移 585 if(!z.empty()) 586 z.pop(); 587 if(WordsPoint<WordListPoint) 588 WordsPoint++; 589 } 590 else{ //不相等,输出推导 591 //确定推导 592 char f[200]={0}; 593 if(strcmp(z.top(),"Program")==0){ 594 if(!seltab(0,f)) //从表中确定推导串 595 throw 2; //没有找到对应的推导 596 } 597 else if(strcmp(z.top(),"Stmt-List")==0){ 598 if(!seltab(1,f)) //从表中确定推导串 599 throw 2; //没有找到对应的推导 600 } 601 else if(strcmp(z.top(),"Stmt-List‘")==0){ 602 if(!seltab(2,f)) //从表中确定推导串 603 throw 2; //没有找到对应的推导 604 } 605 else if(strcmp(z.top(),"Stmt")==0){ 606 if(!seltab(3,f)) //从表中确定推导串 607 throw 2; //没有找到对应的推导 608 } 609 else if(strcmp(z.top(),"Assign-stmt")==0){ 610 if(!seltab(4,f)) //从表中确定推导串 611 throw 2; //没有找到对应的推导 612 } 613 else if(strcmp(z.top(),"Expr")==0){ 614 if(!seltab(5,f)) //从表中确定推导串 615 throw 2; 616 } 617 else if(strcmp(z.top(),"Expr‘")==0){ 618 if(!seltab(6,f)) //从表中确定推导串 619 throw 2; 620 } 621 else if(strcmp(z.top(),"Term")==0){ 622 if(!seltab(7,f)) //从表中确定推导串 623 throw 2; 624 } 625 else if(strcmp(z.top(),"Term‘")==0){ 626 if(!seltab(8,f)) //从表中确定推导串 627 throw 2; 628 } 629 else if(strcmp(z.top(),"Factor")==0){ 630 if(!seltab(9,f)) //从表中确定推导串 631 throw 2; 632 } 633 else if(strcmp(z.top(),"Add-Op")==0){ 634 if(!seltab(10,f)) //从表中确定推导串 635 throw 2; 636 } 637 else if(strcmp(z.top(),"Multiple-Op")==0){ 638 if(!seltab(11,f)) //从表中确定推导串 639 throw 2; 640 } 641 else{ 642 throw 2; 643 } 644 //准备输出字符串 action 645 strcat(action,"输出"); 646 strcat(action,z.top()); 647 strcat(action,"->"); 648 strcat(action,f); 649 650 //将栈顶字符串记录下来后用 651 char proleft[50]; 652 //将栈顶元素出栈并将推导入栈 653 if(!z.empty()){ 654 strcpy(proleft,z.top()); 655 z.pop(); 656 } 657 else 658 throw 1; 659 660 char tmp[100]; 661 662 //如果推导出来的是空,即f->.,不作处理 663 if(f[0]==‘.‘){ 664 nz.push(now); 665 666 now->next = new Node("."); 667 now = now->next; //now指向当前产生式右边的第一个字符串 668 669 while(!now->brother){ //回到有下一个兄弟节点的节点为止 670 if(nz.empty()){ 671 if(now==root) //如果回到了根节点 672 goto label; 673 else 674 throw 1; 675 } 676 now = nz.top(); 677 nz.pop(); 678 } 679 now = now->brother; 680 681 continue; 682 } 683 stack <char*> tz; //临时的栈 684 685 //正向输入到临时栈中 686 while(sscanf(f,"%s",tmp)!=-1){ 687 //语义动作 - 创建语法树 688 if(strcmp(proleft,now->name)==0){ 689 nz.push(now); //将当前节点记录下来 690 691 now->next = new Node(tmp); 692 now = now->next; //now指向当前产生式右边的第一个字符串 693 p = now; 694 } 695 else{ 696 p->brother = new Node(tmp); 697 p = p->brother; 698 } 699 700 char* pos = strstr(f,tmp); 701 strcpy(f,pos+strlen(tmp)); 702 if(strcmp(tmp,"Program")==0){ 703 tz.push(*(Product)); 704 } 705 else if(strcmp(tmp,"Stmt-List")==0){ 706 tz.push(*(Product+1)); 707 } 708 else if(strcmp(tmp,"Stmt-List‘")==0){ 709 tz.push(*(Product+2)); 710 } 711 else if(strcmp(tmp,"Stmt")==0){ 712 tz.push(*(Product+3)); 713 } 714 else if(strcmp(tmp,"Assign-stmt")==0){ 715 tz.push(*(Product+4)); 716 } 717 else if(strcmp(tmp,"Expr")==0){ 718 tz.push(*(Product+5)); 719 } 720 else if(strcmp(tmp,"Expr‘")==0){ 721 tz.push(*(Product+6)); 722 } 723 else if(strcmp(tmp,"Term")==0){ 724 tz.push(*(Product+7)); 725 } 726 else if(strcmp(tmp,"Term‘")==0){ 727 tz.push(*(Product+8)); 728 } 729 else if(strcmp(tmp,"Factor")==0){ 730 tz.push(*(Product+9)); 731 } 732 else if(strcmp(tmp,"Add-Op")==0){ 733 tz.push(*(Product+10)); 734 } 735 else if(strcmp(tmp,"Multiple-Op")==0){ 736 tz.push(*(Product+11)); 737 } 738 else if(strcmp(tmp,"BEGIN")==0){ 739 tz.push(*(Product+12)); 740 } 741 else if(strcmp(tmp,"+")==0){ 742 tz.push(*(Product+13)); 743 } 744 else if(strcmp(tmp,"-")==0){ 745 tz.push(*(Product+14)); 746 } 747 else if(strcmp(tmp,"*")==0){ 748 tz.push(*(Product+15)); 749 } 750 else if(strcmp(tmp,"/")==0){ 751 tz.push(*(Product+16)); 752 } 753 else if(strcmp(tmp,"(")==0){ 754 tz.push(*(Product+17)); 755 } 756 else if(strcmp(tmp,")")==0){ 757 tz.push(*(Product+18)); 758 } 759 else if(strcmp(tmp,"=")==0){ 760 tz.push(*(Product+19)); 761 } 762 else if(strcmp(tmp,"ID")==0){ 763 tz.push(*(Product+20)); 764 } 765 else if(strcmp(tmp,"NUM")==0){ 766 tz.push(*(Product+21)); 767 } 768 else if(strcmp(tmp,"END")==0){ 769 tz.push(*(Product+22)); 770 } 771 else{ 772 throw 1; 773 } 774 775 } 776 777 //反向输出到真正的栈中 778 while(!tz.empty()){ 779 if(strcmp(tz.top(),"Program")==0){ 780 z.push(*(Product)); 781 } 782 else if(strcmp(tz.top(),"Stmt-List")==0){ 783 z.push(*(Product+1)); 784 } 785 else if(strcmp(tz.top(),"Stmt-List‘")==0){ 786 z.push(*(Product+2)); 787 } 788 else if(strcmp(tz.top(),"Stmt")==0){ 789 z.push(*(Product+3)); 790 } 791 else if(strcmp(tz.top(),"Assign-stmt")==0){ 792 z.push(*(Product+4)); 793 } 794 else if(strcmp(tz.top(),"Expr")==0){ 795 z.push(*(Product+5)); 796 } 797 else if(strcmp(tz.top(),"Expr‘")==0){ 798 z.push(*(Product+6)); 799 } 800 else if(strcmp(tz.top(),"Term")==0){ 801 z.push(*(Product+7)); 802 } 803 else if(strcmp(tz.top(),"Term‘")==0){ 804 z.push(*(Product+8)); 805 } 806 else if(strcmp(tz.top(),"Factor")==0){ 807 z.push(*(Product+9)); 808 } 809 else if(strcmp(tz.top(),"Add-Op")==0){ 810 z.push(*(Product+10)); 811 } 812 else if(strcmp(tz.top(),"Multiple-Op")==0){ 813 z.push(*(Product+11)); 814 } 815 else if(strcmp(tz.top(),"BEGIN")==0){ 816 z.push(*(Product+12)); 817 } 818 else if(strcmp(tz.top(),"+")==0){ 819 z.push(*(Product+13)); 820 } 821 else if(strcmp(tz.top(),"-")==0){ 822 z.push(*(Product+14)); 823 } 824 else if(strcmp(tz.top(),"*")==0){ 825 z.push(*(Product+15)); 826 } 827 else if(strcmp(tz.top(),"/")==0){ 828 z.push(*(Product+16)); 829 } 830 else if(strcmp(tz.top(),"(")==0){ 831 z.push(*(Product+17)); 832 } 833 else if(strcmp(tz.top(),")")==0){ 834 z.push(*(Product+18)); 835 } 836 else if(strcmp(tz.top(),"=")==0){ 837 z.push(*(Product+19)); 838 } 839 else if(strcmp(tz.top(),"ID")==0){ 840 z.push(*(Product+20)); 841 } 842 else if(strcmp(tz.top(),"NUM")==0){ 843 z.push(*(Product+21)); 844 } 845 else if(strcmp(tz.top(),"END")==0){ 846 z.push(*(Product+22)); 847 } 848 else{ 849 throw 1; 850 } 851 tz.pop(); 852 } 853 } 854 855 } 856 if(z.empty() && WordsPoint >= WordListPoint){ //正常退出循环 857 //输出最后一行分析结果 858 OutStack(z); //输出栈中内容 859 OutRightStr(); //输出剩余输入字符串 860 OutAction(action); //输出动作 861 } 862 else{ //非正常情况 863 throw 1; 864 } 865 } 866 catch(int err){ 867 printf("\n"); 868 printf("【语法错误】\n"); 869 870 switch(err){ 871 case 1: 872 printf("ERROR 1 (第%d行) : 非法跳出!\n",LineList[WordsPoint]); 873 printf("\n"); 874 return false; 875 case 2: 876 printf("ERROR 2 (第%d行) : 没有找到 %s 对应的推导!\n",LineList[WordsPoint],z.top()); 877 printf("\n"); 878 return false; 879 default: 880 break; 881 } 882 } 883 printf("没有语法错误\n"); 884 printf("\n"); 885 return true; 886 } 887 //--------------- 语法分析 end --------------- 888 889 //--------------- 生成三地址代码 start --------------- 890 //对语法分析树进行后序遍历,执行语义规则,一遍扫描之后,树根的code即为三地址代码 891 892 893 void setWP(Node* cur) //设置每一个叶子节点的wp指针 894 { 895 if(!cur->next){ 896 if(cur->name[0]==‘.‘) 897 return ; 898 cur->wp = WordsPoint++; 899 return ; 900 } 901 Node* p = cur->next; 902 while(p){ 903 setWP(p); 904 p = p->brother; 905 } 906 } 907 908 char* getNext(Node* cur,FILE* fo) 909 { 910 //递归出口 911 if(!cur->next){ 912 if(cur->name[0]==‘.‘ 913 || cur->name[0]==‘(‘ 914 || cur->name[0]==‘)‘){ //忽略空. 和 ( 和 ) 915 char* t = new char[2]; 916 t[0]=‘\0‘; 917 return t; 918 } 919 return WordList[TokenList_value[cur->wp]]; 920 } 921 if(strcmp(cur->name,"Program")==0){ 922 fprintf(fo,"%s\r\n",getNext(cur->next,fo)); 923 getNext(cur->next->brother,fo); 924 fprintf(fo,"%s\r\n",getNext(cur->next->brother->brother,fo)); 925 char* tmp = new char[2]; 926 tmp[0] = ‘\0‘; 927 return tmp; 928 } 929 else if(strcmp(cur->name,"Assign-stmt")==0){ 930 char* tmp = new char[2]; 931 tmp[0] = ‘\0‘; 932 fprintf(fo,"%s=%s\r\n",getNext(cur->next,fo),getNext(cur->next->brother->brother,fo)); 933 return tmp; 934 } 935 else if(strcmp(cur->name,"Term")==0){ 936 char* tmp = new char[150]; 937 tmp = getNext(cur->next->brother,fo); 938 if(tmp[0]==‘*‘ || tmp[0]==‘/‘){ 939 fprintf(fo,"t%d=%s%s\r\n",tmpcnt,getNext(cur->next,fo),tmp); 940 sprintf(tmp,"t%d",tmpcnt++); 941 return tmp; 942 } 943 } 944 else if(strcmp(cur->name,"Expr")==0){ 945 char* tmp = new char[150]; 946 tmp = getNext(cur->next->brother,fo); 947 if(tmp[0]==‘+‘ || tmp[0]==‘-‘){ 948 fprintf(fo,"t%d=%s%s\r\n",tmpcnt,getNext(cur->next,fo),tmp); 949 sprintf(tmp,"t%d",tmpcnt++); 950 return tmp; 951 } 952 } 953 else if(strcmp(cur->name,"Term‘")==0){ 954 char* tmp = new char[150]; 955 if(cur->next->brother!=NULL){ 956 p = cur->next->brother->brother; 957 if(p->next->brother!=NULL){ 958 tmp = getNext(cur->next->brother->brother,fo); 959 if(tmp[0]==‘*‘ || tmp[0]==‘/‘){ 960 if(*getNext(cur->next,fo)==‘/‘){ //如果最前面的是‘-’,后面的运算符应该是反的 961 if(tmp[0]==‘/‘) 962 tmp[0]=‘*‘; 963 else 964 tmp[0]=‘/‘; 965 } 966 fprintf(fo,"t%d=%s%s\r\n",tmpcnt,getNext(cur->next->brother,fo),tmp); 967 sprintf(tmp,"%st%d",getNext(cur->next,fo),tmpcnt++); 968 return tmp; 969 } 970 } 971 } 972 } 973 else if(strcmp(cur->name,"Expr‘")==0){ 974 char* tmp = new char[150]; 975 if(cur->next->brother!=NULL){ 976 p = cur->next->brother->brother; 977 if(p->next->brother!=NULL){ 978 tmp = getNext(cur->next->brother->brother,fo); 979 if(tmp[0]==‘+‘ || tmp[0]==‘-‘){ 980 if(*getNext(cur->next,fo)==‘-‘){ //如果最前面的是‘-’,后面的运算符应该是反的 981 if(tmp[0]==‘-‘) 982 tmp[0]=‘+‘; 983 else 984 tmp[0]=‘-‘; 985 } 986 fprintf(fo,"t%d=%s%s\r\n",tmpcnt,getNext(cur->next->brother,fo),tmp); 987 sprintf(tmp,"%st%d",getNext(cur->next,fo),tmpcnt++); 988 return tmp; 989 } 990 } 991 } 992 } 993 994 char * s = new char[150]; //new一个字符串,存储这个节点的结果 995 memset(s,0,sizeof(s)); 996 997 Node* p = cur->next; 998 while(p){ 999 strcat(s,getNext(p,fo));1000 p = p->brother;1001 }1002 return s;1003 }1004 1005 void get3AddrCode()1006 {1007 WordsPoint = 1;1008 FILE* fo = fopen(".\\out.txt","wb");1009 if(fo==NULL){1010 printf("三地址代码生成文件 out.txt 打开失败!\n");1011 return ;1012 }1013 1014 setWP(root);1015 printf("生成三地址语句......\n");1016 getNext(root,fo);1017 }1018 //--------------- 生成三地址代码 end ---------------1019 1020 int main()1021 {1022 bool f=true;1023 Init();1024 if(!Lex()) //词法分析1025 f=false;1026 //printTokenList();1027 if(!Parse()) //语法分析1028 f=false;1029 if(f) //都通过了才能生成三地址代码1030 get3AddrCode(); //生成三地址代码1031 return 0;1032 }
Freecode : www.cnblogs.com/yym2013
一个简单编译器前端的实现
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。