首页 > 代码库 > 一个简单的C语言语法检查器的实现
一个简单的C语言语法检查器的实现
我自己的实现方法的核心过程:首先用一个非终结符代表所有要检查的程序代码,然后根据文法将这个整体符号不断展开,拼凑成与被检查程序的单词顺序一致的终结符序列;如果能够拼凑成功,说明语法正确,否则说明存在语法错误。
关键词:分词;First集;Select集;
目前还存在的问题:
1.因为同一个非终结符->终结符的转化可能有多种解释,所以目前我的非终结符展开这里是用递归写的,因此不能定位具体错在哪里。
2.int a={1,2};int a[2]={1,'b'};这种该出错的地方不会出错。这个比较致命,但目前还没想好怎么解决。
代码部分借鉴了这里,我直接用了他的分词的scanner函数和宏定义的部分。他的程序总共2000+行,我改了一下,总共只用1000行就实现了他的功能,其中500行是抄的他的分词......并且修改了他的程序里的一些错误。
Grammar.txt:文法的定义
<程序闭包> -> <声明语句闭包> #
<程序闭包> -> <函数定义> #
<程序闭包> -> $ #
<函数定义> -> <修饰词闭包> <类型> <变量> ( <参数声明> ) { <函数块> } #
<修饰词闭包> -> <修饰词> <修饰词闭包> #
<修饰词闭包> -> $ #
<修饰词> -> describe #
<类型> -> type <取地址> #
<取地址> -> <星号闭包> #
<星号闭包> -> <星号> <星号闭包> #
<星号闭包> -> $ #
<星号> -> * #
<变量> -> <标志符> <数组下标> #
<标志符> -> id #
<数组下标> -> [ <因式> ] #
<数组下标> -> $ #
<因式> -> ( <表达式> ) #
<因式> -> <变量> #
<因式> -> <数字> #
<数字> -> digit #
<数字> -> string #
<表达式> -> <因子> <项> #
<因子> -> <因式> <因式递归> #
<因式递归> -> * <因式> <因式递归> #
<因式递归> -> / <因式> <因式递归> #
<因式递归> -> $ #
<项> -> + <因子> <项> #
<项> -> - <因子> <项> #
<项> -> $ #
<参数声明> -> <声明> <声明闭包> #
<参数声明> -> $ #
<声明> -> <修饰词闭包> <类型> <变量> <赋初值> #
<赋初值> -> = <右值> #
<赋初值> -> $ #
<右值> -> <表达式> #
<右值> -> { <多个数据> } #
<多个数据> -> <数字> <数字闭包> #
<数字闭包> -> , <数字> <数字闭包> #
<数字闭包> -> $ #
<声明闭包> -> , <声明> <声明闭包> #
<声明闭包> -> $ #
<函数块> -> <声明语句闭包> <函数块闭包> #
<声明语句闭包> -> <声明语句> <声明语句闭包> #
<声明语句闭包> -> $ #
<声明语句> -> <声明> ; #
<函数块闭包> -> <赋值函数> <函数块闭包> #
<函数块闭包> -> <for循环> <函数块闭包> #
<函数块闭包> -> <条件语句> <函数块闭包> #
<函数块闭包> -> <函数返回> <函数块闭包> #
<函数块闭包> -> $ #
<赋值函数> -> <变量> <赋值或函数调用> #
<赋值或函数调用> -> = <右值> ; #
<赋值或函数调用> -> ( <参数列表> ) ; #
<参数列表> -> <参数> <参数闭包> #
<参数闭包> -> , <参数> <参数闭包> #
<参数闭包> -> $ #
<参数> -> <标志符> #
<参数> -> <数字> #
<参数> -> <字符串> #
<字符串> -> string #
<for循环> -> for ( <赋值函数> <逻辑表达式> ; <后缀表达式> ) { <函数块> } #
<逻辑表达式> -> <表达式> <逻辑运算符> <表达式> #
<逻辑运算符> -> < #
<逻辑运算符> -> > #
<逻辑运算符> -> == #
<逻辑运算符> -> != #
<后缀表达式> -> <变量> <后缀运算符> #
<后缀运算符> -> ++ #
<后缀运算符> -> -- #
<条件语句> -> if ( <逻辑表达式> ) { <函数块> } <否则语句> #
<否则语句> -> else { <函数块> } #
<否则语句> -> $ #
<函数返回> -> return <因式> ; #
Grammar.cpp:主程序
#include "initialize.h"int main() { Init(); Scan(); Analyse(); getchar(); return 0;}
initialize.h:初始化
#ifndef __INIT__#define __INIT__#include "WA.h"#include "SA.h"#include "Macro_Struct.h"void InitKeyMap() { keyMap.clear(); keyMap.push_back(make_pair("auto", AUTO)); keyMap.push_back(make_pair("break", BREAK)); keyMap.push_back(make_pair("case", CASE)); keyMap.push_back(make_pair("char", CHAR)); keyMap.push_back(make_pair("const", CONST)); keyMap.push_back(make_pair("continue", CONTINUE)); keyMap.push_back(make_pair("default", DEFAULT)); keyMap.push_back(make_pair("do", DO)); keyMap.push_back(make_pair("double", DOUBLE)); keyMap.push_back(make_pair("else", ELSE)); keyMap.push_back(make_pair("enum", ENUM)); keyMap.push_back(make_pair("extern", EXTERN)); keyMap.push_back(make_pair("float", FLOAT)); keyMap.push_back(make_pair("for", FOR)); keyMap.push_back(make_pair("goto", GOTO)); keyMap.push_back(make_pair("if", IF)); keyMap.push_back(make_pair("int", INT)); keyMap.push_back(make_pair("long", LONG)); keyMap.push_back(make_pair("register", REGISTER)); keyMap.push_back(make_pair("return", RETURN)); keyMap.push_back(make_pair("short", SHORT)); keyMap.push_back(make_pair("signed", SIGNED)); keyMap.push_back(make_pair("sizeof", SIZEOF)); keyMap.push_back(make_pair("static", STATIC)); keyMap.push_back(make_pair("struct", STRUCT)); keyMap.push_back(make_pair("switch", SWITCH)); keyMap.push_back(make_pair("typedef", TYPEDEF)); keyMap.push_back(make_pair("union", UNION)); keyMap.push_back(make_pair("unsigned", UNSIGNED)); keyMap.push_back(make_pair("void", VOID)); keyMap.push_back(make_pair("volatile", VOLATILE)); keyMap.push_back(make_pair("while", WHILE)); keyMap.push_back(make_pair("describe", DESCRIBE)); keyMap.push_back(make_pair("type", TYPE)); //keyMap.push_back(make_pair("string", STRING)); keyMap.push_back(make_pair("digit", DIGIT));}void InitOperMap() { operMap.clear(); operMap.push_back(make_pair("!", NOT)); operMap.push_back(make_pair("&", BYTE_AND)); operMap.push_back(make_pair("~", COMPLEMENT)); operMap.push_back(make_pair("^", BYTE_XOR)); 
operMap.push_back(make_pair("*", MUL)); operMap.push_back(make_pair("/", DIV)); operMap.push_back(make_pair("%", MOD)); operMap.push_back(make_pair("+", ADD)); operMap.push_back(make_pair("-", SUB)); operMap.push_back(make_pair("<", LES_THAN)); operMap.push_back(make_pair(">", GRT_THAN)); operMap.push_back(make_pair("=", ASG)); operMap.push_back(make_pair("->", ARROW)); operMap.push_back(make_pair("++", SELF_ADD)); operMap.push_back(make_pair("--", SELF_SUB)); operMap.push_back(make_pair("<<", LEFT_MOVE)); operMap.push_back(make_pair(">>", RIGHT_MOVE)); operMap.push_back(make_pair("<=", LES_EQUAL)); operMap.push_back(make_pair(">=", GRT_EQUAL)); operMap.push_back(make_pair("==", EQUAL)); operMap.push_back(make_pair("!=", NOT_EQUAL)); operMap.push_back(make_pair("&&", AND)); operMap.push_back(make_pair("||", OR)); operMap.push_back(make_pair("+=", COMPLETE_ADD)); operMap.push_back(make_pair("-=", COMPLETE_SUB)); operMap.push_back(make_pair("*=", COMPLETE_MUL)); operMap.push_back(make_pair("/=", COMPLETE_DIV)); operMap.push_back(make_pair("^=", COMPLETE_BYTE_XOR)); operMap.push_back(make_pair("&=", COMPLETE_BYTE_AND)); operMap.push_back(make_pair("~=", COMPLETE_COMPLEMENT)); operMap.push_back(make_pair("%=", COMPLETE_MOD)); operMap.push_back(make_pair("|", BYTE_OR));}void InitLimitMap() { limitMap.clear(); limitMap.push_back(make_pair("(", LEFT_BRA)); limitMap.push_back(make_pair(")", RIGHT_BRA)); limitMap.push_back(make_pair("[", LEFT_INDEX)); limitMap.push_back(make_pair("]", RIGHT_INDEX)); limitMap.push_back(make_pair("{", L_BOUNDER)); limitMap.push_back(make_pair("}", R_BOUNDER)); limitMap.push_back(make_pair(".", POINTER)); limitMap.push_back(make_pair("#", JING)); limitMap.push_back(make_pair("_", UNDER_LINE)); limitMap.push_back(make_pair(",", COMMA)); limitMap.push_back(make_pair(";", SEMI)); limitMap.push_back(make_pair("‘", SIN_QUE)); limitMap.push_back(make_pair("\"", DOU_QUE));}void ShowExprList() { for (int i = 0; i < ExprNum; i++) { printf("%s -> ", 
Signature[Expr[i][0]].first); for (int j = 2; j <= Expr[i][1]; j++) { printf("%s ", Signature[Expr[i][j]].first); } printf("\n"); }}void DFS_NullPossibility(int x) { //printf("%s\n",Signature[x].first); if (visited[x]) { return; } visited[x] = true; for (int i = 0; i < ExprNum; i++) { if (Expr[i][0] != x) { continue; } bool ret = true; for (int j = 2; j <= Expr[i][1]; j++) { DFS_NullPossibility(Expr[i][j]); ret &= canBeNull[Expr[i][j]]; } if (ret) { visited[x] = true; canBeNull[x] = true; return; } }}void ReadExpr() { Signature.clear(); ifstream fin("Grammar.txt"); char str[50][50]; int Length = 0; while (fin >> str[Length]) { if (strcmp(str[Length], "#") == 0) { for (int i = 0; i < Length; i++) { if (strcmp(str[i], "->") == 0) { Expr[ExprNum][i] = Length - 1; continue; } int signID = FindSignature(str[i]); if (signID == -1) { int tempLen = strlen(str[i]); if (str[i][0] == ‘<‘ && str[i][tempLen - 1] == ‘>‘) { pair<char *, bool> tempPair; tempPair.first = new char [tempLen + 1]; memcpy(tempPair.first, str[i], tempLen); tempPair.first[tempLen] = ‘\0‘; tempPair.second = Scalable; Signature.push_back(tempPair); } else { pair<char *, bool> tempPair; tempPair.first = new char [tempLen + 1]; memcpy(tempPair.first, str[i], tempLen); tempPair.first[tempLen] = ‘\0‘; tempPair.second = unScalsble; Signature.push_back(tempPair); } signID = Signature.size() - 1; } Expr[ExprNum][i] = signID; } Length = 0; ExprNum++; } else { Length++; } } fin.close(); memset(canBeNull, false, sizeof(canBeNull)); for (int i = 0; i < Signature.size(); i++) { if (strcmp(Signature[i].first, "$") == 0) { canBeNull[i] = true; } } memset(visited, false, sizeof(visited)); for (int i = 0; i < Signature.size(); i++) { if (Signature[i].second == Scalable) { DFS_NullPossibility(i); } } //ShowExprList();}void DFS_FirstSet(int x) { if (visited[x] || Signature[x].second == unScalsble) { return; } visited[x] = true; for (int i = 0; i < ExprNum; i++) { if (Expr[i][0] != x) { continue; } for (int j = 2; j <= 
Expr[i][1]; j++) { if (Signature[Expr[i][j]].second == unScalsble) { First[x][Expr[i][j]] = true; } else { DFS_FirstSet(Expr[i][j]); for (int k = 0; k < Signature.size(); k++) { if (First[Expr[i][j]][k]) { First[x][k] = true; } } } if (!canBeNull[Expr[i][j]]) { break; } } }}void GetFirstSet() { memset(First, false, sizeof(First)); memset(visited, false, sizeof(visited)); for (int i = 0; i < Signature.size(); i++) { if (Signature[i].second == unScalsble) { continue; } DFS_FirstSet(i); }}void GetFollowSet() {}void GetSelectSet() { memset(Select, false, sizeof(Select)); for (int i = 0; i < ExprNum; i++) { for (int j = 2; j <= Expr[i][1]; j++) { if (Signature[Expr[i][j]].second == unScalsble) { Select[i][Expr[i][j]] = true; } else { for (int k = 0; k < Signature.size(); k++) { if (First[Expr[i][j]][k]) { Select[i][k] = true; } } } if (!canBeNull[Expr[i][j]]) { break; } } if (!canBeNull[Expr[i][0]]) { Select[i][FindSignature("$")] = false; } }}void ShowShiftList() { for (int i = 0; i < Signature.size(); i++) { for (int j = 0; j < Signature.size(); j++) { if (ShiftList[i][j][0][0] <= 1 || i == j) { continue; } printf("%3d %30s -> %30s\n", ShiftList[i][j][0][0], Signature[i].first, Signature[j].first); for (int k = 1; k <= ShiftList[i][j][0][0]; k++) { printf("---------->"); for (int h = 1; h <= ShiftList[i][j][k][0]; h++) { printf("%s ", Signature[ShiftList[i][j][k][h]].first); } printf("\n"); } } }}void GetShiftList() { memset(ShiftList, 0, sizeof(ShiftList)); for (int i = 0; i < ExprNum; i++) { for (int j = 0; j < Signature.size(); j++) { if (!Select[i][j]) { continue; } /*if (Signature[j].second == Scalable) { printf("OMG\n"); }*/ ShiftList[Expr[i][0]][j][0][0]++; for (int k = 1; k <= Expr[i][1]; k++) { ShiftList[Expr[i][0]][j][ShiftList[Expr[i][0]][j][0][0]][k - 1] = Expr[i][k]; } ShiftList[Expr[i][0]][j][ShiftList[Expr[i][0]][j][0][0]][0]--; } } //ShowShiftList();}void Init() { InitKeyMap(); InitOperMap(); InitLimitMap(); tokenList.clear(); errorList.clear(); 
ReadExpr(); GetFirstSet(); GetFollowSet(); GetSelectSet(); GetShiftList();}#endif
WA.h:词法分析
#ifndef __WA__#define __WA__#include "stdafx.h"#include "Macro_Struct.h"vector<pair<const char *, int>> keyMap;vector<pair<const char *, int>> operMap;vector<pair<const char *, int>> limitMap;vector<Token> tokenList;vector<Error> errorList;int SeekKey(char * word) { for (int i = 0; i < keyMap.size(); i++) { if (strcmp(word, keyMap[i].first) == 0) { return keyMap[i].second; } } return IDENTIFER;}void InsertToken(char * content, char * describe, int type, int line) { Token tempToken; strcpy_s(tempToken.content, content); strcpy_s(tempToken.describe, describe); tempToken.type = type; tempToken.line = line; tokenList.push_back(tempToken);}void InsertError(char * content, char * describe, int type, int line) { Error tempError; strcpy_s(tempError.content, content); strcpy_s(tempError.describe, describe); tempError.type = type; tempError.line = line; errorList.push_back(tempError); printf("Line %d:%s\n", line, describe);}void preProcess(char * word, int line) { regex INCLUDE_REGEX("#include\\s*<[\\w\\.]+>\\s*"); regex DEFINE_REGEX("#define\\s+\\w+\\s+\\w+\\s*"); if (regex_match(word, INCLUDE_REGEX)) { return; } if (regex_match(word, DEFINE_REGEX)) { return; } InsertError(word, PRE_PROCESS_ERROR, PRE_PROCESS_ERROR_NUM, line);}void Scan() { char ch; char array[30];//单词长度上限是30 char * word; int i; int line = 1;//行数 FILE * infile; errno_t err = fopen_s(&infile, "input.txt", "r"); ch = fgetc(infile); while (ch != EOF) { i = 0; //以字母或者下划线开头,处理关键字或者标识符 if ((ch >= ‘A‘ && ch <= ‘Z‘) || (ch >= ‘a‘ && ch <= ‘z‘) || ch == ‘_‘) { while ((ch >= ‘A‘ && ch <= ‘Z‘) || (ch >= ‘a‘ && ch <= ‘z‘) || (ch >= ‘0‘ && ch <= ‘9‘) || ch == ‘_‘) { array[i++] = ch; ch = fgetc(infile); } word = new char[i + 1]; memcpy(word, array, i); word[i] = ‘\0‘; int seekTemp = SeekKey(word); if (seekTemp != IDENTIFER) { InsertToken(word, KEY_DESC, seekTemp, line); } else { InsertToken(word, IDENTIFER_DESC, seekTemp, line); } fseek(infile, -1L, SEEK_CUR); //向后回退一位 } //以数字开头,处理数字 else if (ch >= ‘0‘ && ch <= ‘9‘) { 
int flag = 0; int flag2 = 0; //处理整数 while (ch >= ‘0‘ && ch <= ‘9‘) { array[i++] = ch; ch = fgetc(infile); } //处理float if (ch == ‘.‘) { flag2 = 1; array[i++] = ch; ch = fgetc(infile); if (ch >= ‘0‘ && ch <= ‘9‘) { while (ch >= ‘0‘ && ch <= ‘9‘) { array[i++] = ch; ch = fgetc(infile); } } else { flag = 1; } //处理Double if (ch == ‘E‘ || ch == ‘e‘) { array[i++] = ch; ch = fgetc(infile); if (ch == ‘+‘ || ch == ‘-‘) { array[i++] = ch; ch = fgetc(infile); } if (ch >= ‘0‘ && ch <= ‘9‘) { array[i++] = ch; ch = fgetc(infile); } else { flag = 2; } } } word = new char[i + 1]; memcpy(word, array, i); word[i] = ‘\0‘; if (flag == 1) { InsertError(word, FLOAT_ERROR, FLOAT_ERROR_NUM, line); } else if (flag == 2) { InsertError(word, DOUBLE_ERROR, DOUBLE_ERROR_NUM, line); } else { if (flag2 == 0) { InsertToken(word, CONSTANT_DESC, INT_VAL, line); } else { InsertToken(word, CONSTANT_DESC, FLOAT_VAL, line); } } fseek(infile, -1L, SEEK_CUR); //向后回退一位 } //以"/"开头 else if (ch == ‘/‘) { ch = fgetc(infile); //处理运算符"/=" if (ch == ‘=‘) { InsertToken("/=", OPE_DESC, COMPLETE_DIV, line); } //处理"/**/"型注释 else if (ch == ‘*‘) { ch = fgetc(infile); while (1) { while (ch != ‘*‘) { if (ch == ‘\n‘) { line++; } ch = fgetc(infile); if (ch == EOF) { InsertError(_NULL, NOTE_ERROR, NOTE_ERROR_NUM, line); return; } } ch = fgetc(infile); if (ch == ‘/‘) { break; } if (ch == EOF) { InsertError(_NULL, NOTE_ERROR, NOTE_ERROR_NUM, line); return; } } InsertToken(_NULL, NOTE_DESC, NOTE1, line); } //处理"//"型注释 else if (ch == ‘/‘) { while (ch != ‘\n‘) { ch = fgetc(infile); if (ch == EOF) { InsertToken(_NULL, NOTE_DESC, NOTE2, line); return; } } line++; InsertToken(_NULL, NOTE_DESC, NOTE2, line); if (ch == EOF) { return; } } //处理除号 else { InsertToken("/", OPE_DESC, DIV, line); } } //处理常量字符串 else if (ch == ‘"‘) { InsertToken("\"", CLE_OPE_DESC, DOU_QUE, line); ch = fgetc(infile); i = 0; while (ch != ‘"‘) { array[i++] = ch; if (ch == ‘\n‘) { line++; } ch = fgetc(infile); if (ch == EOF) { InsertError(_NULL, STRING_ERROR, 
STRING_ERROR_NUM, line); return; } } word = new char[i + 1]; memcpy(word, array, i); word[i] = ‘\0‘; InsertToken(word, CONSTANT_DESC, STRING_VAL, line); InsertToken("\"", CLE_OPE_DESC, DOU_QUE, line); } //处理常量字符 else if (ch == ‘\‘‘) { InsertToken("\‘", CLE_OPE_DESC, SIN_QUE, line); ch = fgetc(infile); i = 0; while (ch != ‘\‘‘) { array[i++] = ch; if (ch == ‘\n‘) { line++; } ch = fgetc(infile); if (ch == EOF) { InsertError(_NULL, CHARCONST_ERROR, CHARCONST_ERROR_NUM, line); return; } } word = new char[i + 1]; memcpy(word, array, i); word[i] = ‘\0‘; InsertToken(word, CONSTANT_DESC, CHAR_VAL, line); InsertToken("\‘", CLE_OPE_DESC, SIN_QUE, line); } else if (ch == ‘ ‘ || ch == ‘\t‘ || ch == ‘\r‘ || ch == ‘\n‘) { if (ch == ‘\n‘) { line++; } } else { if (ch == EOF) { return; } //处理头文件和宏常量(预处理) else if (ch == ‘#‘) { while (ch != ‘\n‘ && ch != EOF) { array[i++] = ch; ch = fgetc(infile); } word = new char[i + 1]; memcpy(word, array, i); word[i] = ‘\0‘; preProcess(word, line); fseek(infile, -1L, SEEK_CUR); //向后回退一位 } //处理-开头的运算符 else if (ch == ‘-‘) { array[i++] = ch; ch = fgetc(infile); if (ch >= ‘0‘ && ch <= ‘9‘) { int flag = 0; int flag2 = 0; //处理整数 while (ch >= ‘0‘ && ch <= ‘9‘) { array[i++] = ch; ch = fgetc(infile); } //处理float if (ch == ‘.‘) { flag2 = 1; array[i++] = ch; ch = fgetc(infile); if (ch >= ‘0‘ && ch <= ‘9‘) { while (ch >= ‘0‘ && ch <= ‘9‘) { array[i++] = ch; ch = fgetc(infile); } } else { flag = 1; } //处理Double if (ch == ‘E‘ || ch == ‘e‘) { array[i++] = ch; ch = fgetc(infile); if (ch == ‘+‘ || ch == ‘-‘) { array[i++] = ch; ch = fgetc(infile); } if (ch >= ‘0‘ && ch <= ‘9‘) { array[i++] = ch; ch = fgetc(infile); } else { flag = 2; } } } word = new char[i + 1]; memcpy(word, array, i); word[i] = ‘\0‘; if (flag == 1) { InsertError(word, FLOAT_ERROR, FLOAT_ERROR_NUM, line); } else if (flag == 2) { InsertError(word, DOUBLE_ERROR, DOUBLE_ERROR_NUM, line); } else { if (flag2 == 0) { InsertToken(word, CONSTANT_DESC, INT_VAL, line); } else { InsertToken(word, 
CONSTANT_DESC, FLOAT_VAL, line); } } fseek(infile, -1L, SEEK_CUR); //向后回退一位 } else if (ch == ‘>‘) { InsertToken("->", OPE_DESC, ARROW, line); } else if (ch == ‘-‘) { InsertToken("--", OPE_DESC, SELF_SUB, line); } else if (ch == ‘=‘) { InsertToken("--", OPE_DESC, SELF_SUB, line); } else { InsertToken("-", OPE_DESC, SUB, line); fseek(infile, -1L, SEEK_CUR); } } //处理+开头的运算符 else if (ch == ‘+‘) { ch = fgetc(infile); if (ch == ‘+‘) { InsertToken("++", OPE_DESC, SELF_ADD, line); } else if (ch == ‘=‘) { InsertToken("+=", OPE_DESC, COMPLETE_ADD, line); } else { InsertToken("+", OPE_DESC, ADD, line); fseek(infile, -1L, SEEK_CUR); } } //处理*开头的运算符 else if (ch == ‘*‘) { ch = fgetc(infile); if (ch == ‘=‘) { InsertToken("*=", OPE_DESC, COMPLETE_MUL, line); } else { InsertToken("*", OPE_DESC, MUL, line); fseek(infile, -1L, SEEK_CUR); } } //处理按^开头的运算符 else if (ch == ‘^‘) { ch = fgetc(infile); if (ch == ‘=‘) { InsertToken("^=", OPE_DESC, COMPLETE_BYTE_XOR, line); } else { InsertToken("^", OPE_DESC, BYTE_XOR, line); fseek(infile, -1L, SEEK_CUR); } } //处理%开头的运算符 else if (ch == ‘%‘) { ch = fgetc(infile); if (ch == ‘=‘) { InsertToken("%=", OPE_DESC, COMPLETE_MOD, line); } else { InsertToken("%", OPE_DESC, MOD, line); fseek(infile, -1L, SEEK_CUR); } } //处理&开头的运算符 else if (ch == ‘&‘) { ch = fgetc(infile); if (ch == ‘=‘) { InsertToken("&=", OPE_DESC, COMPLETE_BYTE_AND, line); } else if (ch == ‘&‘) { InsertToken("&&", OPE_DESC, AND, line); } else { InsertToken("&", OPE_DESC, BYTE_AND, line); fseek(infile, -1L, SEEK_CUR); } } //处理~开头的运算符 else if (ch == ‘~‘) { ch = fgetc(infile); if (ch == ‘=‘) { InsertToken("~=", OPE_DESC, COMPLETE_COMPLEMENT, line); } else { InsertToken("~", OPE_DESC, COMPLEMENT, line); fseek(infile, -1L, SEEK_CUR); } } //处理!开头的运算符 else if (ch == ‘!‘) { ch = fgetc(infile); if (ch == ‘=‘) { InsertToken("!=", OPE_DESC, NOT_EQUAL, line); } else { InsertToken("!", OPE_DESC, NOT, line); fseek(infile, -1L, SEEK_CUR); } } //处理<开头的运算符 else if (ch == ‘<‘) { ch = fgetc(infile); if 
(ch == ‘<‘) { InsertToken("<<", OPE_DESC, LEFT_MOVE, line); } else if (ch == ‘=‘) { InsertToken("<=", OPE_DESC, LES_EQUAL, line); } else { InsertToken("<", OPE_DESC, LES_THAN, line); fseek(infile, -1L, SEEK_CUR); } } //处理>开头的运算符 else if (ch == ‘>‘) { ch = fgetc(infile); if (ch == ‘>‘) { InsertToken(">>", OPE_DESC, RIGHT_MOVE, line); } else if (ch == ‘=‘) { InsertToken(">=", OPE_DESC, GRT_EQUAL, line); } else { InsertToken(">", OPE_DESC, GRT_THAN, line); fseek(infile, -1L, SEEK_CUR); } } //处理|开头的运算符 else if (ch == ‘|‘) { ch = fgetc(infile); if (ch == ‘|‘) { InsertToken("||", OPE_DESC, OR, line); } else { InsertToken("|", OPE_DESC, BYTE_OR, line); fseek(infile, -1L, SEEK_CUR); } } else if (ch == ‘=‘) { ch = fgetc(infile); if (ch == ‘=‘) { InsertToken("==", OPE_DESC, EQUAL, line); } else { InsertToken("=", OPE_DESC, ASG, line); fseek(infile, -1L, SEEK_CUR); } } else if (ch == ‘(‘) { InsertToken("(", CLE_OPE_DESC, LEFT_BRA, line); } else if (ch == ‘)‘) { InsertToken(")", CLE_OPE_DESC, RIGHT_BRA, line); } else if (ch == ‘[‘) { InsertToken("[", CLE_OPE_DESC, LEFT_INDEX, line); } else if (ch == ‘]‘) { InsertToken("]", CLE_OPE_DESC, RIGHT_INDEX, line); } else if (ch == ‘{‘) { InsertToken("{", CLE_OPE_DESC, L_BOUNDER, line); } else if (ch == ‘}‘) { InsertToken("}", CLE_OPE_DESC, R_BOUNDER, line); } else if (ch == ‘.‘) { InsertToken(".", CLE_OPE_DESC, POINTER, line); } else if (ch == ‘,‘) { InsertToken(",", CLE_OPE_DESC, COMMA, line); } else if (ch == ‘;‘) { InsertToken(";", CLE_OPE_DESC, SEMI, line); } else { char temp[2]; temp[0] = ch; temp[1] = ‘\0‘; InsertError(temp, CHAR_ERROR, CHAR_ERROR_NUM, line); } } ch = fgetc(infile); } fclose(infile);}#endif
SA.h:语法分析
#ifndef __SA__#define __SA__#include "stdafx.h"#include "Macro_Struct.h"#include "initialize.h"#define unScalsble false#define Scalable truevector<pair<char *, bool>> Signature;int CodeStack[5000], TryStack[5000], tokenId[5000];int Csize = 0, Tsize = 0;int Expr[100][100];int ExprNum = 0;bool First[100][100];bool visited[100];bool Follow[100][100];bool Select[100][100];int ShiftList[100][100][10][100];bool canBeNull[100];int ArrayFlag;int FindSignature(char * str) { for (int i = 0; i < Signature.size(); i++) { if (strcmp(str, Signature[i].first) == 0) { return i; } } return -1;}int Try(int top, int sn, int ptr) { if (ptr < 0 || Tsize <= top) { return -10; } int Expl = TryStack[top]; Tsize += (ShiftList[Expl][CodeStack[ptr]][sn][0] - 1); for (int i = 1; i <= ShiftList[Expl][CodeStack[ptr]][sn][0]; i++) { TryStack[Tsize - i] = ShiftList[Expl][CodeStack[ptr]][sn][i]; } while (Tsize > top) { if (ptr < 0) { if (canBeNull[TryStack[Tsize - 1]]) { Tsize--; continue; } else { printf("Line%2d:%s can\‘ t explain as %s \n", tokenList[tokenId[0]].line, tokenList[tokenId[0]].content, Signature[Expl].first); return -10; } } if (TryStack[Tsize - 1] == CodeStack[ptr]) { Tsize--; ptr--; continue; } if (Signature[TryStack[Tsize - 1]].second == unScalsble) { printf("Line%2d:%s can\‘ t explain as %s \n", tokenList[tokenId[ptr]].line, tokenList[tokenId[ptr]].content, Signature[Expl].first); return -10; } if (ShiftList[TryStack[Tsize - 1]][CodeStack[ptr]][0][0] == 0) { if (canBeNull[TryStack[Tsize - 1]]) { Tsize--; continue; } else { printf("Line%2d:%s can\‘ t explain as %s \n", tokenList[tokenId[ptr]].line, tokenList[tokenId[ptr]].content, Signature[Expl].first); return -10; } } bool Match = false; for (int i = 1; i <= ShiftList[TryStack[Tsize - 1]][CodeStack[ptr]][0][0]; i++) { int tempTs = Tsize; int tempTi = TryStack[Tsize - 1]; int ret = Try(Tsize - 1, i, ptr); if (ret != -10) { Match = true; ptr = ret; break; } else { Tsize = tempTs; TryStack[Tsize - 1] = tempTi; } } if (!Match) { 
printf("Line%2d:%s can\‘ t explain as %s \n", tokenList[tokenId[ptr]].line, tokenList[tokenId[ptr]].content, Signature[Expl].first); return -10; } } return ptr;}void Analyse() { for (int i = tokenList.size() - 1; i >= 0; i--) { if (tokenList[i].type == AUTO || tokenList[i].type == CONST || tokenList[i].type == UNSIGNED || tokenList[i].type == SIGNED || tokenList[i].type == STATIC || tokenList[i].type == VOLATILE) { tokenId[Csize] = i; CodeStack[Csize++] = FindSignature("describe"); } else if (tokenList[i].type == INT_VAL) { tokenId[Csize] = i; CodeStack[Csize++] = FindSignature("digit"); } else if (tokenList[i].type == CHAR || tokenList[i].type == DOUBLE || tokenList[i].type == FLOAT || tokenList[i].type == INT || tokenList[i].type == LONG || tokenList[i].type == SHORT || tokenList[i].type == VOID) { tokenId[Csize] = i; CodeStack[Csize++] = FindSignature("type"); } else if (tokenList[i].type == STRING_VAL) { tokenId[Csize] = i; CodeStack[Csize++] = FindSignature("string"); } else if (tokenList[i].type == DOU_QUE || tokenList[i].type == SIN_QUE || tokenList[i].type == NOTE1 || tokenList[i].type == NOTE2) { } else if (tokenList[i].type == IDENTIFER) { tokenId[Csize] = i; CodeStack[Csize++] = FindSignature("id"); } else if (tokenList[i].type == FOR) { tokenId[Csize] = i; CodeStack[Csize++] = FindSignature("for"); } else if (tokenList[i].type == IF) { tokenId[Csize] = i; CodeStack[Csize++] = FindSignature("if"); } else if (tokenList[i].type == ELSE) { tokenId[Csize] = i; CodeStack[Csize++] = FindSignature("else"); } else if (tokenList[i].type == RETURN) { tokenId[Csize] = i; CodeStack[Csize++] = FindSignature("return"); } else { tokenId[Csize] = i; CodeStack[Csize++] = FindSignature(tokenList[i].content); } if (tokenList[i].type != DOU_QUE && tokenList[i].type != SIN_QUE) { printf("%15s %15s %15d\n", Signature[CodeStack[Csize - 1]].first, tokenList[i].content, tokenList[i].type); } } ArrayFlag = -1; while (Csize) { if (ShiftList[0][CodeStack[Csize - 1]][0][0] == 0) { 
printf("Error:Line%2d %s\n", tokenList[tokenId[Csize - 1]].line, tokenList[tokenId[Csize - 1]].content); break; } bool Match = false; for (int i = 1; i <= ShiftList[0][CodeStack[Csize - 1]][0][0]; i++) { Tsize = 1; TryStack[0] = 0; int ret = Try(0, i, Csize - 1); if (ret != -10) { Match = true; Csize = ret + 1; break; } } if (!Match) { printf("Error:Line%2d %s\n", tokenList[tokenId[Csize - 1]].line, tokenList[tokenId[Csize - 1]].content); break; } } if (Csize == 0) { printf("Successful\n"); }}#endif
Macro_Struct.h:宏定义以及结构体定义
#ifndef __MS__
#define __MS__

// Token codes and description strings shared by the scanner and the parser.

// Keywords
#define AUTO 1
#define BREAK 2
#define CASE 3
#define CHAR 4
#define CONST 5
#define CONTINUE 6
#define DEFAULT 7
#define DO 8
#define DOUBLE 9
#define ELSE 10
#define ENUM 11
#define EXTERN 12
#define FLOAT 13
#define FOR 14
#define GOTO 15
#define IF 16
#define INT 17
#define LONG 18
#define REGISTER 19
#define RETURN 20
#define SHORT 21
#define SIGNED 22
#define SIZEOF 23
#define STATIC 24
#define STRUCT 25
#define SWITCH 26
#define TYPEDEF 27
#define UNION 28
#define UNSIGNED 29
#define VOID 30
#define VOLATILE 31
#define WHILE 32
#define KEY_DESC "关键字"

// Identifiers
#define IDENTIFER 40
#define IDENTIFER_DESC "标志符"

// Constants
#define INT_VAL 51     // integer constant
#define CHAR_VAL 52    // character constant
#define FLOAT_VAL 53   // floating-point constant
#define STRING_VAL 54  // string constant
#define MACRO_VAL 55   // macro constant
#define CONSTANT_DESC "常量"

// Operators
#define NOT 61                  // !
#define BYTE_AND 62             // &
#define COMPLEMENT 63           // ~
#define BYTE_XOR 64             // ^
#define MUL 65                  // *
#define DIV 66                  // /
#define MOD 67                  // %
#define ADD 68                  // +
#define SUB 69                  // -
#define LES_THAN 70             // <
#define GRT_THAN 71             // >
#define ASG 72                  // =
#define ARROW 73                // ->
#define SELF_ADD 74             // ++
#define SELF_SUB 75             // --
#define LEFT_MOVE 76            // <<
#define RIGHT_MOVE 77           // >>
#define LES_EQUAL 78            // <=
#define GRT_EQUAL 79            // >=
#define EQUAL 80                // ==
#define NOT_EQUAL 81            // !=
#define AND 82                  // &&
#define OR 83                   // ||
#define COMPLETE_ADD 84         // +=
#define COMPLETE_SUB 85         // -=
#define COMPLETE_MUL 86         // *=
#define COMPLETE_DIV 87         // /=
#define COMPLETE_BYTE_XOR 88    // ^=
#define COMPLETE_BYTE_AND 89    // &=
#define COMPLETE_COMPLEMENT 90  // ~=
#define COMPLETE_MOD 91         // %=
#define BYTE_OR 92              // |
#define OPE_DESC "运算符"

// Delimiters
#define LEFT_BRA 100     // (
#define RIGHT_BRA 101    // )
#define LEFT_INDEX 102   // [
#define RIGHT_INDEX 103  // ]
#define L_BOUNDER 104    // {
#define R_BOUNDER 105    // }
#define POINTER 106      // .
#define JING 107         // #
#define UNDER_LINE 108   // _
#define COMMA 109        // ,
#define SEMI 110         // ;
#define SIN_QUE 111      // '
#define DOU_QUE 112      // "
#define CLE_OPE_DESC "限界符"

// Comments
#define NOTE1 120  // "/**/" comment
#define NOTE2 121  // "//" comment
#define NOTE_DESC "注释"

// Header files
#define HEADER 130
#define HEADER_DESC "头文件"

// Error kinds: message text and numeric code
#define FLOAT_ERROR "float表示错误"
#define FLOAT_ERROR_NUM 1
#define DOUBLE_ERROR "double表示错误"
#define DOUBLE_ERROR_NUM 2
#define NOTE_ERROR "注释没有结束符"
#define NOTE_ERROR_NUM 3
#define STRING_ERROR "字符串常量没有结束符"
#define STRING_ERROR_NUM 4
#define CHARCONST_ERROR "字符常量没有结束符"
#define CHARCONST_ERROR_NUM 5
#define CHAR_ERROR "非法字符"
#define CHAR_ERROR_NUM 6
#define LEFT_BRA_ERROR "'('没有对应项"
#define LEFT_BRA_ERROR_NUM 7
#define RIGHT_BRA_ERROR "')'没有对应项"
#define RIGHT_BRA_ERROR_NUM 8
#define LEFT_INDEX_ERROR "'['没有对应项"
#define LEFT_INDEX_ERROR_NUM 9
#define RIGHT_INDEX_ERROR "']'没有对应项"
#define RIGHT_INDEX_ERROR_NUM 10
#define L_BOUNDER_ERROR "'{'没有对应项"
#define L_BOUNDER_ERROR_NUM 11
#define R_BOUNDER_ERROR "'}'没有对应项"
#define R_BOUNDER_ERROR_NUM 12
#define PRE_PROCESS_ERROR "预处理错误"  // bad header include or macro definition
#define PRE_PROCESS_ERROR_NUM 13

// Placeholder content for tokens/errors that carry no text.
#define _NULL "无"

// Codes for the grammar terminal names registered as keywords.
#define DESCRIBE 4000
#define TYPE 4001
#define STRING 4002
#define DIGIT 4003

// One lexical token.
// NOTE(review): some description strings above are 30 bytes long when this
// file is saved as UTF-8 and then do not fit `describe` with its terminator -
// confirm the intended source encoding.
struct Token {
    char content[30];   // token text
    char describe[30];  // human-readable category
    int type;           // token code (one of the macros above)
    int line;           // source line number
};

// One lexical error.
struct Error {
    char content[30];   // offending text
    char describe[30];  // error message
    int type;           // error code (*_ERROR_NUM)
    int line;           // source line number
};

#endif
stdafx.h:头文件包含
#ifndef __STDAFX__
#define __STDAFX__

// Standard headers shared by every file of the checker.
#include <iostream>
#include <fstream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <vector>
#include <iomanip>
#include <stack>
#include <regex>

// The other headers of this project use unqualified standard-library names.
using namespace std;

#endif
input.txt:要检查的程序
一个简单的C语言语法检查器的实现
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。