首页 > 代码库 > 一个简单的C语言语法检查器的实现

一个简单的C语言语法检查器的实现

  我自己的实现方法的核心过程:首先用一个非终结符代表所有要检查的程序代码,然后根据文法将这个符号不断展开,直到拼凑出与被检查程序的记号顺序完全一致的终结符序列。如果能够成功展开,说明语法正确;否则说明程序存在语法错误。

关键词:分词;First集;Select集;

  目前还存在的问题:

    1.因为同一个非终结符->终结符的转化可能有多种解释,所以目前我的非终结符展开这里是用递归写的,因此不能定位具体错在哪里。

    2.像 int a={1,2}; 和 int a[2]={1,'b'}; 这种本应报错的代码不会被检查出错误。这个问题比较致命,但目前还没想好怎么解决。

  代码部分借鉴了这里,我直接用了他的分词的 scanner 函数和宏定义的部分。他的程序总共2000+行,我改了一下,总共只用1000行就实现了他的功能,其中500行是抄的他的分词……并且修改了他的程序里的一些错误。

Grammar.txt:文法的定义

技术分享
<程序闭包> -> <声明语句闭包> #
<程序闭包> -> <函数定义> #
<程序闭包> -> $ #
<函数定义> -> <修饰词闭包> <类型> <变量> ( <参数声明> ) { <函数块> } #
<修饰词闭包> -> <修饰词> <修饰词闭包> #
<修饰词闭包> -> $ #
<修饰词> -> describe #
<类型> -> type <取地址> #
<取地址> -> <星号闭包> #
<星号闭包> -> <星号> <星号闭包> #
<星号闭包> -> $ #
<星号> -> * #
<变量> -> <标志符> <数组下标> #
<标志符> -> id #
<数组下标> -> [ <因式> ] #
<数组下标> -> $ #
<因式> -> ( <表达式> ) #
<因式> -> <变量> #
<因式> -> <数字> #
<数字> -> digit #
<数字> -> string #
<表达式> -> <因子> <项> #
<因子> -> <因式> <因式递归> #
<因式递归> -> * <因式> <因式递归> #
<因式递归> -> / <因式> <因式递归> #
<因式递归> -> $ #
<项> -> + <因子> <项> #
<项> -> - <因子> <项> #
<项> -> $ #
<参数声明> -> <声明> <声明闭包> #
<参数声明> -> $ #
<声明> -> <修饰词闭包> <类型> <变量> <赋初值> #
<赋初值> -> = <右值> #
<赋初值> -> $ #
<右值> -> <表达式> #
<右值> -> { <多个数据> } #
<多个数据> -> <数字> <数字闭包> #
<数字闭包> -> , <数字> <数字闭包> #
<数字闭包> -> $ #
<声明闭包> -> , <声明> <声明闭包> #
<声明闭包> -> $ #
<函数块> -> <声明语句闭包> <函数块闭包> #
<声明语句闭包> -> <声明语句> <声明语句闭包> #
<声明语句闭包> -> $ #
<声明语句> -> <声明> ; #
<函数块闭包> -> <赋值函数> <函数块闭包> #
<函数块闭包> -> <for循环> <函数块闭包> #
<函数块闭包> -> <条件语句> <函数块闭包> #
<函数块闭包> -> <函数返回> <函数块闭包> #
<函数块闭包> -> $ #
<赋值函数> -> <变量> <赋值或函数调用> #
<赋值或函数调用> -> = <右值> ; #
<赋值或函数调用> -> ( <参数列表> ) ; #
<参数列表> -> <参数> <参数闭包> #
<参数闭包> -> , <参数> <参数闭包> #
<参数闭包> -> $ #
<参数> -> <标志符> #
<参数> -> <数字> #
<参数> -> <字符串> #
<字符串> -> string #
<for循环> -> for ( <赋值函数> <逻辑表达式> ; <后缀表达式> ) { <函数块> } #
<逻辑表达式> -> <表达式> <逻辑运算符> <表达式> #
<逻辑运算符> -> < #
<逻辑运算符> -> > #
<逻辑运算符> -> == #
<逻辑运算符> -> != #
<后缀表达式> -> <变量> <后缀运算符> #
<后缀运算符> -> ++ #
<后缀运算符> -> -- #
<条件语句> -> if ( <逻辑表达式> ) { <函数块> } <否则语句> #
<否则语句> -> else { <函数块> } #
<否则语句> -> $ #
<函数返回> -> return <因式> ; #
View Code

Grammar.cpp:主程序

#include "initialize.h"int main() {    Init();    Scan();    Analyse();    getchar();    return 0;}

initialize.h:初始化

技术分享
#ifndef __INIT__#define __INIT__#include "WA.h"#include "SA.h"#include "Macro_Struct.h"void InitKeyMap() {    keyMap.clear();    keyMap.push_back(make_pair("auto", AUTO));    keyMap.push_back(make_pair("break", BREAK));    keyMap.push_back(make_pair("case", CASE));    keyMap.push_back(make_pair("char", CHAR));    keyMap.push_back(make_pair("const", CONST));    keyMap.push_back(make_pair("continue", CONTINUE));    keyMap.push_back(make_pair("default", DEFAULT));    keyMap.push_back(make_pair("do", DO));    keyMap.push_back(make_pair("double", DOUBLE));    keyMap.push_back(make_pair("else", ELSE));    keyMap.push_back(make_pair("enum", ENUM));    keyMap.push_back(make_pair("extern", EXTERN));    keyMap.push_back(make_pair("float", FLOAT));    keyMap.push_back(make_pair("for", FOR));    keyMap.push_back(make_pair("goto", GOTO));    keyMap.push_back(make_pair("if", IF));    keyMap.push_back(make_pair("int", INT));    keyMap.push_back(make_pair("long", LONG));    keyMap.push_back(make_pair("register", REGISTER));    keyMap.push_back(make_pair("return", RETURN));    keyMap.push_back(make_pair("short", SHORT));    keyMap.push_back(make_pair("signed", SIGNED));    keyMap.push_back(make_pair("sizeof", SIZEOF));    keyMap.push_back(make_pair("static", STATIC));    keyMap.push_back(make_pair("struct", STRUCT));    keyMap.push_back(make_pair("switch", SWITCH));    keyMap.push_back(make_pair("typedef", TYPEDEF));    keyMap.push_back(make_pair("union", UNION));    keyMap.push_back(make_pair("unsigned", UNSIGNED));    keyMap.push_back(make_pair("void", VOID));    keyMap.push_back(make_pair("volatile", VOLATILE));    keyMap.push_back(make_pair("while", WHILE));    keyMap.push_back(make_pair("describe", DESCRIBE));    keyMap.push_back(make_pair("type", TYPE));    //keyMap.push_back(make_pair("string", STRING));    keyMap.push_back(make_pair("digit", DIGIT));}void InitOperMap() {    operMap.clear();    operMap.push_back(make_pair("!", NOT));    operMap.push_back(make_pair("&", 
BYTE_AND));    operMap.push_back(make_pair("~", COMPLEMENT));    operMap.push_back(make_pair("^", BYTE_XOR));    operMap.push_back(make_pair("*", MUL));    operMap.push_back(make_pair("/", DIV));    operMap.push_back(make_pair("%", MOD));    operMap.push_back(make_pair("+", ADD));    operMap.push_back(make_pair("-", SUB));    operMap.push_back(make_pair("<", LES_THAN));    operMap.push_back(make_pair(">", GRT_THAN));    operMap.push_back(make_pair("=", ASG));    operMap.push_back(make_pair("->", ARROW));    operMap.push_back(make_pair("++", SELF_ADD));    operMap.push_back(make_pair("--", SELF_SUB));    operMap.push_back(make_pair("<<", LEFT_MOVE));    operMap.push_back(make_pair(">>", RIGHT_MOVE));    operMap.push_back(make_pair("<=", LES_EQUAL));    operMap.push_back(make_pair(">=", GRT_EQUAL));    operMap.push_back(make_pair("==", EQUAL));    operMap.push_back(make_pair("!=", NOT_EQUAL));    operMap.push_back(make_pair("&&", AND));    operMap.push_back(make_pair("||", OR));    operMap.push_back(make_pair("+=", COMPLETE_ADD));    operMap.push_back(make_pair("-=", COMPLETE_SUB));    operMap.push_back(make_pair("*=", COMPLETE_MUL));    operMap.push_back(make_pair("/=", COMPLETE_DIV));    operMap.push_back(make_pair("^=", COMPLETE_BYTE_XOR));    operMap.push_back(make_pair("&=", COMPLETE_BYTE_AND));    operMap.push_back(make_pair("~=", COMPLETE_COMPLEMENT));    operMap.push_back(make_pair("%=", COMPLETE_MOD));    operMap.push_back(make_pair("|", BYTE_OR));}void InitLimitMap() {    limitMap.clear();    limitMap.push_back(make_pair("(", LEFT_BRA));    limitMap.push_back(make_pair(")", RIGHT_BRA));    limitMap.push_back(make_pair("[", LEFT_INDEX));    limitMap.push_back(make_pair("]", RIGHT_INDEX));    limitMap.push_back(make_pair("{", L_BOUNDER));    limitMap.push_back(make_pair("}", R_BOUNDER));    limitMap.push_back(make_pair(".", POINTER));    limitMap.push_back(make_pair("#", JING));    limitMap.push_back(make_pair("_", UNDER_LINE));    
limitMap.push_back(make_pair(",", COMMA));    limitMap.push_back(make_pair(";", SEMI));    limitMap.push_back(make_pair("", SIN_QUE));    limitMap.push_back(make_pair("\"", DOU_QUE));}void ShowExprList() {    for (int i = 0; i < ExprNum; i++) {        printf("%s -> ", Signature[Expr[i][0]].first);        for (int j = 2; j <= Expr[i][1]; j++) {            printf("%s ", Signature[Expr[i][j]].first);        }        printf("\n");    }}void DFS_NullPossibility(int x) {    //printf("%s\n",Signature[x].first);    if (visited[x]) {        return;    }    visited[x] = true;    for (int i = 0; i < ExprNum; i++) {        if (Expr[i][0] != x) {            continue;        }        bool ret = true;        for (int j = 2; j <= Expr[i][1]; j++) {            DFS_NullPossibility(Expr[i][j]);            ret &= canBeNull[Expr[i][j]];        }        if (ret) {            visited[x] = true;            canBeNull[x] = true;            return;        }    }}void ReadExpr() {    Signature.clear();    ifstream fin("Grammar.txt");    char str[50][50];    int Length = 0;    while (fin >> str[Length]) {        if (strcmp(str[Length], "#") == 0) {            for (int i = 0; i < Length; i++) {                if (strcmp(str[i], "->") == 0) {                    Expr[ExprNum][i] = Length - 1;                    continue;                }                int signID = FindSignature(str[i]);                if (signID == -1) {                    int tempLen = strlen(str[i]);                    if (str[i][0] == < && str[i][tempLen - 1] == >) {                        pair<char *, bool> tempPair;                        tempPair.first = new char [tempLen + 1];                        memcpy(tempPair.first, str[i], tempLen);                        tempPair.first[tempLen] = \0;                        tempPair.second = Scalable;                        Signature.push_back(tempPair);                    } else {                        pair<char *, bool> tempPair;                        tempPair.first = new char 
[tempLen + 1];                        memcpy(tempPair.first, str[i], tempLen);                        tempPair.first[tempLen] = \0;                        tempPair.second = unScalsble;                        Signature.push_back(tempPair);                    }                    signID = Signature.size() - 1;                }                Expr[ExprNum][i] = signID;            }            Length = 0;            ExprNum++;        } else {            Length++;        }    }    fin.close();    memset(canBeNull, false, sizeof(canBeNull));    for (int i = 0; i < Signature.size(); i++) {        if (strcmp(Signature[i].first, "$") == 0) {            canBeNull[i] = true;        }    }    memset(visited, false, sizeof(visited));    for (int i = 0; i < Signature.size(); i++) {        if (Signature[i].second == Scalable) {            DFS_NullPossibility(i);        }    }    //ShowExprList();}void DFS_FirstSet(int x) {    if (visited[x] || Signature[x].second == unScalsble) {        return;    }    visited[x] = true;    for (int i = 0; i < ExprNum; i++) {        if (Expr[i][0] != x) {            continue;        }        for (int j = 2; j <= Expr[i][1]; j++) {            if (Signature[Expr[i][j]].second == unScalsble) {                First[x][Expr[i][j]] = true;            } else {                DFS_FirstSet(Expr[i][j]);                for (int k = 0; k < Signature.size(); k++) {                    if (First[Expr[i][j]][k]) {                        First[x][k] = true;                    }                }            }            if (!canBeNull[Expr[i][j]]) {                break;            }        }    }}void GetFirstSet() {    memset(First, false, sizeof(First));    memset(visited, false, sizeof(visited));    for (int i = 0; i < Signature.size(); i++) {        if (Signature[i].second == unScalsble) {            continue;        }        DFS_FirstSet(i);    }}void GetFollowSet() {}void GetSelectSet() {    memset(Select, false, sizeof(Select));    for (int i = 0; i < 
ExprNum; i++) {        for (int j = 2; j <= Expr[i][1]; j++) {            if (Signature[Expr[i][j]].second == unScalsble) {                Select[i][Expr[i][j]] = true;            } else {                for (int k = 0; k < Signature.size(); k++) {                    if (First[Expr[i][j]][k]) {                        Select[i][k] = true;                    }                }            }            if (!canBeNull[Expr[i][j]]) {                break;            }        }        if (!canBeNull[Expr[i][0]]) {            Select[i][FindSignature("$")] = false;        }    }}void ShowShiftList() {    for (int i = 0; i < Signature.size(); i++) {        for (int j = 0; j < Signature.size(); j++) {            if (ShiftList[i][j][0][0] <= 1 || i == j) {                continue;            }            printf("%3d %30s -> %30s\n", ShiftList[i][j][0][0], Signature[i].first, Signature[j].first);            for (int k = 1; k <= ShiftList[i][j][0][0]; k++) {                printf("---------->");                for (int h = 1; h <= ShiftList[i][j][k][0]; h++) {                    printf("%s ", Signature[ShiftList[i][j][k][h]].first);                }                printf("\n");            }        }    }}void GetShiftList() {    memset(ShiftList, 0, sizeof(ShiftList));    for (int i = 0; i < ExprNum; i++) {        for (int j = 0; j < Signature.size(); j++) {            if (!Select[i][j]) {                continue;            }            /*if (Signature[j].second == Scalable) {                printf("OMG\n");            }*/            ShiftList[Expr[i][0]][j][0][0]++;            for (int k = 1; k <= Expr[i][1]; k++) {                ShiftList[Expr[i][0]][j][ShiftList[Expr[i][0]][j][0][0]][k - 1] = Expr[i][k];            }            ShiftList[Expr[i][0]][j][ShiftList[Expr[i][0]][j][0][0]][0]--;        }    }    //ShowShiftList();}void Init() {    InitKeyMap();    InitOperMap();    InitLimitMap();    tokenList.clear();    errorList.clear();    ReadExpr();    GetFirstSet();    
GetFollowSet();    GetSelectSet();    GetShiftList();}#endif
View Code

WA.h:词法分析

技术分享
#ifndef __WA__#define __WA__#include "stdafx.h"#include "Macro_Struct.h"vector<pair<const char *, int>> keyMap;vector<pair<const char *, int>> operMap;vector<pair<const char *, int>> limitMap;vector<Token> tokenList;vector<Error> errorList;int SeekKey(char * word) {    for (int i = 0; i < keyMap.size(); i++) {        if (strcmp(word, keyMap[i].first) == 0) {            return keyMap[i].second;        }    }    return IDENTIFER;}void InsertToken(char * content, char * describe, int type, int line) {    Token tempToken;    strcpy_s(tempToken.content, content);    strcpy_s(tempToken.describe, describe);    tempToken.type = type;    tempToken.line = line;    tokenList.push_back(tempToken);}void InsertError(char * content, char * describe, int type, int line) {    Error tempError;    strcpy_s(tempError.content, content);    strcpy_s(tempError.describe, describe);    tempError.type = type;    tempError.line = line;    errorList.push_back(tempError);    printf("Line %d:%s\n", line, describe);}void preProcess(char * word, int line) {    regex INCLUDE_REGEX("#include\\s*<[\\w\\.]+>\\s*");    regex DEFINE_REGEX("#define\\s+\\w+\\s+\\w+\\s*");    if (regex_match(word, INCLUDE_REGEX)) {        return;    }    if (regex_match(word, DEFINE_REGEX)) {        return;    }    InsertError(word, PRE_PROCESS_ERROR, PRE_PROCESS_ERROR_NUM, line);}void Scan() {    char ch;    char array[30];//单词长度上限是30    char * word;    int i;    int line = 1;//行数    FILE * infile;    errno_t err = fopen_s(&infile, "input.txt", "r");    ch = fgetc(infile);    while (ch != EOF) {        i = 0;        //以字母或者下划线开头,处理关键字或者标识符        if ((ch >= A && ch <= Z) || (ch >= a && ch <= z) || ch == _) {            while ((ch >= A && ch <= Z) || (ch >= a && ch <= z) || (ch >= 0 && ch <= 9) || ch == _) {                array[i++] = ch;                ch = fgetc(infile);            }            word = new char[i + 1];            memcpy(word, array, i);            word[i] = \0;            int seekTemp = SeekKey(word);   
         if (seekTemp != IDENTIFER) {                InsertToken(word, KEY_DESC, seekTemp, line);            } else {                InsertToken(word, IDENTIFER_DESC, seekTemp, line);            }            fseek(infile, -1L, SEEK_CUR); //向后回退一位        }        //以数字开头,处理数字        else if (ch >= 0 && ch <= 9) {            int flag = 0;            int flag2 = 0;            //处理整数            while (ch >= 0 && ch <= 9) {                array[i++] = ch;                ch = fgetc(infile);            }            //处理float            if (ch == .) {                flag2 = 1;                array[i++] = ch;                ch = fgetc(infile);                if (ch >= 0 && ch <= 9) {                    while (ch >= 0 && ch <= 9) {                        array[i++] = ch;                        ch = fgetc(infile);                    }                } else {                    flag = 1;                }                //处理Double                if (ch == E || ch == e) {                    array[i++] = ch;                    ch = fgetc(infile);                    if (ch == + || ch == -) {                        array[i++] = ch;                        ch = fgetc(infile);                    }                    if (ch >= 0 && ch <= 9) {                        array[i++] = ch;                        ch = fgetc(infile);                    } else {                        flag = 2;                    }                }            }            word = new char[i + 1];            memcpy(word, array, i);            word[i] = \0;            if (flag == 1) {                InsertError(word, FLOAT_ERROR, FLOAT_ERROR_NUM, line);            } else if (flag == 2) {                InsertError(word, DOUBLE_ERROR, DOUBLE_ERROR_NUM, line);            } else {                if (flag2 == 0) {                    InsertToken(word, CONSTANT_DESC, INT_VAL, line);                } else {                    InsertToken(word, CONSTANT_DESC, FLOAT_VAL, line);                }            }            
fseek(infile, -1L, SEEK_CUR); //向后回退一位        }        //以"/"开头        else if (ch == /) {            ch = fgetc(infile);            //处理运算符"/="            if (ch == =) {                InsertToken("/=", OPE_DESC, COMPLETE_DIV, line);            }            //处理"/**/"型注释            else if (ch == *) {                ch =  fgetc(infile);                while (1) {                    while (ch != *) {                        if (ch == \n) {                            line++;                        }                        ch = fgetc(infile);                        if (ch == EOF) {                            InsertError(_NULL, NOTE_ERROR, NOTE_ERROR_NUM, line);                            return;                        }                    }                    ch = fgetc(infile);                    if (ch == /) {                        break;                    }                    if (ch == EOF) {                        InsertError(_NULL, NOTE_ERROR, NOTE_ERROR_NUM, line);                        return;                    }                }                InsertToken(_NULL, NOTE_DESC, NOTE1, line);            }            //处理"//"型注释            else if (ch == /) {                while (ch != \n) {                    ch = fgetc(infile);                    if (ch == EOF) {                        InsertToken(_NULL, NOTE_DESC, NOTE2, line);                        return;                    }                }                line++;                InsertToken(_NULL, NOTE_DESC, NOTE2, line);                if (ch == EOF) {                    return;                }            }            //处理除号            else {                InsertToken("/", OPE_DESC, DIV, line);            }        }        //处理常量字符串        else if (ch == ") {            InsertToken("\"", CLE_OPE_DESC, DOU_QUE, line);            ch = fgetc(infile);            i = 0;            while (ch != ") {                array[i++] = ch;                if (ch == \n) {                    line++;                }     
           ch = fgetc(infile);                if (ch == EOF) {                    InsertError(_NULL, STRING_ERROR, STRING_ERROR_NUM, line);                    return;                }            }            word = new char[i + 1];            memcpy(word, array, i);            word[i] = \0;            InsertToken(word, CONSTANT_DESC, STRING_VAL, line);            InsertToken("\"", CLE_OPE_DESC, DOU_QUE, line);        }        //处理常量字符        else if (ch == \‘) {            InsertToken("\‘", CLE_OPE_DESC, SIN_QUE, line);            ch = fgetc(infile);            i = 0;            while (ch != \‘) {                array[i++] = ch;                if (ch == \n) {                    line++;                }                ch = fgetc(infile);                if (ch == EOF) {                    InsertError(_NULL, CHARCONST_ERROR, CHARCONST_ERROR_NUM, line);                    return;                }            }            word = new char[i + 1];            memcpy(word, array, i);            word[i] = \0;            InsertToken(word, CONSTANT_DESC, CHAR_VAL, line);            InsertToken("\‘", CLE_OPE_DESC, SIN_QUE, line);        } else if (ch ==   || ch == \t || ch == \r || ch == \n) {            if (ch == \n) {                line++;            }        } else {            if (ch == EOF) {                return;            }            //处理头文件和宏常量(预处理)            else if (ch == #) {                while (ch != \n && ch != EOF) {                    array[i++] = ch;                    ch = fgetc(infile);                }                word = new char[i + 1];                memcpy(word, array, i);                word[i] = \0;                preProcess(word, line);                fseek(infile, -1L, SEEK_CUR); //向后回退一位            }            //处理-开头的运算符            else if (ch == -) {                array[i++] = ch;                ch = fgetc(infile);                if (ch >= 0 && ch <= 9) {                    int flag = 0;                    int flag2 = 0;                   
 //处理整数                    while (ch >= 0 && ch <= 9) {                        array[i++] = ch;                        ch = fgetc(infile);                    }                    //处理float                    if (ch == .) {                        flag2 = 1;                        array[i++] = ch;                        ch = fgetc(infile);                        if (ch >= 0 && ch <= 9) {                            while (ch >= 0 && ch <= 9) {                                array[i++] = ch;                                ch = fgetc(infile);                            }                        } else {                            flag = 1;                        }                        //处理Double                        if (ch == E || ch == e) {                            array[i++] = ch;                            ch = fgetc(infile);                            if (ch == + || ch == -) {                                array[i++] = ch;                                ch = fgetc(infile);                            }                            if (ch >= 0 && ch <= 9) {                                array[i++] = ch;                                ch = fgetc(infile);                            } else {                                flag = 2;                            }                        }                    }                    word = new char[i + 1];                    memcpy(word, array, i);                    word[i] = \0;                    if (flag == 1) {                        InsertError(word, FLOAT_ERROR, FLOAT_ERROR_NUM, line);                    } else if (flag == 2) {                        InsertError(word, DOUBLE_ERROR, DOUBLE_ERROR_NUM, line);                    } else {                        if (flag2 == 0) {                            InsertToken(word, CONSTANT_DESC, INT_VAL, line);                        } else {                            InsertToken(word, CONSTANT_DESC, FLOAT_VAL, line);                        }                    }                  
  fseek(infile, -1L, SEEK_CUR); //向后回退一位                } else if (ch == >) {                    InsertToken("->", OPE_DESC, ARROW, line);                } else if (ch == -) {                    InsertToken("--", OPE_DESC, SELF_SUB, line);                } else if (ch == =) {                    InsertToken("--", OPE_DESC, SELF_SUB, line);                } else {                    InsertToken("-", OPE_DESC, SUB, line);                    fseek(infile, -1L, SEEK_CUR);                }            }            //处理+开头的运算符            else if (ch == +) {                ch = fgetc(infile);                if (ch == +) {                    InsertToken("++", OPE_DESC, SELF_ADD, line);                } else if (ch == =) {                    InsertToken("+=", OPE_DESC, COMPLETE_ADD, line);                } else {                    InsertToken("+", OPE_DESC, ADD, line);                    fseek(infile, -1L, SEEK_CUR);                }            }            //处理*开头的运算符            else if (ch == *) {                ch = fgetc(infile);                if (ch == =) {                    InsertToken("*=", OPE_DESC, COMPLETE_MUL, line);                } else {                    InsertToken("*", OPE_DESC, MUL, line);                    fseek(infile, -1L, SEEK_CUR);                }            }            //处理按^开头的运算符            else if (ch == ^) {                ch = fgetc(infile);                if (ch == =) {                    InsertToken("^=", OPE_DESC, COMPLETE_BYTE_XOR, line);                } else {                    InsertToken("^", OPE_DESC, BYTE_XOR, line);                    fseek(infile, -1L, SEEK_CUR);                }            }            //处理%开头的运算符            else if (ch == %) {                ch = fgetc(infile);                if (ch == =) {                    InsertToken("%=", OPE_DESC, COMPLETE_MOD, line);                } else {                    InsertToken("%", OPE_DESC, MOD, line);                    fseek(infile, -1L, SEEK_CUR);                }        
    }            //处理&开头的运算符            else if (ch == &) {                ch = fgetc(infile);                if (ch == =) {                    InsertToken("&=", OPE_DESC, COMPLETE_BYTE_AND, line);                } else if (ch == &) {                    InsertToken("&&", OPE_DESC, AND, line);                } else {                    InsertToken("&", OPE_DESC, BYTE_AND, line);                    fseek(infile, -1L, SEEK_CUR);                }            }            //处理~开头的运算符            else if (ch == ~) {                ch = fgetc(infile);                if (ch == =) {                    InsertToken("~=", OPE_DESC, COMPLETE_COMPLEMENT, line);                } else {                    InsertToken("~", OPE_DESC, COMPLEMENT, line);                    fseek(infile, -1L, SEEK_CUR);                }            }            //处理!开头的运算符            else if (ch == !) {                ch = fgetc(infile);                if (ch == =) {                    InsertToken("!=", OPE_DESC, NOT_EQUAL, line);                } else {                    InsertToken("!", OPE_DESC, NOT, line);                    fseek(infile, -1L, SEEK_CUR);                }            }            //处理<开头的运算符            else if (ch == <) {                ch = fgetc(infile);                if (ch == <) {                    InsertToken("<<", OPE_DESC, LEFT_MOVE, line);                } else if (ch == =) {                    InsertToken("<=", OPE_DESC, LES_EQUAL, line);                } else {                    InsertToken("<", OPE_DESC, LES_THAN, line);                    fseek(infile, -1L, SEEK_CUR);                }            }            //处理>开头的运算符            else if (ch == >) {                ch = fgetc(infile);                if (ch == >) {                    InsertToken(">>", OPE_DESC, RIGHT_MOVE, line);                } else if (ch == =) {                    InsertToken(">=", OPE_DESC, GRT_EQUAL, line);                } else {                    InsertToken(">", OPE_DESC, GRT_THAN, line);        
            fseek(infile, -1L, SEEK_CUR);                }            }            //处理|开头的运算符            else if (ch == |) {                ch = fgetc(infile);                if (ch == |) {                    InsertToken("||", OPE_DESC, OR, line);                } else {                    InsertToken("|", OPE_DESC, BYTE_OR, line);                    fseek(infile, -1L, SEEK_CUR);                }            } else if (ch == =) {                ch = fgetc(infile);                if (ch == =) {                    InsertToken("==", OPE_DESC, EQUAL, line);                } else {                    InsertToken("=", OPE_DESC, ASG, line);                    fseek(infile, -1L, SEEK_CUR);                }            } else if (ch == () {                InsertToken("(", CLE_OPE_DESC, LEFT_BRA, line);            } else if (ch == )) {                InsertToken(")", CLE_OPE_DESC, RIGHT_BRA, line);            } else if (ch == [) {                InsertToken("[", CLE_OPE_DESC, LEFT_INDEX, line);            } else if (ch == ]) {                InsertToken("]", CLE_OPE_DESC, RIGHT_INDEX, line);            } else if (ch == {) {                InsertToken("{", CLE_OPE_DESC, L_BOUNDER, line);            } else if (ch == }) {                InsertToken("}", CLE_OPE_DESC, R_BOUNDER, line);            } else if (ch == .) {                InsertToken(".", CLE_OPE_DESC, POINTER, line);            } else if (ch == ,) {                InsertToken(",", CLE_OPE_DESC, COMMA, line);            } else if (ch == ;) {                InsertToken(";", CLE_OPE_DESC, SEMI, line);            } else {                char temp[2];                temp[0] = ch;                temp[1] = \0;                InsertError(temp, CHAR_ERROR, CHAR_ERROR_NUM, line);            }        }        ch = fgetc(infile);    }    fclose(infile);}#endif
View Code

SA.h:语法分析 

技术分享
#ifndef __SA__#define __SA__#include "stdafx.h"#include "Macro_Struct.h"#include "initialize.h"#define unScalsble false#define Scalable truevector<pair<char *, bool>> Signature;int CodeStack[5000], TryStack[5000], tokenId[5000];int Csize = 0, Tsize = 0;int Expr[100][100];int ExprNum = 0;bool First[100][100];bool visited[100];bool Follow[100][100];bool Select[100][100];int ShiftList[100][100][10][100];bool canBeNull[100];int ArrayFlag;int FindSignature(char * str) {    for (int i = 0; i < Signature.size(); i++) {        if (strcmp(str, Signature[i].first) == 0) {            return i;        }    }    return -1;}int Try(int top, int sn, int ptr) {    if (ptr < 0 || Tsize <= top) {        return -10;    }    int Expl = TryStack[top];    Tsize += (ShiftList[Expl][CodeStack[ptr]][sn][0] - 1);    for (int i = 1; i <= ShiftList[Expl][CodeStack[ptr]][sn][0]; i++) {        TryStack[Tsize - i] = ShiftList[Expl][CodeStack[ptr]][sn][i];    }    while (Tsize > top) {        if (ptr < 0) {            if (canBeNull[TryStack[Tsize - 1]]) {                Tsize--;                continue;            } else {                printf("Line%2d:%s can\‘ t explain as %s \n", tokenList[tokenId[0]].line, tokenList[tokenId[0]].content, Signature[Expl].first);                return -10;            }        }        if (TryStack[Tsize - 1] == CodeStack[ptr]) {            Tsize--;            ptr--;            continue;        }        if (Signature[TryStack[Tsize - 1]].second == unScalsble) {            printf("Line%2d:%s can\‘ t explain as %s \n", tokenList[tokenId[ptr]].line, tokenList[tokenId[ptr]].content, Signature[Expl].first);            return -10;        }        if (ShiftList[TryStack[Tsize - 1]][CodeStack[ptr]][0][0] == 0) {            if (canBeNull[TryStack[Tsize - 1]]) {                Tsize--;                continue;            } else {                printf("Line%2d:%s can\‘ t explain as %s \n", tokenList[tokenId[ptr]].line, tokenList[tokenId[ptr]].content, 
Signature[Expl].first);                return -10;            }        }        bool Match = false;        for (int i = 1; i <= ShiftList[TryStack[Tsize - 1]][CodeStack[ptr]][0][0]; i++) {            int tempTs = Tsize;            int tempTi = TryStack[Tsize - 1];            int ret = Try(Tsize - 1, i, ptr);            if (ret != -10) {                Match = true;                ptr = ret;                break;            } else {                Tsize = tempTs;                TryStack[Tsize - 1] = tempTi;            }        }        if (!Match) {            printf("Line%2d:%s can\‘ t explain as %s \n", tokenList[tokenId[ptr]].line, tokenList[tokenId[ptr]].content, Signature[Expl].first);            return -10;        }    }    return ptr;}void Analyse() {    for (int i = tokenList.size() - 1; i >= 0; i--) {        if (tokenList[i].type == AUTO ||                tokenList[i].type == CONST ||                tokenList[i].type == UNSIGNED ||                tokenList[i].type == SIGNED ||                tokenList[i].type == STATIC ||                tokenList[i].type == VOLATILE) {            tokenId[Csize] = i;            CodeStack[Csize++] = FindSignature("describe");        } else if (tokenList[i].type == INT_VAL) {            tokenId[Csize] = i;            CodeStack[Csize++] = FindSignature("digit");        } else if (tokenList[i].type == CHAR ||                   tokenList[i].type == DOUBLE ||                   tokenList[i].type == FLOAT ||                   tokenList[i].type == INT ||                   tokenList[i].type == LONG ||                   tokenList[i].type == SHORT ||                   tokenList[i].type == VOID) {            tokenId[Csize] = i;            CodeStack[Csize++] = FindSignature("type");        } else if (tokenList[i].type == STRING_VAL) {            tokenId[Csize] = i;            CodeStack[Csize++] = FindSignature("string");        } else if (tokenList[i].type == DOU_QUE ||                   tokenList[i].type == SIN_QUE ||                   
tokenList[i].type == NOTE1 ||                   tokenList[i].type == NOTE2) {        } else if (tokenList[i].type == IDENTIFER) {            tokenId[Csize] = i;            CodeStack[Csize++] = FindSignature("id");        } else if (tokenList[i].type == FOR) {            tokenId[Csize] = i;            CodeStack[Csize++] = FindSignature("for");        } else if (tokenList[i].type == IF) {            tokenId[Csize] = i;            CodeStack[Csize++] = FindSignature("if");        } else if (tokenList[i].type == ELSE) {            tokenId[Csize] = i;            CodeStack[Csize++] = FindSignature("else");        } else if (tokenList[i].type == RETURN) {            tokenId[Csize] = i;            CodeStack[Csize++] = FindSignature("return");        } else {            tokenId[Csize] = i;            CodeStack[Csize++] = FindSignature(tokenList[i].content);        }        if (tokenList[i].type != DOU_QUE && tokenList[i].type != SIN_QUE) {            printf("%15s  %15s  %15d\n", Signature[CodeStack[Csize - 1]].first, tokenList[i].content, tokenList[i].type);        }    }    ArrayFlag = -1;    while (Csize) {        if (ShiftList[0][CodeStack[Csize - 1]][0][0] == 0) {            printf("Error:Line%2d %s\n", tokenList[tokenId[Csize - 1]].line, tokenList[tokenId[Csize - 1]].content);            break;        }        bool Match = false;        for (int i = 1; i <= ShiftList[0][CodeStack[Csize - 1]][0][0]; i++) {            Tsize = 1;            TryStack[0] = 0;            int ret = Try(0, i, Csize - 1);            if (ret != -10) {                Match = true;                Csize = ret + 1;                break;            }        }        if (!Match) {            printf("Error:Line%2d %s\n", tokenList[tokenId[Csize - 1]].line, tokenList[tokenId[Csize - 1]].content);            break;        }    }    if (Csize == 0) {        printf("Successful\n");    }}#endif
View Code

 Macro_Struct.h:宏定义以及结构体定义

技术分享
// Macro_Struct.h - token type codes, error codes and the record types shared
// by the lexer and the syntax analyser.
#ifndef __MS__
#define __MS__

// ---- Keywords ----
#define AUTO 1
#define BREAK 2
#define CASE 3
#define CHAR 4
#define CONST 5
#define CONTINUE 6
#define DEFAULT 7
#define DO 8
#define DOUBLE 9
#define ELSE 10
#define ENUM 11
#define EXTERN 12
#define FLOAT 13
#define FOR 14
#define GOTO 15
#define IF 16
#define INT 17
#define LONG 18
#define REGISTER 19
#define RETURN 20
#define SHORT 21
#define SIGNED 22
#define SIZEOF 23
#define STATIC 24
#define STRUCT 25
#define SWITCH 26
#define TYPEDEF 27
#define UNION 28
#define UNSIGNED 29
#define VOID 30
#define VOLATILE 31
#define WHILE 32
#define KEY_DESC "关键字"

// ---- Identifiers ----
#define IDENTIFER 40
#define IDENTIFER_DESC "标志符"

// ---- Constants ----
#define INT_VAL 51    // integer constant
#define CHAR_VAL 52   // character constant
#define FLOAT_VAL 53  // floating-point constant
#define STRING_VAL 54 // string constant
#define MACRO_VAL 55  // macro constant
#define CONSTANT_DESC "常量"

// ---- Operators ----
#define NOT 61                 // !
#define BYTE_AND 62            // &
#define COMPLEMENT 63          // ~
#define BYTE_XOR 64            // ^
#define MUL 65                 // *
#define DIV 66                 // /
#define MOD 67                 // %
#define ADD 68                 // +
#define SUB 69                 // -
#define LES_THAN 70            // <
#define GRT_THAN 71            // >
#define ASG 72                 // =
#define ARROW 73               // ->
#define SELF_ADD 74            // ++
#define SELF_SUB 75            // --
#define LEFT_MOVE 76           // <<
#define RIGHT_MOVE 77          // >>
#define LES_EQUAL 78           // <=
#define GRT_EQUAL 79           // >=
#define EQUAL 80               // ==
#define NOT_EQUAL 81           // !=
#define AND 82                 // &&
#define OR 83                  // ||
#define COMPLETE_ADD 84        // +=
#define COMPLETE_SUB 85        // -=
#define COMPLETE_MUL 86        // *=
#define COMPLETE_DIV 87        // /=
#define COMPLETE_BYTE_XOR 88   // ^=
#define COMPLETE_BYTE_AND 89   // &=
#define COMPLETE_COMPLEMENT 90 // ~=
#define COMPLETE_MOD 91        // %=
#define BYTE_OR 92             // |
#define OPE_DESC "运算符"

// ---- Delimiters ----
#define LEFT_BRA 100    // (
#define RIGHT_BRA 101   // )
#define LEFT_INDEX 102  // [
#define RIGHT_INDEX 103 // ]
#define L_BOUNDER 104   // {
#define R_BOUNDER 105   // }
#define POINTER 106     // .
#define JING 107        // #
#define UNDER_LINE 108  // _
#define COMMA 109       // ,
#define SEMI 110        // ;
#define SIN_QUE 111     // single quote
#define DOU_QUE 112     // double quote
#define CLE_OPE_DESC "限界符"

// ---- Comments ----
#define NOTE1 120 // block comment (slash-star form)
#define NOTE2 121 // line comment (double-slash form)
#define NOTE_DESC "注释"

// ---- Header files ----
#define HEADER 130
#define HEADER_DESC "头文件"

// ---- Error kinds: description text plus numeric code ----
// NOTE(review): the quote characters inside the bracket-related messages are
// reproduced exactly as found; they may be a copy/paste artifact of plain
// ASCII quotes - confirm before normalising.
#define FLOAT_ERROR "float表示错误"
#define FLOAT_ERROR_NUM 1
#define DOUBLE_ERROR "double表示错误"
#define DOUBLE_ERROR_NUM 2
#define NOTE_ERROR "注释没有结束符"
#define NOTE_ERROR_NUM 3
#define STRING_ERROR "字符串常量没有结束符"
#define STRING_ERROR_NUM 4
#define CHARCONST_ERROR "字符常量没有结束符"
#define CHARCONST_ERROR_NUM 5
#define CHAR_ERROR "非法字符"
#define CHAR_ERROR_NUM 6
#define LEFT_BRA_ERROR "‘(‘没有对应项"
#define LEFT_BRA_ERROR_NUM 7
#define RIGHT_BRA_ERROR "‘)‘没有对应项"
#define RIGHT_BRA_ERROR_NUM 8
#define LEFT_INDEX_ERROR "‘[‘没有对应项"
#define LEFT_INDEX_ERROR_NUM 9
#define RIGHT_INDEX_ERROR "‘]‘没有对应项"
#define RIGHT_INDEX_ERROR_NUM 10
#define L_BOUNDER_ERROR "‘{‘没有对应项"
#define L_BOUNDER_ERROR_NUM 11
#define R_BOUNDER_ERROR "‘}‘没有对应项"
#define R_BOUNDER_ERROR_NUM 12
#define PRE_PROCESS_ERROR "预处理错误" // header-file or macro-definition error
#define PRE_PROCESS_ERROR_NUM 13

#define _NULL "无"

// ---- Additional codes ----
// NOTE(review): the names mirror the grammar terminals describe/type/string/
// digit; nothing in this header uses them - confirm where they are consumed.
#define DESCRIBE 4000
#define TYPE 4001
#define STRING 4002
#define DIGIT 4003

// One lexical token.
struct Token {
    char content[30];  // token text
    char describe[30]; // category description
    int type;          // token type code (one of the macros above)
    int line;          // source line number
};

// One lexical error.
struct Error {
    char content[30];  // offending text
    char describe[30]; // error description
    int type;          // error code
    int line;          // source line number
};

#endif
View Code

stdafx.h:头文件包含

#ifndef __STDAFX__#define __STDAFX__#include <iostream>#include <fstream>#include <stdio.h>#include <stdlib.h>#include <string.h>#include <vector>#include <iomanip>#include <stack>#include <regex>using namespace std;#endif

input.txt:要检查的程序

一个简单的C语言语法检查器的实现