首页 > 代码库 > 词法分析器--DFA(c++实现)

词法分析器--DFA(c++实现)

语言名为TINY

示例程序:

begin
     var x,y:interger;
     x:=10;
     read(x);
     if y<0 then x:=x-y;
     x:=x+y;
     write(x);
end

TINY语言扫描程序的DFA:技术分享

代码

//ExplLexicalAnalyzer.h
#ifndef EXPLLEXICALANALYZER_H
#define EXPLLEXICALANALYZER_H


// Maximum number of characters kept for one token's lexeme; the lexeme
// buffer in the scanner is declared with MAXTOKENLEN + 1 elements.
#define MAXTOKENLEN 40
// Number of entries in the scanner's reserved-word table.
#define MAXRESERVED 13


/* Kinds of token the scanner can return. Enumerator order (and therefore the
 * numeric values) is unchanged. */
typedef enum {
    /* bookkeeping tokens */
    ENDFILE,    /* end of input */
    ERROR,      /* character that starts no valid token */

    /* reserved words */
    IF,
    THEN,
    ELSE,
    END,
    REPEAT,
    UNTIL,
    READ,
    WRITE,
    VAR,
    BEGIN,
    INTEGER,
    DOUBLE,
    STRING,

    /* multi-character tokens */
    ID,         /* identifier */
    NUM,        /* number */

    /* operators and punctuation */
    ASSIGN,     /* := */
    EQ,         /* =  */
    LT,         /* <  */
    PLUS,       /* +  */
    MINUS,      /* -  */
    TIMES,      /* *  */
    OVER,       /* /  */
    LPAREN,     /* (  */
    RPAREN,     /* )  */
    SEMI,       /* ;  */
    COMMA,      /* ,  */
    DEFINE      /* :  */
} TokenType;

//typedef struct {
//    TokenType kind;
//    int row = -1;
//    int column = -1;
//    double value;
//    std::string ID;
//} Token;


// Scans the input and returns the kind of the next token;
// returns ENDFILE once the input is exhausted.
TokenType getToken(void);

#endif //EXPLLEXICALANALYZER_H
//ExplLexicalAnalyzer.cpp
#include <cctype>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include  "ExplLexicalAnalyzer.h"

using namespace std;
  9 
 /* States of the scanner's DFA. The listing line numbers that were pasted
  * into the code have been removed so the definition compiles. */
 typedef enum {
     START,      /* between tokens */
     INASSIGN,   /* a ':' has been read; the next character decides := or : */
     INCOMMENT,  /* inside a { ... } comment */
     INNUM,      /* reading the digits of a number */
     INID,       /* reading the letters of an identifier or reserved word */
     DONE        /* a complete token has been recognised */
 } StateType;
 13 
 14 char tokenString[MAXTOKENLEN + 1];
 15 
 16 #define BUFLEN 256
 17 
 18 static char lineBuf[BUFLEN];
 19 static int linepos = 0;
 20 static int bufsize = 0;
 21 static int EOF_flag = false;
 22 static string filename;
 23 static fstream get;
 24 static int lineno = 0;
 25 static int columnpos = 0;
 26 bool TraceScan = true;
 27 StateType state;
 28 
 29 static struct {
 30     const char *str;
 31     TokenType tok;
 32 } reservedWords[MAXRESERVED]
 33         = {{"if",       IF},
 34            {"then",     THEN},
 35            {"else",     ELSE},
 36            {"end",      END},
 37            {"repeat",   REPEAT},
 38            {"until",    UNTIL},
 39            {"read",     READ},
 40            {"write",    WRITE},
 41            {"begin",    BEGIN},
 42            {"var",      VAR},
 43            {"interger", INTEGER},
 44            {"double",   DOUBLE},
 45            {"string",   STRING}};
 46 
 47 
 48 static char
 49 getNextChar() {
 50     if (linepos >= bufsize) {
 51         lineno = 0;
 52         if (state != START)
 53             return  ;
 54         if (get.getline(lineBuf, BUFLEN - 1)) {
 55             printf("%d: %s\n", columnpos++, lineBuf);
 56             bufsize = (int) strlen(lineBuf);
 57             linepos = 0;
 58             return lineBuf[linepos++];
 59         } else {
 60             return EOF;
 61         }
 62     } else return lineBuf[linepos++];
 63 }
 64 
 65 
 66 static TokenType reservedLookup(char *s) {
 67     int i;
 68     for (i = 0; i < MAXRESERVED; i++)
 69         if (!strcmp(s, reservedWords[i].str))
 70             return reservedWords[i].tok;
 71     return ID;
 72 }
 73 
 74 //退回一个字符
 75 static void ungetNextChar(void) { if (!EOF_flag) linepos--; }
 76 
 77 //打印分析结果
 78 void printToken(TokenType token, const char *tokenString) {
 79     switch (token) {
 80         case IF:
 81         case THEN:
 82         case ELSE:
 83         case END:
 84         case REPEAT:
 85         case UNTIL:
 86         case READ:
 87         case WRITE:
 88         case BEGIN:
 89         case VAR:
 90         case INTEGER:
 91         case DOUBLE:
 92         case STRING:
 93             printf("reserved word: %s\n", tokenString);
 94             break;
 95         case DEFINE:
 96             printf(":\n");
 97             break;
 98         case COMMA:
 99             printf(",\n");
100             break;
101         case ASSIGN:
102             printf(":=\n");
103             break;
104         case LT:
105             printf("<\n");
106             break;
107         case EQ:
108             printf("=\n");
109             break;
110         case LPAREN:
111             printf("(\n");
112             break;
113         case RPAREN:
114             printf(")\n");
115             break;
116         case SEMI:
117             printf(";\n");
118             break;
119         case PLUS:
120             printf("+\n");
121             break;
122         case MINUS:
123             printf("-\n");
124             break;
125         case TIMES:
126             printf("*\n");
127             break;
128         case OVER:
129             printf("/\n");
130             break;
131         case ENDFILE:
132             break;
133         case NUM:
134             printf("NUM, val= %s\n", tokenString);
135             break;
136         case ID:
137             printf("ID, name= %s\n", tokenString);
138             break;
139         case ERROR:
140             printf("ERROR: %s\n", tokenString);
141             break;
142         default:
143             printf("Unknown token: %d\n", token);
144     }
145 }
146 
147 
148 //词法分析
149 TokenType getToken(void) {
150     int tokenStringIndex = 0;
151     TokenType currentToken;
152     state = START;
153     bool save;
154     while (state != DONE) {
155         char c = getNextChar();
156         save = true;
157         switch (state) {
158             case START:
159                 if (isdigit(c))
160                     state = INNUM;
161                 else if (isalpha(c))
162                     state = INID;
163                 else if (c == :)
164                     state = INASSIGN;
165                 else if ((c ==  ) || (c == \t) || (c == \n))
166                     save = false;
167                 else if (c == {) {
168                     save = false;
169                     state = INCOMMENT;
170                 } else {
171                     state = DONE;
172                     switch (c) {
173                         case EOF:
174                             return ENDFILE;
175                         case ,:
176                             currentToken = COMMA;
177                             break;
178                         case =:
179                             currentToken = EQ;
180                             break;
181                         case <:
182                             currentToken = LT;
183                             break;
184                         case +:
185                             currentToken = PLUS;
186                             break;
187                         case -:
188                             currentToken = MINUS;
189                             break;
190                         case *:
191                             currentToken = TIMES;
192                             break;
193                         case /:
194                             currentToken = OVER;
195                             break;
196                         case (:
197                             currentToken = LPAREN;
198                             break;
199                         case ):
200                             currentToken = RPAREN;
201                             break;
202                         case ;:
203                             currentToken = SEMI;
204                             break;
205                         default:
206                             currentToken = ERROR;
207                             break;
208                     }
209                 }
210                 break;
211             case INCOMMENT:
212                 save = false;
213                 if (c == EOF) {
214                     state = DONE;
215                     currentToken = ENDFILE;
216                 } else if (c == }) state = START;
217                 break;
218             case INASSIGN:
219                 state = DONE;
220                 if (c == =)
221                     currentToken = ASSIGN;
222                 else {
223                     currentToken = DEFINE;
224                     ungetNextChar();
225                 }
226                 break;
227             case INNUM:
228                 if (!isdigit(c)) {
229                     ungetNextChar();
230                     save = false;
231                     state = DONE;
232                     currentToken = NUM;
233                 }
234                 break;
235             case INID:
236                 if (!isalpha(c)) {
237                     tokenString[tokenStringIndex] = \0;
238                     if (!strcmp(tokenString, "begin") || !strcmp(tokenString, "end")) {
239                         save = false;
240                         state = DONE;
241                         currentToken = ID;
242                         break;
243                     }
244                     ungetNextChar();
245                     save = false;
246                     state = DONE;
247                     currentToken = ID;
248                 }
249                 break;
250             case DONE:
251                 break;
252         }
253         if ((save) && (tokenStringIndex <= MAXTOKENLEN) && (state != START && !isspace(c)))
254             tokenString[tokenStringIndex++] = c;
255         if (state == DONE) {
256             tokenString[tokenStringIndex] = \0;
257             if (currentToken == ID)
258                 currentToken = reservedLookup(tokenString);
259         }
260     }
261     if (TraceScan) {
262         printf("\t%d: ", lineno++);
263         printToken(currentToken, tokenString);
264     }
265     return currentToken;
266 }
267 
268 
269 int
270 main() {
271     if (cin >> filename && filename == "q") {
272         filename = "......";
273     }
274     get.open(filename, ios::in);
275     while (getToken() != ENDFILE);
276 }

 运行结果:

技术分享

词法分析器--DFA(c++实现)