首页 > 代码库 > 简易词法分析器
简易词法分析器
最近开始学习编译器，希望能坚持下去，也想通过写编译器把 C++ 练好一点。今天花了一天时间做了一个简单的词法分析器；过几天编译原理的书到了之后，希望能通过看书找到更好的实现方法。
//learn.cpp
#include <iostream>#include "learn.h"#include <fstream>using namespace std;static char *st_line;static int st_line_pos;int length;typedef enum { INITIAL_STATUS, IN_INT_PART_STATUS, IN_FRAC_PART_STATUS, DOT_STATUS,} ParseStatus;voidget_token(Token *token) { int out_pos = 0; ParseStatus status = INITIAL_STATUS; char current_char; string temp; token->kind = BAD_TOKEN; while (st_line[st_line_pos] != ‘\0‘) { current_char = st_line[st_line_pos]; if ((status == IN_INT_PART_STATUS || status == IN_FRAC_PART_STATUS) && !isdigit(current_char) && current_char != ‘.‘) { token->kind = NUMBER_TOKEN; sscanf(token->str, "%lf", &token->value); return; } if (isalpha(current_char)) { while (current_char != ‘=‘ && current_char != ‘;‘ && current_char != ‘<‘ && current_char != ‘>‘ && current_char != ‘+‘ && current_char != ‘-‘ && current_char != ‘*‘ && current_char != ‘/‘ && current_char != ‘(‘ && current_char != ‘)‘) { temp += current_char; token->str[out_pos] = st_line[st_line_pos]; ++st_line_pos; ++out_pos; current_char = st_line[st_line_pos]; token->str[out_pos + 1] = ‘\0‘; if (temp == "if") { token->kind = IF_OPERATOR_TOKEN; return; } else if (temp == "while") { token->kind = WHILE_OPERATOR_TOKEN; return; } else if (temp == "for") { token->kind = FOR_PAREN_TOKEN; return; } else if (temp == "int") { token->kind = INT_TOKEN; return; } else if (temp == "double") { token->kind = DOUBLE_TOKEN; return; } else if (temp == "string") { token->kind = STRING_TOKEN; return; } else if (temp == "cout") { token->kind = COUT_TOKEN; return; } else if (temp == "return") { token->kind = RETURN_TOKEN; return; } else if (temp == "main") { token->kind = MAIN_TOKEN; return; } } token->kind = VARIABLE_NAME_TOKEN; token->string_value =http://www.mamicode.com/ temp; token->str[out_pos] = ‘\0‘; return; } if ((current_char == ‘"‘ || current_char == ‘\‘‘) && status == INITIAL_STATUS) { current_char = st_line[++st_line_pos]; while (current_char != ‘"‘ && current_char != ‘\‘‘) { token->str[out_pos] = 
st_line[st_line_pos]; out_pos++; temp += current_char; current_char = st_line[++st_line_pos]; } token->string_value =http://www.mamicode.com/ temp; token->kind = STRING_VARIABLE_TOKEN; st_line_pos++; token->str[++out_pos] = ‘\0‘; return; } if (isspace(current_char)) { if (status == INITIAL_STATUS) { st_line_pos++; continue; } } token->str[out_pos] = st_line[st_line_pos]; st_line_pos++; out_pos++; token->str[out_pos] = ‘\0‘; if (current_char == ‘+‘) { if (st_line[st_line_pos] == ‘+‘) { token->str[out_pos] = st_line[st_line_pos]; token->str[out_pos + 1] = ‘\0‘; ++st_line_pos; token->kind = SELF_PLUS_ONE_OPERATOR_TOKEN; return; } else { token->kind = ADD_OPERATOR_TOKEN; return; } } else if (current_char == ‘-‘) { if (st_line[st_line_pos] == ‘-‘) { token->str[out_pos] = st_line[st_line_pos]; token->str[out_pos + 1] = ‘\0‘; ++st_line_pos; token->kind = SELF_SUB_ONE_OPERATOR_TOKEN; return; } else { token->kind = SUB_OPERATOR_TOKEN; return; } } else if (current_char == ‘<‘) { if (st_line[st_line_pos] == ‘<‘) { token->str[out_pos] = st_line[st_line_pos]; token->str[out_pos + 1] = ‘\0‘; ++st_line_pos; token->kind = OUT_OPERATER_TOKEN; return; } else { token->kind = LESS_OPERATER_TOKEN; return; } } else if (current_char == ‘>‘) { if (st_line[st_line_pos] == ‘>‘) { token->str[out_pos] = st_line[st_line_pos]; token->str[out_pos + 1] = ‘\0‘; ++st_line_pos; token->kind = GET_OPERATER_TOKEN; return; } else { token->kind = GREATE_OPERATER_TOKEN; return; } } else if (current_char == ‘*‘) { token->kind = MUL_OPERATOR_TOKEN; return; } else if (current_char == ‘/‘) { token->kind = DIV_OPERATOR_TOKEN; return; } else if (current_char == ‘;‘) { token->kind = END_OF_LINE_TOKEN; return; } else if (current_char == ‘{‘) { token->kind = LEFT_BRACE_TOKEN; return; } else if (current_char == ‘}‘) { token->kind = RIGHT_BRACE_TOKEN; return; } else if (current_char == ‘(‘) { token->kind = LEFT_PARENTHESS_TOKEN; return; } else if (current_char == ‘)‘) { token->kind = RIGHT_PARENTHESE_TOKEN; return; 
} else if (current_char == ‘=‘) { if (st_line[st_line_pos] == ‘=‘) { token->kind = EQUAL_PAREN_TOKEN; return; } else { token->kind = ASSIGMENT_PAREN_TOKEN; return; } } else if (isdigit(current_char)) { if (status == INITIAL_STATUS) { status = IN_INT_PART_STATUS; } else if (status == DOT_STATUS) { status = IN_FRAC_PART_STATUS; } } else if (current_char == ‘.‘) { if (status == IN_INT_PART_STATUS) { status = DOT_STATUS; } else { fprintf(stderr, "syntax error.\n"); exit(1); } } }}//void//set_line(char *line) {// st_line = line;// st_line_pos = 0;//}voidparse_line(string filename) { Token token; st_line_pos = 0; char temp, test[1024]; ifstream get; get.open(filename); int i = 0; while (!get.eof()) { get.read(&temp, 1); test[i++] = temp; } length = i; st_line = test; for (;;) { get_token(&token); if (st_line_pos == length) { return; } printf("kind..%d, str..%s\n", token.kind, token.str); }}int main() { string filename = "/home/liuyu/文档/test"; parse_line(filename); return 0;}
//learn.h
#ifndef LEARN_LEARN_H
#define LEARN_LEARN_H

#include <iostream>
#include <string>

// NOTE(review): a using-directive in a header leaks into every includer.
// It is retained only so that existing code relying on it keeps compiling;
// new code should spell out std:: explicitly.
using namespace std;

// Kinds of token the lexer can report. The numeric values are printed by the
// driver, so the order of the enumerators must not change.
typedef enum {
    BAD_TOKEN,                     // initial kind before a token is recognised
    NUMBER_TOKEN,                  // numeric literal
    STRING_TOKEN,                  // keyword "string"
    INT_TOKEN,                     // keyword "int"
    DOUBLE_TOKEN,                  // keyword "double"
    COUT_TOKEN,                    // keyword "cout"
    RETURN_TOKEN,                  // keyword "return"
    MAIN_TOKEN,                    // keyword "main"
    IF_OPERATOR_TOKEN,             // keyword "if"
    OUT_OPERATER_TOKEN,            // "<<"
    GET_OPERATER_TOKEN,            // ">>"
    LESS_OPERATER_TOKEN,           // "<"
    GREATE_OPERATER_TOKEN,         // ">"
    LEFT_BRACE_TOKEN,              // "{"
    RIGHT_BRACE_TOKEN,             // "}"
    LEFT_PARENTHESS_TOKEN,         // "("
    RIGHT_PARENTHESE_TOKEN,        // ")"
    WHILE_OPERATOR_TOKEN,          // keyword "while"
    EQUAL_PAREN_TOKEN,             // "=="
    FOR_PAREN_TOKEN,               // keyword "for"
    ASSIGMENT_PAREN_TOKEN,         // "="
    END_OF_LINE_TOKEN,             // ";"
    VARIABLE_NAME_TOKEN,           // identifier that is not a keyword
    STRING_VARIABLE_TOKEN,         // quoted string literal
    ADD_OPERATOR_TOKEN,            // "+"
    SUB_OPERATOR_TOKEN,            // "-"
    MUL_OPERATOR_TOKEN,            // "*"
    DIV_OPERATOR_TOKEN,            // "/"
    SELF_PLUS_ONE_OPERATOR_TOKEN,  // "++"
    SELF_SUB_ONE_OPERATOR_TOKEN,   // "--"
    END_TOKEN                      // presumably marks end of input; confirm against the lexer
} TokenKind;

// Capacity of Token::str in bytes, including the terminating NUL.
#define MAX_TOKEN_SIZE (100)

// One lexical token as filled in by get_token().
typedef struct {
    TokenKind kind;             // what was recognised
    double value;               // numeric value, set for NUMBER_TOKEN
    std::string string_value;   // text of variable names and string literals
    char str[MAX_TOKEN_SIZE];   // token text as a C string
} Token;

// NOTE(review): the definition of set_line in learn.cpp is commented out;
// confirm that a definition exists before calling it.
void set_line(char *line);

// Reads the next token from the lexer's current input into *token.
void get_token(Token *token);

#endif
测试:
简易词法分析器
声明：以上内容来自用户投稿及互联网公开渠道收集整理发布，本网站不拥有所有权，未作人工编辑处理，也不承担相关法律责任。若内容有误或涉及侵权，可进行投诉（投诉/举报），工作人员会在 5 个工作日内联系你；一经查实，本站将立刻删除涉嫌侵权内容。