首页 > 代码库 > 删除C语言程序中所有的注释语句,代码实现

删除C语言程序中所有的注释语句,代码实现

学习《C程序设计语言》到第1章最后,有一道题目:

编写一个程序,删除C语言程序中所有的注释语句。要正确处理带引号的字符串与字符常量。在C语言中,注释不允许嵌套。
Exercise 1-23. Write a program to remove all comments from a C program. Don't forget to handle quoted strings and character constants properly. C comments don't nest.
刚开始,我用一种brute-force的方式,对每个字符进行遍历,然后进行判断,有没有进入注释。

这样做有一个非常麻烦的问题:只有连续检测到"//"或者"/*"时,才能确定进入了注释状态;如果只检测到1个"/",而后面跟的是其它字符的话,还得将这个单独的"/"打印出来。

而且有许多的分支状态,如果用if/else的话,会难以理解,并容易出错。

在网上搜了一下,发现有一种解法非常好:状态机。在各种状态之间跳转,逻辑清晰,不易出错,出错了也容易调试。

下面把代码贴出来:

#include <stdio.h>

/* Current state of the comment-stripping state machine (0 = ordinary code). */
int state;

/* Two-character window over the input: c1 is the previous character,
 * c2 is the character currently being processed. */
int c1;
int c2;

/* Processes one input character; the definition appears later in this file. */
void change_state(int c);

/*
 * Reads a C program from standard input and writes it to standard output
 * with its comments removed, by passing every character to change_state().
 *
 * argc, argv: unused.
 * Returns 0.
 */
int main(int argc, const char * argv[]) {
    int c;

    (void)argc;
    (void)argv;

    state = 0;
    c1 = 0;
    c2 = 0;
    while ((c = getchar()) != EOF) {
        /* Slide the window: c1 is the previous character, c2 the current one. */
        c1 = c2;
        c2 = c;
        change_state(c);
    }

    /* State 1 means a single '/' was read but held back until the next
     * character shows whether a comment starts. If the input ends there,
     * the slash is ordinary code and must still be written. */
    if (state == 1) {
        putchar('/');
    }

    /* Never executed: these lines exist only as test input for when the
     * program is run on its own source (an inline comment, and comment
     * markers inside string literals that must be left alone). */
    if (/* DISABLES CODE */ (0)==1) {
        printf("just test://abcd");
        printf("just test:/*hello*/");
    }
    return 0;
}

/*
 * State-machine step of the comment stripper.
 *
 * Consumes one input character, updates the global `state`, and writes to
 * standard output the characters that belong to the comment-free program.
 *
 * c: the character just read from the input.
 *
 * Behaviour worth knowing:
 *  - A lone '/' is held back until the next character arrives; it is written
 *    only once that character shows that no comment starts.
 *  - A quote that directly follows a lone '/' still opens a string literal or
 *    character constant, so comment markers inside it are preserved.
 *  - Inside a block comment, a run of several stars followed by a slash
 *    closes the comment.
 *  - A line comment is removed up to, but not including, its newline.
 *  - A block comment is removed outright; no space is written in its place.
 */
void change_state(int c){
    /* Numeric values match the ones used for `state` elsewhere in the file. */
    enum {
        ST_CODE          = 0,   /* ordinary code */
        ST_SLASH         = 1,   /* one '/' seen, not yet written */
        ST_LINE_COMMENT  = 2,   /* inside a line comment */
        ST_BLOCK_COMMENT = 3,   /* inside a block comment */
        ST_BLOCK_STAR    = 4,   /* inside a block comment, last char was '*' */
        ST_STRING        = 5,   /* inside a string literal */
        ST_CHAR          = 6,   /* inside a character constant */
        ST_STRING_ESC    = 7,   /* string literal, right after a backslash */
        ST_CHAR_ESC      = 8    /* character constant, right after a backslash */
    };

    switch (state) {
    case ST_CODE:
        if (c == '/') {
            state = ST_SLASH;
        } else {
            if (c == '"') {
                state = ST_STRING;
            } else if (c == '\'') {
                state = ST_CHAR;
            }
            putchar(c);
        }
        break;

    case ST_SLASH:
        if (c == '/') {
            state = ST_LINE_COMMENT;
        } else if (c == '*') {
            state = ST_BLOCK_COMMENT;
        } else {
            /* The held-back slash was ordinary code after all. */
            putchar('/');
            /* Treat c exactly as ordinary code would, so that a quote
             * right after the slash is still tracked as a literal. */
            if (c == '"') {
                state = ST_STRING;
            } else if (c == '\'') {
                state = ST_CHAR;
            } else {
                state = ST_CODE;
            }
            putchar(c);
        }
        break;

    case ST_LINE_COMMENT:
        if (c == '\n') {
            /* The newline ends the comment and is kept in the output. */
            state = ST_CODE;
            putchar(c);
        }
        break;

    case ST_BLOCK_COMMENT:
        if (c == '*') {
            state = ST_BLOCK_STAR;
        }
        break;

    case ST_BLOCK_STAR:
        if (c == '/') {
            state = ST_CODE;
        } else if (c != '*') {
            /* Another '*' keeps waiting for the closing '/'; anything
             * else means the comment continues. */
            state = ST_BLOCK_COMMENT;
        }
        break;

    case ST_STRING:
        if (c == '"') {
            state = ST_CODE;
        } else if (c == '\\') {
            state = ST_STRING_ESC;
        }
        putchar(c);
        break;

    case ST_CHAR:
        if (c == '\'') {
            state = ST_CODE;
        } else if (c == '\\') {
            state = ST_CHAR_ESC;
        }
        putchar(c);
        break;

    case ST_STRING_ESC:
        /* The escaped character is copied verbatim and cannot end the string. */
        state = ST_STRING;
        putchar(c);
        break;

    case ST_CHAR_ESC:
        /* The escaped character is copied verbatim and cannot end the constant. */
        state = ST_CHAR;
        putchar(c);
        break;

    default:
        /* Unknown state value: ignore the character. */
        break;
    }
}

以本段代码作为输入,结果如下:

#include <stdio.h>

int state;

int c1,c2;

void change_state(int c);

int main(int argc, const char * argv[]) {
    int c;
    state = 0;
    c1 = 0;
    c2 = 0;
    while ((c=getchar())!=EOF) {
        c1 = c2;
        c2 = c;
        change_state(c);
    }
    if ( (0)==1) {
        printf("just test://abcd");
        printf("just test:/*hello*/");
    }
}


void change_state(int c){
    if (state==0) {
        if (c=='/') {
            state = 1;
        }else if (c=='"'){
            state = 5;
            putchar(c);
        }else if (c=='\''){
            state = 6;
            putchar(c);
        }
        else{
            state = 0;
            putchar(c);
        }
    }else if (state==1) {
        if (c=='/') {
            state = 2;
        }else if (c=='*'){
            state = 3;
        }else{
            state = 0;
            putchar(c1);
            putchar(c);
        }
    }else if (state==2) {
        if (c=='\n') {
            state = 0;
            putchar(c);
        }else{
            state = 2;
        }
    }else if (state==3) {
        if (c=='*') {
            state = 4;
        }else{
            state = 3;
        }
    }else if (state==4) {
        if (c=='/') {
            state = 0;
        }else{
            state = 3;
        }
    }else if (state==5){
        if (c=='"') {
            state = 0;
            putchar(c);
        }else if(c=='\\'){
            state = 7;
            putchar(c);
        }else{
            state = 5;
            putchar(c);
        }
    }else if (state==6){
        if (c=='\'') {
            state = 0;
            putchar(c);
        }else if(c=='\\'){
            state = 8;
            putchar(c);
        }else{
            state = 6;
            putchar(c);
        }
    }else if (state==7){
        state = 5;
        putchar(c);
    }else if (state==8){
        state = 6;
        putchar(c);
    }
}

perfect!

感谢@roma823 及其文章:http://blog.csdn.net/roma823/article/details/6364849

删除C语言程序中所有的注释语句,代码实现