首页 > 代码库 > 计算机系统要素:第十一章 编译器:代码生成
计算机系统要素:第十一章 编译器:代码生成
一,项目介绍
终于来到了编译器部分的最后一个章节——代码生成阶段。本章的目标就是将Jack语言转化为VM语言,完成Jack编译器的构建。
刚刚接触这章的内容时,会比较难上手,最主要的问题就在于,这章的内容看起来和第十章没有什么关系。刚开始做这个项目时,我就很疑惑,第十章输出的不是一个结构化的xml文件吗?这个文件在第十一章根本不需要输出,那么这章的内容从何开始呢?
的确,这个xml文件是不需要输出的,但是第十章的目的并不单纯是输出这个xml文件,它更重要的目的是为了让我们了解如何对jack程序文件进行语法分析,以完成CompilationEngine的构建。所以,我们需要关注的是CompilationEngine的函数结构,这个函数结构才是第十一章内容的基础。
二,操作步骤
总体而言,作者为我们设计的操作顺序是非常合理的。在此,我再提出几点预备步骤,这些步骤并不是必要的,但是通过这些操作,能够使得整个项目的实现更加流畅。
1,先给命令行加上-x选项,如果命令行中出现-x,则表示输出xml文件和VM文件,不加-x,则表示只输出VM文件。这样就将两个不同的“写入文件流”区分开来。
2,构建符号表模块。存储符号表时,我所用的数据结构是Python语言中的二维列表。这一阶段的任务是把每一个遇到的Identifier都加以标注并且输出相关信息。
3,进入输出VM语言的阶段。首先可以使用内置的JackCompiler将Jack语言转换为VM语言,(Windows上的JackCompiler需要自己设置配置文件才能够使用,具体教程在这儿)从简单的文件开始转换,自己认真分析代码的转换过程。例如,最简单的Seven函数的Jack代码和VM代码分别如下。
class Main { function void main() { do Output.printInt(1 + (2 * 3)); return; } }
function Main.main 0 push constant 1 push constant 2 push constant 3 call Math.multiply 2 add call Output.printInt 1 pop temp 0 push constant 0 return
之后你便可以对照二者,分析转换规则了,例如第一句function Main.main 0肯定是在读取完了所有的ClassVarDec,知道了函数名之后才写入的,于是,写入语句必然就是在compileStatements之前。照这个步骤,逐步完善你的编译器。
三,注意点
我的建议是,先回过头去复习VM代码和Jack语言,了解高级代码转化为VM代码的具体过程,你可以通过看图11.6,图7.9来了解其中的逻辑。
在写编译器的过程中,注意点非常多,这一方面,书中11.2节阐述得非常清楚,在此我重申几个比较关键的问题:
1,constructor, method和function参数配置不同,method方法会默认带一个this的参数,需要加以区分。而讨论参数时,VM代码中function xxx n与call function m中的n与m也是不同的,前者指的是函数中的局部变量数(local),后者指的是调用函数时引入的参数(argument)。
2,Function和Method的调用方式不同,Function只需调用类名ClassName.Function就可以使用,但是method需要调用具体的类实例如abc.Method才可以调用,如果方法就在类中的话,也可直接使用method()。
3,数组只可能在两个地方出现,一是term中,用于引用,另外是Let语句的左边,用于数组赋值。要注意的是,这两处调用的VM代码是不同的,需要加以区分。
4,constructor是构造函数,在编译时,需要先分析Class中有多少个field变量,然后使用Memory.alloc(size)来给它们分配空间,最后再将其基地址存入this指针中。
上述这些注意点的具体代码都可以通过JackCompiler编译现有文件而得到,我就不再赘述了。
最后,debug的过程是痛苦的,也是无可避免的。如果代码出现问题,可以比对JackCompiler的输出文件与你的编译器输出文件的不同。这个过程能够使你对编译有更深的理解。
JackCompiler.py
#!/usr/bin/python
'''
Command-line driver of the Jack compiler.

The command line of this module is:

    JackCompiler.py (-x) sourcename

The optional first argument -x decides whether the structured XML parse
tree produced by CompilationEngine is kept next to the VM file. Without
-x only the .vm file is written.
'''
import CompilationEngine
import SymbolTable
import os
import sys
import tempfile


def strip_comments(text):
    '''
    Return text with // line comments and /* ... */ block comments removed.

    String constants are copied verbatim, so a "//" or "/*" inside double
    quotes is not mistaken for a comment. The newline that ends a line
    comment is kept and a block comment is replaced by one space, so the
    tokens on either side of a comment never run together. An unterminated
    block comment swallows the rest of the input instead of looping.
    '''
    pieces = []
    pos = 0
    length = len(text)
    in_string = False
    while pos < length:
        ch = text[pos]
        if in_string:
            pieces.append(ch)
            # A string constant cannot span lines; resynchronise at the
            # newline so a stray quote does not hide the rest of the file.
            if ch == '"' or ch == '\n':
                in_string = False
            pos += 1
        elif ch == '"':
            in_string = True
            pieces.append(ch)
            pos += 1
        elif text.startswith('//', pos):
            end = text.find('\n', pos)
            if end < 0:
                break
            pos = end  # the newline itself is emitted by the next iteration
        elif text.startswith('/*', pos):
            end = text.find('*/', pos + 2)
            if end < 0:
                break
            pieces.append(' ')
            pos = end + 2
        else:
            pieces.append(ch)
            pos += 1
    return ''.join(pieces)


def output_base(filename):
    '''
    Return filename without a trailing ".jack" extension.

    str.strip('.jack') must not be used for this: it removes any of the
    characters '.', 'j', 'a', 'c', 'k' from both ends of the name.
    '''
    suffix = '.jack'
    if filename.endswith(suffix):
        return filename[:-len(suffix)]
    return filename


def main(argv):
    '''Compile the Jack file named on the command line argv.'''
    args = argv[1:]
    keep_xml = bool(args) and args[0] == '-x'
    if keep_xml:
        args = args[1:]
    if not args:
        sys.exit('usage: JackCompiler.py [-x] sourcename')
    filename = args[0]

    with open(filename, 'r') as source:
        raw = source.read()
    # Normalise line endings before stripping the comments.
    cleaned = strip_comments(raw.replace('\r\n', '\n').replace('\r', '\n'))

    base = output_base(filename)
    # The engine is handed a file object and repositions it with seek(), so
    # the cleaned source goes through a scratch file; a unique temporary
    # name avoids clobbering a user file called "copyfile".
    handle, scratch = tempfile.mkstemp(suffix='.jack')
    try:
        with os.fdopen(handle, 'w') as copyfile:
            copyfile.write(cleaned)
        # The engine always writes XML; send it to the null device unless -x
        # asked for it, so an existing .xml file is never overwritten/removed.
        xml_path = base + '.xml' if keep_xml else os.devnull
        with open(scratch, 'r') as readCopyFile, \
                open(xml_path, 'w') as writeXmlFile, \
                open(base + '.vm', 'w') as writeVmFile:
            outputCompile = CompilationEngine.Compile(
                readCopyFile, writeXmlFile, writeVmFile)
            outputCompile.compileClass()
    finally:
        os.remove(scratch)


if __name__ == '__main__':
    main(sys.argv)
CompilationEngine.py
#!/usr/bin/python
import sys

import JackTokenizer
import SymbolTable
import VMWriter


class Compile():
    '''
    Recursive-descent compiler for one Jack class.

    Tokens are read from rfile through JackTokenizer; the parse tree is
    written as XML to wfile and the generated VM code to wVmFile.
    rfile must support relative seeks (seek(offset,1)) because the parser
    un-reads tokens by moving the file position back.
    '''

    # XML entities for the symbols that are markup characters. The entities
    # are assembled from pieces so that a tool which HTML-unescapes this
    # source cannot silently turn them back into the raw characters.
    _XML_ESCAPES={'<':'&'+'lt;','>':'&'+'gt;','&':'&'+'amp;'}
    # Binary operators that map to a single VM arithmetic command.
    _ARITHMETIC_OPS={'+':'add','-':'sub','&':'and','|':'or',
                     '<':'lt','>':'gt','=':'eq'}
    # Binary operators that are compiled into a two-argument call.
    _OS_CALL_OPS={'*':'Math.multiply','/':'Math.divide'}

    def __init__(self,rfile,wfile,wVmFile):
        self.rfile=rfile
        self.wfile=wfile  #Write XML file
        self.vmWriter=VMWriter.VMwriter(wVmFile)  #Write VM file
        self.tokenizer=JackTokenizer.Tokenizer(self.rfile)
        self.class_symbol=SymbolTable.SymbolTable()
        self.sub_symbol=SymbolTable.SymbolTable()
        self.Stype=''  #type of the identifier(s) currently being declared
        self.Skind=''  #kind of the identifier(s) currently being declared
        #ClassName records the name of the class, used to build
        #the full subroutine names ClassName.subroutineName
        self.ClassName=''
        #number of expressions in the most recently finished expressionList
        self.expressionListNum=0
        #per-subroutine counters that keep the generated labels unique
        self.WHILEFLAG=0
        self.IFFLAG=0

    # ------------------------------------------------------------ helpers

    def _fail(self,message):
        '''Print message and stop the compilation with a failure status.'''
        print(message)
        sys.exit(1)

    def _lookup(self,symbolName,message):
        '''Return the symbol-table entry of symbolName or abort with message.'''
        entry=self.checkSymbol(symbolName)
        if entry==-1:
            self._fail(message)
        return entry

    def _peekPastWhitespace(self):
        '''
        Read the next non-blank character straight from rfile.

        Returns (char,consumed): char is '' at end of file and consumed is
        the number of characters read, so the caller can seek back.
        '''
        consumed=0
        char=self.rfile.read(1)
        while char and char.isspace():
            consumed+=1
            char=self.rfile.read(1)
        if char:
            consumed+=1
        return char,consumed

    def writeXmlTag(self,token):
        '''Write token verbatim to the XML output.'''
        self.wfile.write(token)

    def writeXml(self,tType,token):
        '''Write <tType> token </tType>, escaping markup symbols.'''
        if tType == 'symbol':
            token=self._XML_ESCAPES.get(token,token)
        self.writeXmlTag('<'+tType+'> '+token+' </'+tType+'>\n')

    def NextToken(self):
        '''Advance the tokenizer if it reports more tokens.'''
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()

    def moveBack(self):
        '''Un-read the current token by seeking back over its characters.'''
        lennum=-len(self.tokenizer.token)
        self.rfile.seek(lennum,1)

    def writeArrayPush(self,symbolName):
        '''
        Push the base address of the array variable symbolName.

        The segment stored in the symbol table is used, so static and
        argument arrays are addressed correctly as well.
        '''
        entry=self._lookup(symbolName,'Error Term!')
        self.vmWriter.writePush(entry[2],entry[3])

    def defineSymbol(self,symbolName,_symbol):
        '''Add symbolName to _symbol with the current Stype and Skind.'''
        _symbol.Define(symbolName,self.Stype,self.Skind)

    def checkSymbol(self,symbolName):
        '''
        Return the [name,type,kind,index] entry of symbolName.

        The subroutine table is searched first, then the class table;
        -1 is returned when the name is in neither.
        '''
        SubTag=self.sub_symbol.FoundName(symbolName)
        if SubTag!=-1:
            return self.sub_symbol.Scope[SubTag]
        ClassTag=self.class_symbol.FoundName(symbolName)
        if ClassTag!=-1:
            return self.class_symbol.Scope[ClassTag]
        return -1

    # ------------------------------------------------------- declarations

    def compileType(self):
        '''Record the current token as the declared type and write it out.'''
        tType=self.tokenizer.tokenType()
        if tType == 'KEYWORD':
            self.Stype=self.tokenizer.token
            self.writeXml('keyword',self.tokenizer.token)
        elif tType == 'IDENTIFIER':
            self.Stype=self.tokenizer.token
            self.writeXml('identifier',self.tokenizer.token)

    def compileTermType(self):
        '''Compile a keyword, integer or string constant term.'''
        tType=self.tokenizer.tokenType()
        if tType == 'KEYWORD':
            kWord=self.tokenizer.token
            if kWord=='true':
                # true is -1: push 1 and negate it
                self.vmWriter.writePush('constant',1)
                self.vmWriter.writeArithmetic('neg')
            elif kWord=='false' or kWord=='null':
                self.vmWriter.writePush('constant',0)
            elif kWord=='this':
                self.vmWriter.writePush('pointer',0)
            self.writeXml('keyword',self.tokenizer.token)
        elif tType == 'INT_CONSTANT':
            self.writeXml('integerConstant',self.tokenizer.token)
            self.vmWriter.writePush('constant',int(self.tokenizer.token))
        elif tType == 'STRING_CONSTANT':
            string_copy=self.tokenizer.token.strip('"')
            self.writeXml('stringConstant',string_copy)
            self.vmWriter.writePush('constant',len(string_copy))
            self.vmWriter.writeCall('String.new',1)
            for char in string_copy:
                self.vmWriter.writePush('constant',ord(char))
                self.vmWriter.writeCall('String.appendChar',2)

    def compileVarDec(self):
        ''' var type varName(,'varName')*; '''
        self.writeXmlTag('<varDec>\n')
        self.writeXml('keyword','var')
        self.Skind='var'
        #type
        self.NextToken()
        self.compileType()
        #varName
        self.NextToken()
        self.writeXml('identifier',self.tokenizer.token)
        self.defineSymbol(self.tokenizer.token,self.sub_symbol)
        #(,varName)*
        self.NextToken()
        while self.tokenizer.token != ';':
            self.writeXml('symbol',self.tokenizer.token)
            self.NextToken()
            self.writeXml('identifier',self.tokenizer.token)
            self.defineSymbol(self.tokenizer.token,self.sub_symbol)
            self.NextToken()
        self.writeXml('symbol',self.tokenizer.token)
        self.writeXmlTag('</varDec>\n')

    def compileParameterList(self):
        ''' ((type varName)(, type varName)*)? '''
        self.writeXmlTag('<parameterList>\n')
        self.NextToken()
        while self.tokenizer.token != ')':
            self.Skind='argument'
            if self.tokenizer.token == ',':
                self.writeXml('symbol',self.tokenizer.token)
                self.NextToken()
            self.compileType()
            self.NextToken()
            self.writeXml('identifier',self.tokenizer.token)
            self.defineSymbol(self.tokenizer.token,self.sub_symbol)
            self.NextToken()
        self.writeXmlTag('</parameterList>\n')

    def compileClassVarDec(self):
        ''' ('static'|'field') type varName(, varName)*; '''
        self.writeXmlTag('<classVarDec>\n')
        self.writeXml('keyword',self.tokenizer.token)
        self.Skind=self.tokenizer.token
        self.NextToken()
        self.compileType()
        #varName
        self.NextToken()
        self.writeXml('identifier',self.tokenizer.token)
        self.defineSymbol(self.tokenizer.token,self.class_symbol)
        #(,varName)*
        self.NextToken()
        while self.tokenizer.token != ';':
            self.writeXml('symbol',self.tokenizer.token)
            self.NextToken()
            self.writeXml('identifier',self.tokenizer.token)
            self.defineSymbol(self.tokenizer.token,self.class_symbol)
            self.NextToken()
        self.writeXml('symbol',self.tokenizer.token)
        self.writeXmlTag('</classVarDec>\n')

    # -------------------------------------------------------- expressions

    def compileTerm(self):
        '''
        Compile one term: a constant, a variable, an array entry, a
        subroutine call, a parenthesised expression or a unary operation.
        '''
        self.writeXmlTag('<term>\n')
        self.NextToken()
        tType=self.tokenizer.tokenType()
        if tType == 'IDENTIFIER':
            # The character after the identifier tells the three identifier
            # forms apart; blanks in between are skipped.
            temp,consumed=self._peekPastWhitespace()
            if temp in ('.','('):
                # Rewind to the start of the identifier: subroutineCall
                # reads the whole call again from its first token.
                lennum=-len(self.tokenizer.token)-consumed
                self.rfile.seek(lennum,1)
                self.subroutineCall()
            elif temp=='[':
                self.writeXml('identifier',self.tokenizer.token)
                self.writeArrayPush(self.tokenizer.token)
                self.writeXml('symbol','[')
                self.compileExpression()
                # base+index into pointer 1, then read the entry via that 0
                self.vmWriter.writeArithmetic('add')
                self.vmWriter.writePop('pointer',1)
                self.vmWriter.writePush('that',0)
                self.writeXml('symbol',']')
            else:
                if consumed:
                    self.rfile.seek(-consumed,1)
                self.writeXml('identifier',self.tokenizer.token)
                ListSeg=self._lookup(
                    self.tokenizer.token,
                    'Error! Undefined identifier: '+self.tokenizer.token)
                self.vmWriter.writePush(ListSeg[2],ListSeg[3])
        elif self.tokenizer.token in ('-','~'):
            UnaryOp=self.tokenizer.token
            self.writeXml('symbol',self.tokenizer.token)
            self.compileTerm()
            if UnaryOp == '-':
                self.vmWriter.writeArithmetic('neg')
            else:
                self.vmWriter.writeArithmetic('not')
        elif self.tokenizer.token == '(':
            self.writeXml('symbol',self.tokenizer.token)
            self.compileExpression()
            self.writeXml('symbol',')')
        else:
            self.compileTermType()
        self.writeXmlTag('</term>\n')

    def compileExpression(self):
        '''
        term (op term)*

        On return the current token is the first token after the
        expression (its terminator).
        '''
        self.writeXmlTag('<expression>\n')
        self.compileTerm()
        self.NextToken()
        while (self.tokenizer.token in self._ARITHMETIC_OPS or
               self.tokenizer.token in self._OS_CALL_OPS):
            operator=self.tokenizer.token
            self.writeXml('symbol',operator)
            self.compileTerm()
            if operator in self._OS_CALL_OPS:
                self.vmWriter.writeCall(self._OS_CALL_OPS[operator],2)
            else:
                self.vmWriter.writeArithmetic(self._ARITHMETIC_OPS[operator])
            self.NextToken()
        self.writeXmlTag('</expression>\n')

    def compileExpressionList(self):
        '''
        (expression (, expression)*)?

        Returns the number of expressions and also stores it in
        expressionListNum. The count is kept in a local while the list is
        parsed, because a nested subroutine call inside one of the
        expressions runs this method again.
        '''
        self.writeXmlTag('<expressionList>\n')
        count=0
        self.NextToken()
        while self.tokenizer.token != ')':
            if self.tokenizer.token != ',':
                # first token of an expression: hand it back to compileTerm
                self.moveBack()
            else:
                self.writeXml('symbol',self.tokenizer.token)
            self.compileExpression()
            count+=1
        self.expressionListNum=count
        self.writeXmlTag('</expressionList>\n')
        return count

    def subroutineCall(self):
        '''
        subroutineName(expressionList) |
        (className|varName).subroutineName(expressionList)

        A call through a variable is a method call: the object is pushed
        as the hidden first argument from the variable's own segment.
        A call without a prefix is a method call on the current object.
        '''
        sub_MethodFlag=False
        self.NextToken()
        self.writeXml('identifier',self.tokenizer.token)
        sub_className=self.tokenizer.token
        self.NextToken()
        if self.tokenizer.token=='.':
            self.writeXml('symbol',self.tokenizer.token)
            self.NextToken()
            self.writeXml('identifier',self.tokenizer.token)
            sub_funcName=self.tokenizer.token
            #To check if sub_className is a ClassName or an instance
            entry=self.checkSymbol(sub_className)
            if entry!=-1:
                sub_MethodFlag=True
                sub_className=entry[1]
                self.vmWriter.writePush(entry[2],entry[3])
            sub_Name=sub_className+'.'+sub_funcName
            # consume the '(' that opens the argument list
            self.NextToken()
            if self.tokenizer.token!='(':
                self._fail('Error!'+self.tokenizer.token)
            self.writeXml('symbol','(')
            argCount=self.compileExpressionList()
            self.writeXml('symbol',')')
            if sub_MethodFlag:
                argCount+=1
            self.vmWriter.writeCall(sub_Name,argCount)
        elif self.tokenizer.token=='(':
            sub_Name=self.ClassName+'.'+sub_className
            self.writeXml('symbol','(')
            self.vmWriter.writePush('pointer',0)
            argCount=self.compileExpressionList()
            self.vmWriter.writeCall(sub_Name,argCount+1)
            self.writeXml('symbol',')')

    # --------------------------------------------------------- statements

    def compileDo(self):
        ''' do subroutineCall; '''
        self.writeXmlTag('<doStatement>\n')
        self.writeXml('keyword',self.tokenizer.token)
        self.subroutineCall()
        # the returned value of a do-call is discarded
        self.vmWriter.writePop('temp',0)
        self.NextToken()
        self.writeXml('symbol',self.tokenizer.token)
        self.writeXmlTag('</doStatement>\n')

    def compileLet(self):
        '''
        let varName([expression])? = expression;

        If the term on the left of '=' is an array entry, the order of the
        VM code is totally different from the plain-variable case.
        '''
        self.writeXmlTag('<letStatement>\n')
        self.writeXml('keyword',self.tokenizer.token)
        self.NextToken()
        self.writeXml('identifier',self.tokenizer.token)
        LetVarName=self.tokenizer.token
        self.NextToken()
        temp=self.tokenizer.token
        if temp=='[':
            self.writeArrayPush(LetVarName)
            self.writeXml('symbol',self.tokenizer.token)
            self.compileExpression()
            self.writeXml('symbol',']')
            self.vmWriter.writeArithmetic('add')
            self.NextToken()
            self.writeXml('symbol',self.tokenizer.token)
            self.compileExpression()
            # Park the value in temp 0 while the target address is moved
            # into pointer 1.
            self.vmWriter.writePop('temp',0)
            self.vmWriter.writePop('pointer',1)
            self.vmWriter.writePush('temp',0)
            self.vmWriter.writePop('that',0)
        elif temp == '=':
            ListSeg=self._lookup(
                LetVarName,'Error! Undefined identifier: '+LetVarName)
            self.writeXml('symbol',self.tokenizer.token)
            self.compileExpression()
            self.vmWriter.writePop(ListSeg[2],ListSeg[3])
        else:
            self._fail('Error!'+temp)
        self.writeXml('symbol',';')
        self.writeXmlTag('</letStatement>\n')

    def compileWhile(self):
        ''' while (expression) {statements} '''
        self.writeXmlTag('<whileStatement>\n')
        self.writeXml('keyword',self.tokenizer.token)
        suffix=str(self.WHILEFLAG)
        self.WHILEFLAG+=1
        self.vmWriter.writeLabel('WHILE_START'+suffix)
        #(expression)
        self.NextToken()
        self.writeXml('symbol',self.tokenizer.token)
        self.compileExpression()
        self.writeXml('symbol',')')
        self.vmWriter.writeArithmetic('not')
        self.vmWriter.writeIf('WHILE_OVER'+suffix)
        #{statements}
        self.NextToken()
        self.writeXml('symbol',self.tokenizer.token)
        self.compileStatements()
        self.vmWriter.writeGoto('WHILE_START'+suffix)
        self.vmWriter.writeLabel('WHILE_OVER'+suffix)
        self.writeXml('symbol',self.tokenizer.token)
        self.writeXmlTag('</whileStatement>\n')

    def compileReturn(self):
        ''' return expression?; '''
        self.writeXmlTag('<returnStatement>\n')
        self.writeXml('keyword',self.tokenizer.token)
        #expression?
        self.NextToken()
        if self.tokenizer.token == ';':
            self.writeXml('symbol',self.tokenizer.token)
            # a bare return still pushes a value: constant 0
            self.vmWriter.writePush('constant',0)
            self.vmWriter.writeReturn()
        else:
            self.moveBack()
            self.compileExpression()
            self.vmWriter.writeReturn()
            self.writeXml('symbol',';')
        self.writeXmlTag('</returnStatement>\n')

    def compileStatements(self):
        '''
        statement*

        Stops with '}' as the current token; any token that does not start
        a statement aborts the compilation.
        '''
        self.writeXmlTag('<statements>\n')
        self.NextToken()
        while self.tokenizer.token != '}':
            if self.tokenizer.token =='let':
                self.compileLet()
            elif self.tokenizer.token == 'if':
                self.compileIf()
            elif self.tokenizer.token == 'while':
                self.compileWhile()
            elif self.tokenizer.token == 'do':
                self.compileDo()
            elif self.tokenizer.token == 'return':
                self.compileReturn()
            else:
                self._fail('Error!'+self.tokenizer.token)
            self.NextToken()
        self.writeXmlTag('</statements>\n')

    def compileIf(self):
        ''' if (expression) {statements} (else {statements})? '''
        self.writeXmlTag('<ifStatement>\n')
        suffix=str(self.IFFLAG)
        self.IFFLAG+=1
        self.writeXml('keyword',self.tokenizer.token)
        #(expression)
        self.NextToken()
        self.writeXml('symbol',self.tokenizer.token)
        self.compileExpression()
        self.writeXml('symbol',')')
        # jump over the if-body when the condition is false
        self.vmWriter.writeArithmetic('not')
        self.vmWriter.writeIf('IF_RIGHT'+suffix)
        #{statements}
        self.NextToken()
        self.writeXml('symbol',self.tokenizer.token)
        self.compileStatements()
        self.writeXml('symbol',self.tokenizer.token)
        #(else {statements})?
        self.NextToken()
        if self.tokenizer.token=='else':
            self.vmWriter.writeGoto('IF_WRONG'+suffix)
            self.vmWriter.writeLabel('IF_RIGHT'+suffix)
            self.writeXml('keyword',self.tokenizer.token)
            self.NextToken()
            self.writeXml('symbol',self.tokenizer.token)
            self.compileStatements()
            self.vmWriter.writeLabel('IF_WRONG'+suffix)
            self.writeXml('symbol',self.tokenizer.token)
        else:
            self.vmWriter.writeLabel('IF_RIGHT'+suffix)
            # the token read ahead belongs to the next statement
            self.moveBack()
        self.writeXmlTag('</ifStatement>\n')

    # ---------------------------------------------------- class structure

    def compileClass(self):
        ''' class className { classVarDec* subroutineDec* } '''
        self.writeXmlTag('<class>\n')
        self.NextToken()
        self.writeXml('keyword',self.tokenizer.token)
        self.NextToken()
        self.writeXml('identifier',self.tokenizer.token)
        self.ClassName=self.tokenizer.token
        self.NextToken()
        # the opening '{' is a symbol, not a keyword
        self.writeXml('symbol',self.tokenizer.token)
        #classVarDec*
        self.NextToken()
        while self.tokenizer.token in ('static','field'):
            self.compileClassVarDec()
            self.NextToken()
        #subroutineDec*
        while self.tokenizer.token in ('constructor','function','method'):
            self.compileSubroutine()
            self.NextToken()
        self.writeXml('symbol',self.tokenizer.token)
        self.writeXmlTag('</class>\n')

    def compileSubroutine(self):
        '''
        ('constructor'|'function'|'method') (void|type) subroutineName
        (parameterList) { varDec* statements }

        The VM "function" line is written only after all varDecs are read,
        because it carries the number of local variables.
        '''
        self.WHILEFLAG=0
        self.IFFLAG=0
        self.writeXmlTag('<subroutineDec>\n')
        self.writeXml('keyword',self.tokenizer.token)
        self.sub_symbol.startSubroutine()
        if self.tokenizer.token =='method':
            # argument 0 of a method is the object it is called on
            self.sub_symbol.Define('this',self.ClassName,'argument')
            Subroutine_Flag='METHOD'
        elif self.tokenizer.token == 'constructor':
            Subroutine_Flag='CONSTRUCTOR'
        else:
            Subroutine_Flag='FUNCTION'
        #(void|type) subroutineName (parameterList)
        self.NextToken()
        self.compileType()
        self.NextToken()
        self.writeXml('identifier',self.tokenizer.token)
        #special, to be xxx.yyy
        FunctionName=self.ClassName+'.'+self.tokenizer.token
        self.NextToken()
        self.writeXml('symbol',self.tokenizer.token)
        self.compileParameterList()
        self.writeXml('symbol',self.tokenizer.token)
        #subroutineBody
        self.writeXmlTag('<subroutineBody>\n')
        #{varDec* statements}
        self.NextToken()
        self.writeXml('symbol',self.tokenizer.token)
        self.NextToken()
        while self.tokenizer.token == 'var':
            self.compileVarDec()
            self.NextToken()
        # the token read ahead is the first token of the statements
        self.moveBack()
        LclNum=self.sub_symbol.VarCount('var')
        self.vmWriter.writeFunction(FunctionName,LclNum)
        if Subroutine_Flag == 'METHOD':
            # anchor the this segment at the object passed as argument 0
            self.vmWriter.writePush('argument',0)
            self.vmWriter.writePop('pointer',0)
        elif Subroutine_Flag=='CONSTRUCTOR':
            # allocate one word per field and anchor this at the new block
            FieldNum=self.class_symbol.VarCount('field')
            self.vmWriter.writePush('constant',FieldNum)
            self.vmWriter.writeCall('Memory.alloc',1)
            self.vmWriter.writePop('pointer',0)
        self.compileStatements()
        self.writeXml('symbol',self.tokenizer.token)
        self.writeXmlTag('</subroutineBody>\n')
        self.writeXmlTag('</subroutineDec>\n')
SymbolTable.py
#!/usr/bin/python
class SymbolTable:
    '''
    Flat symbol table kept in the list Scope.

    Every entry of Scope is itself a list [name,type,kind,index]. The kinds
    'field' and 'var' are stored as 'this' and 'local'; any other kind is
    stored unchanged. index counts the earlier entries of the same kind.
    '''

    def __init__(self):
        self.Scope=[]

    def Constructor(self):
        '''Empty the table.'''
        self.Scope=[]

    def startSubroutine(self):
        '''Empty the table.'''
        self.Scope=[]

    def _storedKind(self,kind):
        '''Translate a declared kind into the kind kept in the table.'''
        if kind == 'field':
            return 'this'
        if kind == 'var':
            return 'local'
        return kind

    def _column(self,name,column):
        '''Return one column of the first entry called name, or 'NONE'.'''
        for entry in self.Scope:
            if name == entry[0]:
                return entry[column]
        return 'NONE'

    def FoundName(self,name):
        '''Return the position of name in Scope, or -1 if it is absent.'''
        for position,entry in enumerate(self.Scope):
            if name == entry[0]:
                return position
        return -1

    def Define(self,name,segType,kind):
        '''Append a new entry; its index is the current count of its kind.'''
        index=self.VarCount(kind)
        self.Scope.append([name,segType,self._storedKind(kind),index])

    def VarCount(self,kind):
        '''Count the entries already defined with the given kind.'''
        wanted=self._storedKind(kind)
        return sum(1 for entry in self.Scope if entry[2]==wanted)

    def KindOf(self,name):
        '''Return the stored kind of name, or 'NONE'.'''
        return self._column(name,2)

    def TypeOf(self,name):
        '''Return the type of name, or 'NONE'.'''
        return self._column(name,1)

    def IndexOf(self,name):
        '''Return the index of name, or 'NONE'.'''
        return self._column(name,3)
VMWriter.py
#!/usr/bin/python
class VMwriter:
    '''Writes VM commands, one per line, to the open file object wfile.'''

    def __init__(self,wfile):
        self.wfile=wfile

    def _emit(self,*fields):
        '''Write the fields separated by single spaces, then a newline.'''
        self.wfile.write(' '.join(fields)+'\n')

    def writePush(self,segment,index):
        '''push segment index'''
        self._emit('push',segment,str(index))

    def writePop(self,segment,index):
        '''pop segment index'''
        self._emit('pop',segment,str(index))

    def writeArithmetic(self,command):
        '''Write command on a line of its own.'''
        self._emit(command)

    def writeFunction(self,functionName,LclNum):
        '''function functionName LclNum'''
        self._emit('function',functionName,str(LclNum))

    def writeReturn(self):
        '''return'''
        self._emit('return')

    def writeCall(self,functionName,ELNum):
        '''call functionName ELNum'''
        self._emit('call',functionName,str(ELNum))

    def writeLabel(self,label):
        '''label label'''
        self._emit('label',label)

    def writeGoto(self,label):
        '''goto label'''
        self._emit('goto',label)

    def writeIf(self,label):
        '''if-goto label'''
        self._emit('if-goto',label)
JackTokenizer.py
#!/usr/bin/python
# All single-character symbols of the Jack language.
STable=('{','}','(',')','[',']','.',',',';','+','-','*','/','&','|','<','>','=','~')
# All reserved words of the Jack language.
KWtable=('class','constructor','function','method','field','static','var','int','char','boolean',
         'void','true','false','null','this','let','do','if','else','while','return')


class Tokenizer():
    '''
    Character-at-a-time tokenizer over the file object rfile.

    rfile is read with read(1) and repositioned with seek(-1,1), so it must
    support relative seeks. After advance() the file position is directly
    behind the token that was read; callers that un-read a token by seeking
    back len(token) characters rely on that.
    '''

    def __init__(self,rfile):
        self.rfile=rfile
        self.token=''

    def hasMoreTokens(self):
        '''
        Skip whitespace and report whether any input is left.

        All whitespace characters are skipped (including '\\r'). When a
        non-blank character is found it is pushed back for advance().
        '''
        temp=self.rfile.read(1)
        while temp and temp.isspace():
            temp=self.rfile.read(1)
        if not temp:
            return False
        self.rfile.seek(-1,1)
        return True

    def advance(self):
        '''
        Read the next token into self.token.

        Expects the file to be positioned on the first character of the
        token (hasMoreTokens() does that). A character that is neither a
        word character, a symbol nor a double quote yields an empty token.
        '''
        self.token=''
        temp=self.rfile.read(1)
        if temp.isalpha() or temp.isdigit() or temp == '_':
            chars=[]
            while temp.isalpha() or temp.isdigit() or temp == '_':
                chars.append(temp)
                temp=self.rfile.read(1)
            self.token=''.join(chars)
            # Push back the character that ended the word. Every kind of
            # whitespace is pushed back (not only ' ' and '\n'), otherwise
            # the file position would end up one character past the token.
            if temp in STable or temp == '"' or temp.isspace():
                self.rfile.seek(-1,1)
        elif temp in STable:
            self.token=temp
        elif temp =='"':
            chars=['"']
            temp=self.rfile.read(1)
            # Stop at end of file too, so an unterminated string constant
            # cannot loop forever.
            while temp and temp != '"':
                chars.append(temp)
                temp=self.rfile.read(1)
            chars.append('"')
            self.token=''.join(chars)

    def tokenType(self):
        '''
        Classify the current token.

        Returns 'KEYWORD', 'SYMBOL', 'INT_CONSTANT', 'STRING_CONSTANT' or,
        for everything else, 'IDENTIFIER'.
        '''
        if self.token in KWtable:
            return 'KEYWORD'
        elif self.token in STable:
            return 'SYMBOL'
        elif self.token.isdigit():
            return 'INT_CONSTANT'
        elif self.token.startswith('"'):
            return 'STRING_CONSTANT'
        else:
            return 'IDENTIFIER'

    def Keyword(self):
        '''Return the current token.'''
        return self.token

    def Symbol(self):
        '''Return the current token.'''
        return self.token

    def Identifier(self):
        '''Return the current token.'''
        return self.token

    def intVal(self):
        '''Return the current token converted with int().'''
        return int(self.token)

    def stringVal(self):
        '''Return the current token (quotes included).'''
        return self.token
计算机系统要素:第十一章 编译器:代码生成