From Nand to Tetris week 10
这次回顾第十章的内容,这一章介绍了编译器的第一部分:语法分析。
课程官网:
视频地址:
https://www.coursera.org/learn/build-a-computer
Part 1:课程回顾
背景介绍
这一章完成了编译器的前半部分:语法分析,输出结果为XML格式,本章重点依然为项目,背景介绍部分从略。
Part 2:项目
项目的注意点可以参考PPT的第83、110、125页,项目分为三个部分:JackTokenizer、CompilationEngine、JackAnalyzer。
JackTokenizer
class JackTokenizer():
    """Tokenizer for the Jack language (Nand2Tetris chapter 10).

    Reads a .jack source file, strips comments, and exposes the token
    stream one token at a time for the CompilationEngine.
    """

    # Jack reserved words.
    Keyword = {'class', 'constructor', 'function', 'method',
               'field', 'static', 'var', 'int', 'char',
               'boolean', 'void', 'true', 'false', 'null',
               'this', 'let', 'do', 'if', 'else', 'while',
               'return'}
    # Single-character Jack symbols.
    Symbol = {'{', '}', '(', ')', '[', ']', '.', ',', ';', '+', '-', '*',
              '/', '&', '|', '<', '>', '=', '~'}
    # String constants are delimited by double quotes.
    StringConstant = '"'
    # XML entity escapes for characters that are illegal in XML text.
    # (The original table mapped each character to itself, producing
    # invalid XML output for <, >, & and ".)
    Replace = {"<": "&lt;", ">": "&gt;", '"': "&quot;", '&': "&amp;"}
    # '&' must be replaced FIRST, otherwise the '&' introduced by the
    # other escapes would itself be escaped again.
    Replacewords = ['&', '"', "<", ">"]

    def __init__(self, filename):
        """Read *filename*, drop comments, and tokenize the remainder.

        Sets self.Token (token list), self.maxlength, and self.filename
        (the input path with its extension removed, used as the output
        file stem).
        """
        self.Token = []
        # Regex-style character class of all symbols (kept for
        # compatibility; not used internally).
        self.sep = '[' + ''.join(JackTokenizer.Symbol) + ']'
        self.cnt = 0
        # Drop the extension but keep the directory part (if any).
        # Building it piecewise avoids the spurious leading '/' the old
        # code produced for a bare filename with no directory.
        tmp = filename.split('/')
        tmp[-1] = tmp[-1].split('.')[0]
        self.filename = '/'.join(tmp)
        with open(filename) as f:
            for line in f.readlines():
                data = line.split()
                if len(data) == 0:
                    continue
                # Skip whole-line comments ("//", "/**", "*", "*/").
                if data[0] == "//":
                    continue
                if data[0] == "/**" or data[0] == "*" or data[0] == "*/":
                    continue
                # Normalize internal whitespace, then strip a trailing
                # end-of-line comment.
                data = ' '.join(data)
                data = data.split("//")[0]
                self.Token += self.process_sentence(data)
        self.maxlength = len(self.Token)
        self.token = ""

    def next_token(self):
        """Peek at the token that the next advance() would load."""
        if self.cnt < self.maxlength:
            return self.Token[self.cnt]

    def process_sentence(self, string):
        """Split one comment-free source line into raw tokens.

        String constants stay single tokens (quotes included); every
        symbol forms its own token; spaces separate everything else.
        """
        res = []
        i = 0                 # start index of the token being scanned
        in_string = False     # currently inside a string constant?
        n = len(string)
        for j in range(n):
            ch = string[j]
            if ch == JackTokenizer.StringConstant:
                if not in_string:
                    i = j
                    in_string = True
                else:
                    # Closing quote: emit the constant, quotes included.
                    res.append(string[i:j + 1])
                    in_string = False
                    # Restart scanning after the quote (the old code left
                    # i at the opening quote, duplicating the string when
                    # a space followed it).
                    i = j + 1
                continue
            if in_string:
                # Spaces and symbols inside a string are literal text.
                continue
            if ch == ' ':
                if j > i:
                    res.append(string[i:j])
                i = j + 1
            elif ch in JackTokenizer.Symbol:
                if j > i:
                    res.append(string[i:j])
                res.append(ch)
                i = j + 1
        # Emit a trailing token not followed by a space or symbol
        # (e.g. a line consisting only of "return"); the old code
        # silently dropped it.
        if i < n and not in_string:
            res.append(string[i:])
        return res

    def hasMoreTokens(self):
        """True while there are unconsumed tokens."""
        return self.cnt < self.maxlength

    def advance(self):
        """Load the next token into self.token."""
        if self.hasMoreTokens():
            self.token = self.Token[self.cnt]
            self.cnt += 1

    def replace(self, word):
        """Escape XML special characters ('&' first, see Replacewords)."""
        for ch in JackTokenizer.Replacewords:
            if ch in word:
                word = word.replace(ch, JackTokenizer.Replace[ch])
        return word

    def tokenType(self):
        """Classify the current token into one of the five Jack kinds."""
        if self.token in JackTokenizer.Keyword:
            return "KEYWORD"
        elif self.token in JackTokenizer.Symbol:
            return "SYMBOL"
        elif self.token[0] == '"' and self.token[-1] == '"':
            return "STRING_CONST"
        elif str.isdigit(self.token):
            return "INT_CONST"
        else:
            return "IDENTIFIER"

    def keyword(self):
        """Current token as a keyword."""
        return self.token

    def symbol(self):
        """Current token as a symbol, XML-escaped."""
        self.token = self.replace(self.token)
        return self.token

    def identifier(self):
        """Current token as an identifier."""
        return self.token

    def intVal(self):
        """Current token as an integer value."""
        return int(self.token)

    def stringVal(self):
        """Current token as a string value: quotes stripped, XML-escaped."""
        self.token = self.replace(self.token[1:-1])
        return self.token
def Write_XML(jackTokenizer):
    """Dump the tokenizer's full token stream as a <tokens> XML file.

    The output is written next to the source as <stem>toke.xml; each
    token becomes one tagged line, e.g. "<keyword> class </keyword>".
    Consumes the tokenizer: every remaining token is advanced over.
    """
    out_path = jackTokenizer.filename + "toke.xml"
    with open(out_path, "w+") as out:
        out.write("<tokens>\n")
        while jackTokenizer.hasMoreTokens():
            jackTokenizer.advance()
            kind = jackTokenizer.tokenType()
            # Map the token kind to its XML tag and printable text.
            if kind == "KEYWORD":
                tag, text = "keyword", jackTokenizer.keyword()
            elif kind == "SYMBOL":
                tag, text = "symbol", jackTokenizer.symbol()
            elif kind == "IDENTIFIER":
                tag, text = "identifier", jackTokenizer.identifier()
            elif kind == "INT_CONST":
                tag, text = "integerConstant", str(jackTokenizer.intVal())
            else:  # STRING_CONST is the only remaining kind
                tag, text = "stringConstant", jackTokenizer.stringVal()
            out.write("<" + tag + "> " + text + " </" + tag + ">\n")
        out.write("</tokens>\n")
CompilationEngine
from JackTokenizer import JackTokenizer
class CompilationEngine():
    """Recursive-descent parser for Jack: emits the parse tree as XML.

    Each CompileXxx method assumes the tokenizer's current token is the
    first token of its construct and leaves the tokenizer positioned on
    the first token after it. The caller must call
    self.tokenizer.advance() once before CompileClass().
    """

    def __init__(self, filename, flag=1):
        """Open *filename* for tokenizing and open the XML output file.

        flag == 1 appends "X" to the output stem (e.g. MainX.xml) so the
        result does not overwrite a comparison file; any other value
        writes <stem>.xml directly.
        """
        self.tokenizer = JackTokenizer(filename)
        if flag == 1:
            self.output = self.tokenizer.filename + "X.xml"
        else:
            self.output = self.tokenizer.filename + ".xml"
        self.file = open(self.output, "w+")

    # ----- helpers: write the current token as one XML line, advance -----

    def _eat_keyword(self):
        # Current token is a keyword.
        self.file.write("<keyword> " + self.tokenizer.keyword() + " </keyword>\n")
        self.tokenizer.advance()

    def _eat_symbol(self):
        # Current token is a symbol (symbol() escapes it for XML).
        self.file.write("<symbol> " + self.tokenizer.symbol() + " </symbol>\n")
        self.tokenizer.advance()

    def _eat_identifier(self):
        # Current token is an identifier.
        self.file.write("<identifier> " + self.tokenizer.identifier() + " </identifier>\n")
        self.tokenizer.advance()

    def CompileClass(self):
        """class: 'class' className '{' classVarDec* subroutineDec* '}'"""
        self.file.write("<class>\n")
        self._eat_keyword()          # 'class'
        self._eat_identifier()       # className
        self._eat_symbol()           # '{'
        while self.tokenizer.token in ['static', 'field']:
            self.CompileClassVarDec()
        while self.tokenizer.token in ['constructor', 'function', 'method']:
            self.CompileSubroutine()
        self._eat_symbol()           # '}'
        self.file.write("</class>\n")

    def CompileClassVarDec(self):
        """classVarDec: ('static'|'field') type varName (',' varName)* ';'"""
        self.file.write("<classVarDec>\n")
        self._eat_keyword()          # 'static' | 'field'
        self.CompileType()
        self._eat_identifier()       # varName
        while self.tokenizer.token == ',':
            self._eat_symbol()       # ','
            self._eat_identifier()   # varName
        self._eat_symbol()           # ';'
        self.file.write("</classVarDec>\n")

    def CompileSubroutine(self):
        """subroutineDec: ('constructor'|'function'|'method') ('void'|type)
        subroutineName '(' parameterList ')' subroutineBody"""
        self.file.write("<subroutineDec>\n")
        self._eat_keyword()          # constructor | function | method
        # Return type: keyword for the primitives/void, identifier otherwise.
        if self.tokenizer.token in ['int', 'char', 'boolean', 'void']:
            self._eat_keyword()
        else:
            self._eat_identifier()
        self._eat_identifier()       # subroutineName
        self._eat_symbol()           # '('
        self.CompileParameterList()
        self._eat_symbol()           # ')'
        self.CompileSubroutineBody()
        self.file.write("</subroutineDec>\n")

    def CompileSubroutineBody(self):
        """subroutineBody: '{' varDec* statements '}'"""
        self.file.write("<subroutineBody>\n")
        self._eat_symbol()           # '{'
        while self.tokenizer.token == "var":
            self.CompileVarDec()
        self.CompileStatements()
        self._eat_symbol()           # '}'
        self.file.write("</subroutineBody>\n")

    def CompileType(self):
        """type: 'int' | 'char' | 'boolean' | className"""
        if self.tokenizer.token in ['int', 'char', 'boolean']:
            self._eat_keyword()
        else:
            self._eat_identifier()

    def CompileParameterList(self):
        """parameterList: ((type varName) (',' type varName)*)?"""
        self.file.write("<parameterList>\n")
        # BUG FIX: the original compared the *method object*
        # self.tokenizer.tokenType to "IDENTIFIER" (always False), so a
        # parameter list whose first parameter has a class type was never
        # compiled. The call parentheses were missing.
        if (self.tokenizer.token in ['int', 'char', 'boolean']
                or self.tokenizer.tokenType() == "IDENTIFIER"):
            self.CompileType()
            self._eat_identifier()   # varName
            while self.tokenizer.token == ",":
                self._eat_symbol()   # ','
                self.CompileType()
                self._eat_identifier()
        self.file.write("</parameterList>\n")

    def CompileVarDec(self):
        """varDec: 'var' type varName (',' varName)* ';'"""
        self.file.write("<varDec>\n")
        self._eat_keyword()          # 'var'
        self.CompileType()
        self._eat_identifier()       # varName
        while self.tokenizer.token == ",":
            self._eat_symbol()       # ','
            self._eat_identifier()   # varName
        self._eat_symbol()           # ';'
        self.file.write("</varDec>\n")

    def CompileStatement(self):
        """Dispatch one statement on its leading keyword."""
        token = self.tokenizer.token
        if token == "let":
            self.CompileLet()
        elif token == "if":
            self.CompileIf()
        elif token == "while":
            self.CompileWhile()
        elif token == "do":
            self.CompileDo()
        elif token == "return":
            self.CompileReturn()

    def CompileStatements(self):
        """statements: statement*"""
        self.file.write("<statements>\n")
        while self.tokenizer.token in ["let", "if", "while", "do", "return"]:
            self.CompileStatement()
        self.file.write("</statements>\n")

    def CompileDo(self):
        """doStatement: 'do' subroutineCall ';'"""
        self.file.write("<doStatement>\n")
        self._eat_keyword()          # 'do'
        self.CompileSubroutineCall()
        self._eat_symbol()           # ';'
        self.file.write("</doStatement>\n")

    def CompileLet(self):
        """letStatement: 'let' varName ('[' expression ']')? '=' expression ';'"""
        self.file.write("<letStatement>\n")
        self._eat_keyword()          # 'let'
        self._eat_identifier()       # varName
        if self.tokenizer.token == "[":
            self._eat_symbol()       # '['
            self.CompileExpression()
            self._eat_symbol()       # ']'
        self._eat_symbol()           # '='
        self.CompileExpression()
        self._eat_symbol()           # ';'
        self.file.write("</letStatement>\n")

    def CompileWhile(self):
        """whileStatement: 'while' '(' expression ')' '{' statements '}'"""
        self.file.write("<whileStatement>\n")
        self._eat_keyword()          # 'while'
        self._eat_symbol()           # '('
        self.CompileExpression()
        self._eat_symbol()           # ')'
        self._eat_symbol()           # '{'
        self.CompileStatements()
        self._eat_symbol()           # '}'
        self.file.write("</whileStatement>\n")

    def CompileReturn(self):
        """returnStatement: 'return' expression? ';'"""
        self.file.write("<returnStatement>\n")
        self._eat_keyword()          # 'return'
        if self.tokenizer.token != ';':
            self.CompileExpression()
        self._eat_symbol()           # ';'
        self.file.write("</returnStatement>\n")

    def CompileIf(self):
        """ifStatement: 'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?"""
        self.file.write("<ifStatement>\n")
        self._eat_keyword()          # 'if'
        self._eat_symbol()           # '('
        self.CompileExpression()
        self._eat_symbol()           # ')'
        self._eat_symbol()           # '{'
        self.CompileStatements()
        self._eat_symbol()           # '}'
        if self.tokenizer.token == "else":
            self._eat_keyword()      # 'else'
            self._eat_symbol()       # '{'
            self.CompileStatements()
            self._eat_symbol()       # '}'
        self.file.write("</ifStatement>\n")

    def CompileExpression(self):
        """expression: term (op term)*"""
        self.file.write("<expression>\n")
        self.CompileTerm()
        while self.tokenizer.token in ['+', '-', '*', '/', '&', '|', '<', '>', '=']:
            self._eat_symbol()       # op
            self.CompileTerm()
        self.file.write("</expression>\n")

    def CompileTerm(self):
        """term: integerConstant | stringConstant | keywordConstant |
        varName | varName '[' expression ']' | subroutineCall |
        '(' expression ')' | unaryOp term"""
        self.file.write("<term>\n")
        kind = self.tokenizer.tokenType()
        if kind == "INT_CONST":
            self.file.write("<integerConstant> " + str(self.tokenizer.intVal())
                            + " </integerConstant>\n")
            self.tokenizer.advance()
        elif kind == "STRING_CONST":
            self.file.write("<stringConstant> " + self.tokenizer.stringVal()
                            + " </stringConstant>\n")
            self.tokenizer.advance()
        elif kind == "KEYWORD" and self.tokenizer.token in ['true', 'false', 'null', 'this']:
            self._eat_keyword()      # keywordConstant
        elif kind == "IDENTIFIER":
            # One-token lookahead decides between array access,
            # subroutine call, and a plain variable.
            if self.tokenizer.next_token() == "[":
                self._eat_identifier()   # varName
                self._eat_symbol()       # '['
                self.CompileExpression()
                self._eat_symbol()       # ']'
            elif self.tokenizer.next_token() in ["(", "."]:
                self.CompileSubroutineCall()
            else:
                self._eat_identifier()   # varName
        elif self.tokenizer.token == "(":
            self._eat_symbol()       # '('
            self.CompileExpression()
            self._eat_symbol()       # ')'
        elif self.tokenizer.token in ['-', '~']:
            self._eat_symbol()       # unaryOp
            self.CompileTerm()
        self.file.write("</term>\n")

    def IsExpression(self):
        """Return True if the current token can start an expression."""
        kind = self.tokenizer.tokenType()
        if kind in ("INT_CONST", "STRING_CONST", "IDENTIFIER"):
            return True
        if kind == "KEYWORD" and self.tokenizer.token in ['true', 'false', 'null', 'this']:
            return True
        if self.tokenizer.token in ['(', '-', '~']:
            return True
        return False

    def CompileExpressionList(self):
        """expressionList: (expression (',' expression)*)?"""
        self.file.write("<expressionList>\n")
        if self.IsExpression():
            self.CompileExpression()
            while self.tokenizer.token == ",":
                self._eat_symbol()   # ','
                self.CompileExpression()
        self.file.write("</expressionList>\n")

    def CompileSubroutineCall(self):
        """subroutineCall: subroutineName '(' expressionList ')' |
        (className|varName) '.' subroutineName '(' expressionList ')'"""
        if self.tokenizer.next_token() == "(":
            self._eat_identifier()   # subroutineName
        else:
            self._eat_identifier()   # className | varName
            self._eat_symbol()       # '.'
            self._eat_identifier()   # subroutineName
        self._eat_symbol()           # '('
        self.CompileExpressionList()
        self._eat_symbol()           # ')'
JackAnalyzer
from CompilationEngine import CompilationEngine
import sys
import os
def AnalyzerOneFile(filename):
    """Parse one .jack file and write its parse tree to <stem>.xml.

    flag=2 makes CompilationEngine write <stem>.xml (no "X" suffix).
    """
    compiler = CompilationEngine(filename, 2)
    try:
        # Load the first token ('class') before starting the parse.
        compiler.tokenizer.advance()
        compiler.CompileClass()
    finally:
        # Close the output even if compilation raises, so a partial
        # run does not leak the file handle.
        compiler.file.close()
if __name__ == "__main__":
    # Usage: JackAnalyzer <file.jack | directory>
    if len(sys.argv) != 2:
        print("Error")
        sys.exit(1)
    path = sys.argv[-1]
    # Last path component: a "name.ext" shape means a single file.
    last = path.split("/")[-1]
    if len(last.split(".")) == 2:
        # Single .jack file given directly.
        AnalyzerOneFile(path)
    else:
        # Directory: analyze every .jack file inside it.
        for entry in os.listdir(path):
            if entry.split(".")[-1] == "jack":
                AnalyzerOneFile(path + "/" + entry)
本博客所有文章除特别声明外,均采用 CC BY-NC-SA 4.0 许可协议。转载请注明来自 Doraemonzzz!
评论
Valine / Livere