这次回顾第六章的内容,这一章介绍了如何编写汇编器(Assembler),即把Hack汇编代码翻译为二进制机器码的程序。

课程官网:

https://www.nand2tetris.org/

视频地址:

https://www.coursera.org/learn/build-a-computer

Part 1:课程回顾

这部分从略,主要内容在Project部分。

Part 2:项目

该项目我使用的语言是Python,只要按照提示实现每个接口即可。

Code

Code将C指令中的dest、comp、jump助记符分别翻译为对应的二进制字符串,接口如下:

根据课件第29页的对应关系进行替换即可:

class Code():
    """Translate Hack C-instruction mnemonics into their binary fields.

    Each method takes the textual mnemonic of one field and returns the
    corresponding bit string. The lookups are table driven; the tables
    below are class-level constants shared by all instances.
    """

    # dest field, three bits. From the table: A sets the first bit, D the
    # second and M the third. Both "MD"/"DM" and "AMD"/"ADM" spellings are
    # accepted so that sources written against either edition of the Hack
    # specification assemble.
    _DEST = {
        "M": "001",
        "D": "010",
        "MD": "011",
        "DM": "011",
        "A": "100",
        "AM": "101",
        "AD": "110",
        "AMD": "111",
        "ADM": "111",
    }

    # comp field, seven bits: the "a" bit followed by the six "c" bits.
    # The a bit is 1 for the forms that read M and 0 otherwise.
    _COMP = {
        "0": "0101010",
        "1": "0111111",
        "-1": "0111010",
        "D": "0001100",
        "A": "0110000",
        "M": "1110000",
        "!D": "0001101",
        "!A": "0110001",
        "!M": "1110001",
        "-D": "0001111",
        "-A": "0110011",
        "-M": "1110011",
        "D+1": "0011111",
        "A+1": "0110111",
        "M+1": "1110111",
        "D-1": "0001110",
        "A-1": "0110010",
        "M-1": "1110010",
        "D+A": "0000010",
        "D+M": "1000010",
        "D-A": "0010011",
        "D-M": "1010011",
        "A-D": "0000111",
        "M-D": "1000111",
        "D&A": "0000000",
        "D&M": "1000000",
        "D|A": "0010101",
        "D|M": "1010101",
    }

    # jump field, three bits.
    _JUMP = {
        "JGT": "001",
        "JEQ": "010",
        "JGE": "011",
        "JLT": "100",
        "JNE": "101",
        "JLE": "110",
        "JMP": "111",
    }

    def __init__(self):
        pass

    def dest(self, mnemonic):
        """Return the 3-bit dest code for *mnemonic*.

        Any mnemonic that is not in the table (including the empty string
        used when an instruction has no destination) yields "000".
        """
        return self._DEST.get(mnemonic, "000")

    def comp(self, mnemonic):
        """Return the 7-bit comp code (a bit + six c bits) for *mnemonic*.

        An unrecognised mnemonic yields the empty string, so callers that
        need a full 16-bit instruction should treat "" as an error.
        """
        return self._COMP.get(mnemonic, "")

    def jump(self, mnemonic):
        """Return the 3-bit jump code for *mnemonic*.

        Any mnemonic that is not in the table (including the empty string
        used when an instruction has no jump) yields "000".
        """
        return self._JUMP.get(mnemonic, "000")

SymbolTable

SymbolTable记录符号表,其接口如下:

具体实现使用Python中的字典即可,对应关系参照课件43页:

class SymbolTable():
    """Mapping from symbolic names to numeric addresses.

    The table is stored in ``self.table`` and starts out holding the
    predefined symbols: SP, LCL, ARG, THIS, THAT, R0 through R15, SCREEN
    and KBD.
    """

    def __init__(self):
        predefined = {"SP": 0, "LCL": 1, "ARG": 2, "THIS": 3, "THAT": 4}
        # R0..R15 map to addresses 0..15.
        predefined.update(("R" + str(n), n) for n in range(16))
        predefined.update({"SCREEN": 16384, "KBD": 24576})
        self.table = predefined

    def contains(self, symbol):
        """Return True when *symbol* already has an entry."""
        return symbol in self.table

    def addEntry(self, symbol, address):
        """Bind *symbol* to *address* unless the symbol is already present.

        An existing entry is left untouched and a message is printed.
        """
        if self.contains(symbol):
            print("The symbol already exists!")
            return
        self.table[symbol] = address

    def getAddress(self, symbol):
        """Return the address bound to *symbol*; raises KeyError if absent."""
        return self.table[symbol]

Parser

Parser的作用是解析Hack代码,其接口如下:

这部分复杂一些,该类里面实现了向SymbolTable添加symbol的功能:

class Parser():
    """Read a Hack assembly file and step through its commands.

    The constructor loads the file, dropping comments, blank lines and all
    whitespace, and keeps the remaining commands in ``self.text``. The
    cursor is advanced with ``advance()``; the other methods inspect the
    command currently held in ``self.command``.
    """

    def __init__(self, filename):
        """Load *filename* and reset the cursor to the first command."""
        self.text = []
        with open(filename) as f:
            for line in f:
                # Cut the comment off BEFORE removing whitespace, so that
                # comment lines are skipped whether or not a space follows
                # the "//" (e.g. both "// note" and "//note").
                statement = "".join(line.split("//", 1)[0].split())
                if statement:
                    self.text.append(statement)
        # Address handed to the next new variable symbol.
        self.address = 16
        # Index into self.text of the next command to read.
        self.i = 0
        # Number of commands read so far, not counting (LABEL) lines.
        self.j = 0
        # Total number of commands.
        self.max = len(self.text)
        # Command the cursor is currently on.
        self.command = ""
        # comp, dest and jump parts of the current C-instruction.
        self.cp = ""
        self.dt = ""
        self.jp = ""

    def hasMoreCommands(self):
        """Return True while there are commands left to read."""
        return self.i < self.max

    def advance(self):
        """Move to the next command, if any, and update the counters."""
        if self.hasMoreCommands():
            self.command = self.text[self.i]
            self.i += 1
            # Label declarations do not count as instructions.
            if self.commandType() != "L":
                self.j += 1

    def commandType(self):
        """Classify the current command as "A", "L" or "C".

        For a C-instruction the dest, comp and jump parts are also stored
        in ``self.dt``, ``self.cp`` and ``self.jp``; a missing dest or jump
        is stored as the empty string.
        """
        if self.command[0] == "@":
            return "A"
        elif self.command[0] == "(":
            return "L"
        else:
            # General form is dest=comp;jump with "dest=" and ";jump" both
            # optional. partition() handles every combination, including a
            # bare comp with neither separator.
            body = self.command
            if "=" in body:
                self.dt, _, body = body.partition("=")
            else:
                self.dt = ""
            self.cp, _, self.jp = body.partition(";")
            return "C"

    def symbol(self, symboltable):
        """Return the numeric value of the current A or L command.

        For "@number" the number itself is returned. For "@name" the name
        is looked up in *symboltable*; an unknown name is first added at
        ``self.address``, which is then incremented. For "(NAME)" the name
        is looked up and, if unknown, added with the current value of
        ``self.j``. Returns None for a C-instruction.
        """
        kind = self.commandType()
        if kind == "A":
            sym = self.command[1:]
            # Numeric constant: use it directly.
            if sym.isdigit():
                return int(sym)
            # Symbolic reference: allocate on first use.
            if not symboltable.contains(sym):
                symboltable.addEntry(sym, self.address)
                self.address += 1
            return symboltable.getAddress(sym)
        elif kind == "L":
            sym = self.command[1:-1]
            if not symboltable.contains(sym):
                symboltable.addEntry(sym, self.j)
            return symboltable.getAddress(sym)
        return None

    def dest(self, code):
        """Return ``code.dest`` of the dest part; None unless a C command."""
        if self.commandType() == "C":
            return code.dest(self.dt)

    def comp(self, code):
        """Return ``code.comp`` of the comp part; None unless a C command."""
        if self.commandType() == "C":
            return code.comp(self.cp)

    def jump(self, code):
        """Return ``code.jump`` of the jump part; None unless a C command."""
        if self.commandType() == "C":
            return code.jump(self.jp)

HackAssembler

主程序,实现课本中的算法即可:

代码如下:

from Code import Code
from SymbolTable import SymbolTable
from Parser import Parser
import sys

if __name__ == "__main__":
    # Entry point: assemble the .asm file named on the command line into a
    # .hack file written next to it.
    # os is imported locally because the script's import list does not
    # include it.
    import os

    if len(sys.argv) != 2:
        print("Error")
        sys.exit(1)
    filename = sys.argv[-1]

    # Set up the three translator components.
    code = Code()
    symboltable = SymbolTable()
    parser = Parser(filename)

    # Pass 1: record every (LABEL) with the number of instructions read so
    # far, i.e. the address of the instruction that follows the label.
    while parser.hasMoreCommands():
        parser.advance()
        if parser.commandType() == "L":
            symboltable.addEntry(parser.command[1:-1], parser.j)

    # Rewind the parser for the second pass.
    parser.i = 0
    parser.j = 0

    Res = []
    # Pass 2: translate A- and C-instructions; label lines emit nothing.
    while parser.hasMoreCommands():
        parser.advance()
        kind = parser.commandType()
        if kind == "A":
            # Resolved address as a zero-padded 16-bit binary string.
            Res.append(format(parser.symbol(symboltable), "016b"))
        elif kind == "C":
            dest = parser.dest(code)
            comp = parser.comp(code)
            jump = parser.jump(code)
            Res.append("111" + comp + dest + jump)

    # Strip only the final extension so that paths such as "./Add.asm" or
    # "my.prog.asm" keep their directory part and inner dots.
    name = os.path.splitext(filename)[0]
    # Write one instruction per line.
    with open(name + ".hack", "w") as f:
        f.writelines(line + "\n" for line in Res)