#!/usr/bin/env python ''' Hack Assembler Daniel Kronovet kronovet@gmail.com This implementation sticks closely to the API defined by the authors. That said, I'm not thrilled with this implementation. The API given by the authors felt a bit heavy; if time permits it would be worth reimplementing this with a thinner interface, specifically by minimizing the SymbolTable API and integrating Assembler and Parser into a single object. ''' class Assembler(object): def __init__(self, parser, symbol_table, code): self.parser = parser self.symbol_table = symbol_table self.code = code def assemble(self, asm_filename): self.prepare_files(asm_filename) parser = self.parser # First pass to build label table while parser.has_more_commands: parser.advance() if parser.command_type == 'L_COMMAND': self.write_L(parser.symbol) # Second pass to write .hack file parser.reset_file() self.ram_address = 16 while parser.has_more_commands: parser.advance() if parser.command_type == 'A_COMMAND': self.write_A(parser.symbol) elif parser.command_type == 'C_COMMAND': self.write_C(parser.dest, parser.comp, parser.jump) parser.close_asm() self.hack.close() def prepare_files(self, asm_filename): assert '.asm' in asm_filename, 'Must pass .asm file!' self.parser.load_file(asm_filename) hack_filename = asm_filename.replace('.asm', '.hack') self.hack = open(hack_filename, 'w') def create_address(self, symbol): address = '{0:b}'.format(int(symbol)) base = (15 - len(address)) * '0' return base + address def write(self, instruction): self.hack.write(instruction + '\n') def write_A(self, symbol): instruction = '0' try: int(symbol) except ValueError: if not self.symbol_table.contains(symbol): # Build table on first pass address = self.create_address(self.ram_address) self.symbol_table.add_entry(symbol, address) self.ram_address += 1 instruction += self.symbol_table.get_address(symbol) else: instruction += self.create_address(symbol) self.write(instruction) def write_L(self, symbol): address = self.create_address(self.parser.instruction_num+1) self.symbol_table.add_entry(symbol, address) def write_C(self, dest, comp, jump): instruction = '111' instruction += self.code.comp(comp) instruction += self.code.dest(dest) instruction += self.code.jump(jump) self.write(instruction) class Parser(object): def load_file(self, asm_filename): self.asm = open(asm_filename, 'r') self.reset_file() self.symbol = None self.dest = None self.comp = None self.jump = None self.command_type = None def reset_file(self): self.asm.seek(0) line = self.asm.readline().strip() while self.is_not_instruction(line): line = self.asm.readline().strip() self.curr_instruction = line self.instruction_num = -1 # 0 once first instruction is parsed. def close_asm(self): self.asm.close() def is_not_instruction(self, line): return not line or line[:2] == '//' @property def has_more_commands(self): return bool(self.curr_instruction) def get_next_instruction(self): line = self.asm.readline().strip() line = line.split('//')[0] line = line.strip() self.curr_instruction = line def advance(self): '''Parse current instruction and load next instruction ''' ci = self.curr_instruction if ci[0] == '@': self.parse_A(ci) self.instruction_num += 1 elif ci[0] == '(': self.parse_L(ci) else: self.parse_C(ci) self.instruction_num += 1 self.get_next_instruction() def parse_A(self, instruction): '''A instruction format: @address ''' self.symbol = instruction[1:] self.command_type = 'A_COMMAND' def parse_L(self, instruction): '''L instruction format: (LABEL) ''' self.symbol = instruction[1:-1] self.command_type = 'L_COMMAND' def parse_C(self, instruction): '''C instruction format: dest=comp;jump ''' self.dest, self.comp, self.jump = None, None, None parts = instruction.split(';') remainder = parts[0] if len(parts) == 2: self.jump = parts[1] parts = remainder.split('=') if len(parts) == 2: self.dest = parts[0] self.comp = parts[1] else: self.comp = parts[0] self.command_type = 'C_COMMAND' class Code(object): def dest(self, mnemonic): '''Alt: Enumerate all possibilities and do dictionary lookup. Current implemention is more flexible, but slower (max 9 comparisons vs 1 hashing) ''' bin = ['0', '0', '0'] if mnemonic is None: return ''.join(bin) if 'A' in mnemonic: bin[0] = '1' if 'D' in mnemonic: bin[1] = '1' if 'M' in mnemonic: bin[2] = '1' return ''.join(bin) def comp(self, mnemonic): comp_dict = { '0': '101010', '1': '111111', '-1': '111010', 'D': '001100', 'A': '110000', '!D': '001101', '!A': '110001', '-D': '001111', '-A': '110011', 'D+1': '011111', 'A+1': '110111', 'D-1': '001110', 'A-1': '110010', 'D+A': '000010', 'D-A': '010011', 'A-D': '000111', 'D&A': '000000', 'D|A': '010101', } a_bit = '0' if 'M' in mnemonic: a_bit = '1' mnemonic = mnemonic.replace('M', 'A') c_bit = comp_dict.get(mnemonic, '000000') return a_bit + c_bit def jump(self, mnemonic): jump_dict = { 'JGT': '001', 'JEQ': '010', 'JGE': '011', 'JLT': '100', 'JNE': '101', 'JLE': '110', 'JMP': '111', } return jump_dict.get(mnemonic, '000') class SymbolTable(object): def __init__(self): self.symbol_dict = self.base_table() self.ram_position = 16 # 0-15 have preset values def get_address(self, symbol): return self.symbol_dict[symbol] def contains(self, symbol): return symbol in self.symbol_dict def add_entry(self, symbol, address): self.symbol_dict[symbol] = address def base_table(self): # 15 bit addresses, 32K locations return { 'SP': '000000000000000', 'LCL': '000000000000001', 'ARG': '000000000000010', 'THIS': '000000000000011', 'THAT': '000000000000100', 'R0': '000000000000000', 'R1': '000000000000001', 'R2': '000000000000010', 'R3': '000000000000011', 'R4': '000000000000100', 'R5': '000000000000101', 'R6': '000000000000110', 'R7': '000000000000111', 'R8': '000000000001000', 'R9': '000000000001001', 'R10': '000000000001010', 'R11': '000000000001011', 'R12': '000000000001100', 'R13': '000000000001101', 'R14': '000000000001110', 'R15': '000000000001111', 'SCREEN': '100000000000000', 'KBD': '110000000000000', } if __name__ == '__main__': import sys #asm_filename = sys.argv[1] asm_filename = "Rect.asm" assembler = Assembler(Parser(), SymbolTable(), Code()) assembler.assemble(asm_filename)