Files
eceg431/06/hasm.py

301 lines
8.1 KiB
Python

import sys
import os
class Parser:
    """Read a Hack assembly file and break each command into its fields.

    On construction the whole file is loaded; comments (``//`` to end of
    line), blank lines and all whitespace are discarded.  The remaining
    commands are stored in ``self.commands`` and visited one at a time with
    ``hasMoreCommands()`` / ``advance()``.
    """

    def __init__(self, filename):
        """Load and clean the assembly file at *filename*."""
        self.commands = []
        self.current_command = ""
        self.command_index = -1  # -1 means "before the first command"
        with open(filename, 'r') as file:
            for line in file:
                # everything from '//' to the end of the line is a comment
                line = line.split('//', 1)[0]
                # Drop ALL whitespace, not only leading/trailing: otherwise
                # "D = M ; JGT" would yield fields such as 'D ' and ' M '
                # that never match a mnemonic.
                line = ''.join(line.split())
                # skip lines that are empty after cleaning
                if line:
                    self.commands.append(line)

    def hasMoreCommands(self):
        """Return True while at least one command remains after the current one."""
        return self.command_index + 1 < len(self.commands)

    def advance(self):
        """Make the next command current; do nothing if none remain."""
        if self.hasMoreCommands():
            self.command_index += 1
            self.current_command = self.commands[self.command_index]

    def commandType(self):
        """Classify the current command.

        Returns 'A_COMMAND' for ``@xxx``, 'L_COMMAND' for ``(xxx)`` and
        'C_COMMAND' for anything else.
        """
        command = self.current_command
        if command.startswith('@'):
            return 'A_COMMAND'
        if command.startswith('(') and command.endswith(')'):
            return 'L_COMMAND'
        return 'C_COMMAND'

    def symbol(self):
        """Return xxx from ``@xxx`` or ``(xxx)``; None for a C-command."""
        command_type = self.commandType()
        if command_type == 'A_COMMAND':
            return self.current_command[1:]
        if command_type == 'L_COMMAND':
            return self.current_command[1:-1]
        return None

    def dest(self):
        """Return the dest field of a C-command.

        Returns 'null' when the command has no ``=``, and None when the
        current command is not a C-command.
        """
        if self.commandType() != 'C_COMMAND':
            return None
        if '=' in self.current_command:
            return self.current_command.split('=')[0]
        return 'null'

    def comp(self):
        """Return the comp field of a C-command; None for other commands."""
        if self.commandType() != 'C_COMMAND':
            return None
        command = self.current_command
        # strip dest if present
        if '=' in command:
            command = command.split('=')[1]
        # strip jump if present
        if ';' in command:
            command = command.split(';')[0]
        return command

    def jump(self):
        """Return the jump field of a C-command.

        Returns 'null' when the command has no ``;``, and None when the
        current command is not a C-command.
        """
        if self.commandType() != 'C_COMMAND':
            return None
        if ';' in self.current_command:
            return self.current_command.split(';')[1]
        return 'null'
class Code:
    """Translate Hack C-instruction mnemonics into their binary fields.

    ``dest`` and ``jump`` yield 3-bit strings and ``comp`` yields a 7-bit
    string (the ``a`` bit followed by the six ``c`` bits).  Unknown
    mnemonics raise ``ValueError`` instead of silently assembling to zeros,
    which would emit a valid-looking but wrong instruction.
    """

    def __init__(self):
        """Build the mnemonic-to-bits lookup tables."""
        self.dest_codes = {
            'null': '000',
            'M': '001',
            'D': '010',
            'MD': '011',
            'A': '100',
            'AM': '101',
            'AD': '110',
            'AMD': '111',
        }
        self.jump_codes = {
            'null': '000',
            'JGT': '001',
            'JEQ': '010',
            'JGE': '011',
            'JLT': '100',
            'JNE': '101',
            'JLE': '110',
            'JMP': '111',
        }
        self.comp_codes = {
            # a=0 computations
            '0': '0101010',
            '1': '0111111',
            '-1': '0111010',
            'D': '0001100',
            'A': '0110000',
            '!D': '0001101',
            '!A': '0110001',
            '-D': '0001111',
            '-A': '0110011',
            'D+1': '0011111',
            'A+1': '0110111',
            'D-1': '0001110',
            'A-1': '0110010',
            'D+A': '0000010',
            'D-A': '0010011',
            'A-D': '0000111',
            'D&A': '0000000',
            'D|A': '0010101',
            # a=1 computations (M versions)
            'M': '1110000',
            '!M': '1110001',
            '-M': '1110011',
            'M+1': '1110111',
            'M-1': '1110010',
            'D+M': '1000010',
            'D-M': '1010011',
            'M-D': '1000111',
            'D&M': '1000000',
            'D|M': '1010101',
        }

    def dest(self, mnemonic):
        """Return the 3-bit dest code for *mnemonic*.

        Besides the table entries, any ordering of distinct registers from
        A, D, M is accepted (e.g. 'DM' or 'ADM'), since the bits only record
        which registers are written.

        Raises:
            ValueError: if *mnemonic* is not a valid destination.
        """
        bits = self.dest_codes.get(mnemonic)
        if bits is not None:
            return bits
        if (
            isinstance(mnemonic, str)
            and mnemonic
            and set(mnemonic) <= {'A', 'D', 'M'}
            and len(set(mnemonic)) == len(mnemonic)
        ):
            # bit order is A, D, M -- same layout as the table above
            return ''.join('1' if reg in mnemonic else '0' for reg in 'ADM')
        raise ValueError(f"Unknown dest mnemonic: {mnemonic!r}")

    def comp(self, mnemonic):
        """Return the 7-bit comp code (a + cccccc) for *mnemonic*.

        Raises:
            ValueError: if *mnemonic* is not a known computation.
        """
        bits = self.comp_codes.get(mnemonic)
        if bits is None:
            raise ValueError(f"Unknown comp mnemonic: {mnemonic!r}")
        return bits

    def jump(self, mnemonic):
        """Return the 3-bit jump code for *mnemonic*.

        Raises:
            ValueError: if *mnemonic* is not a known jump condition.
        """
        bits = self.jump_codes.get(mnemonic)
        if bits is None:
            raise ValueError(f"Unknown jump mnemonic: {mnemonic!r}")
        return bits
class SymbolTable:
    """Mapping from Hack assembly symbols to numeric addresses.

    A new table starts out holding the predefined symbols: the five pointer
    names, R0 through R15, SCREEN and KBD.
    """

    def __init__(self):
        """Create the table pre-loaded with the predefined symbols."""
        predefined = {'SP': 0, 'LCL': 1, 'ARG': 2, 'THIS': 3, 'THAT': 4}
        # R0..R15 map to addresses 0..15
        predefined.update((f'R{number}', number) for number in range(16))
        predefined['SCREEN'] = 16384
        predefined['KBD'] = 24576
        self.table = predefined

    def addEntry(self, symbol, address):
        """Bind *symbol* to *address*, replacing any existing binding."""
        self.table.update({symbol: address})

    def contains(self, symbol):
        """Return True if *symbol* has an entry in the table."""
        return symbol in self.table

    def GetAddress(self, symbol):
        """Return the address bound to *symbol*, or None if it is absent."""
        return self.table.get(symbol)
def assemble(input_file):
    """Assemble a Hack assembly file into binary instruction strings.

    Two passes are made over the parsed commands.  The first records the
    ROM address of every ``(LABEL)``.  The second translates each A- and
    C-command to a 16-character string of '0'/'1', allocating RAM addresses
    from 16 upward for symbols not already in the symbol table.

    Args:
        input_file: path of the .asm file to assemble.

    Returns:
        A list with one 16-character binary string per generated instruction.

    Raises:
        ValueError: if an A-instruction resolves to a value outside
            0..32767 (it would not fit the 15-bit address field).
    """
    # An A-instruction is '0' followed by a 15-bit value; a larger value
    # would set the top bit (turning it into a C-instruction) or produce
    # more than 16 bits.
    max_address = 0x7FFF

    parser = Parser(input_file)
    code = Code()
    symbol_table = SymbolTable()

    # first pass: scan for labels
    rom_address = 0
    parser.command_index = -1
    while parser.hasMoreCommands():
        parser.advance()
        if parser.commandType() == 'L_COMMAND':
            # a label names the address of the next real instruction
            symbol_table.addEntry(parser.symbol(), rom_address)
        else:
            # only A- and C-commands occupy a ROM slot
            rom_address += 1

    # second pass: generate binary code
    output_lines = []
    variable_address = 16  # variables start at RAM[16]
    parser.command_index = -1  # rewind the parser
    while parser.hasMoreCommands():
        parser.advance()
        command_type = parser.commandType()
        if command_type == 'A_COMMAND':
            symbol = parser.symbol()
            # resolve symbol to address
            if symbol.isdigit():
                address = int(symbol)
            elif symbol_table.contains(symbol):
                address = symbol_table.GetAddress(symbol)
            else:
                # first use of a new variable: give it the next free slot
                symbol_table.addEntry(symbol, variable_address)
                address = variable_address
                variable_address += 1
            if not 0 <= address <= max_address:
                raise ValueError(
                    f"A-instruction '@{symbol}' resolves to {address}, "
                    f"outside the valid range 0-{max_address}"
                )
            # convert to 16-bit binary
            output_lines.append(format(address, '016b'))
        elif command_type == 'C_COMMAND':
            # translate each field, then assemble: 111accccccdddjjj
            dest_code = code.dest(parser.dest())
            comp_code = code.comp(parser.comp())
            jump_code = code.jump(parser.jump())
            output_lines.append('111' + comp_code + dest_code + jump_code)
        # L_COMMAND generates no code
    return output_lines
def main():
    """Command-line entry point: assemble one .asm file into a .hack file.

    Expects exactly one argument, the path of an existing file ending in
    '.asm'.  The output is written next to it with the extension replaced
    by '.hack'.  Usage errors, validation errors and any exception raised
    while assembling or writing are reported with print() and end the
    process with exit status 1.
    """
    if len(sys.argv) != 2:
        print("Usage: python hasm.py <input_file.asm>")
        sys.exit(1)

    asm_path = sys.argv[1]

    # validate input file
    if not os.path.exists(asm_path):
        print(f"Error: File '{asm_path}' not found")
        sys.exit(1)
    if not asm_path.endswith('.asm'):
        print("Error: Input file must have .asm extension")
        sys.exit(1)

    # swap the '.asm' suffix for '.hack'
    hack_path = asm_path[:-len('.asm')] + '.hack'

    try:
        # assemble first, so a failure leaves no partial output file
        words = assemble(asm_path)
        with open(hack_path, 'w') as out:
            out.writelines(word + '\n' for word in words)
        print(f"Assembly completed. Output written to '{hack_path}'")
        print(f"Generated {len(words)} instructions")
    except Exception as err:
        print(f"Error during assembly: {err}")
        sys.exit(1)
# Run the assembler only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()