Add Hack assembler implementation and project files

This commit is contained in:
2025-10-06 23:49:11 -04:00
parent 1853f8070e
commit ed1df6332a
5 changed files with 399 additions and 2 deletions

View File

@@ -5,8 +5,8 @@ import sys
import os
def count_bunny(filepath):
# Count "bunny" occurrences in a single file
# Skips comment lines and inline comments (anything after #)
# count "bunny" occurrences in a single file
# skips comment lines and inline comments (anything after #)
count = 0
with open(filepath, 'r') as f:
for line in f:

1
06/PythonFileName.txt Normal file
View File

@@ -0,0 +1 @@
hasm.py

300
06/hasm.py Normal file
View File

@@ -0,0 +1,300 @@
import sys
import os
class Parser:
    """Read a Hack assembly file and split each command into its fields.

    The constructor loads the whole file, removes comments, blank lines and
    all whitespace, and stores the remaining commands in ``self.commands``.
    ``advance()`` steps through them one at a time; the accessor methods
    decode ``self.current_command``.
    """

    def __init__(self, filename):
        """Load and clean the assembly file at *filename*.

        Any error raised by ``open`` (e.g. a missing file) propagates.
        """
        self.commands = []
        self.current_command = ""
        self.command_index = -1  # -1 means "positioned before the first command"
        with open(filename, 'r') as file:
            for line in file:
                # everything from '//' to the end of the line is a comment
                code_part = line.partition('//')[0]
                # whitespace carries no meaning inside a command, so remove
                # it everywhere, not only at the ends; otherwise
                # "D = M ; JGT" would yield fields such as "D " and " JGT"
                command = ''.join(code_part.split())
                # skip lines that held nothing but whitespace/comments
                if command:
                    self.commands.append(command)

    def hasMoreCommands(self):
        """Return True while at least one command follows the current one."""
        return self.command_index + 1 < len(self.commands)

    def advance(self):
        """Make the next command current; do nothing if none remain."""
        if self.hasMoreCommands():
            self.command_index += 1
            self.current_command = self.commands[self.command_index]

    def commandType(self):
        """Classify the current command.

        Returns 'A_COMMAND' for ``@xxx``, 'L_COMMAND' for ``(xxx)`` and
        'C_COMMAND' for everything else.
        """
        if self.current_command.startswith('@'):
            return 'A_COMMAND'
        elif self.current_command.startswith('(') and self.current_command.endswith(')'):
            return 'L_COMMAND'
        else:
            return 'C_COMMAND'

    def symbol(self):
        """Return xxx from ``@xxx`` or ``(xxx)``; None for a C-command."""
        command_type = self.commandType()
        if command_type == 'A_COMMAND':
            return self.current_command[1:]
        elif command_type == 'L_COMMAND':
            return self.current_command[1:-1]
        return None

    def dest(self):
        """Return the dest mnemonic of a C-command.

        Returns 'null' when the command has no ``=`` part, and None when
        the current command is not a C-command.
        """
        if self.commandType() == 'C_COMMAND':
            if '=' in self.current_command:
                return self.current_command.split('=')[0]
            return 'null'
        return None

    def comp(self):
        """Return the comp mnemonic of a C-command; None otherwise."""
        if self.commandType() == 'C_COMMAND':
            command = self.current_command
            # strip dest if present
            if '=' in command:
                command = command.split('=')[1]
            # strip jump if present
            if ';' in command:
                command = command.split(';')[0]
            return command
        return None

    def jump(self):
        """Return the jump mnemonic of a C-command.

        Returns 'null' when the command has no ``;`` part, and None when
        the current command is not a C-command.
        """
        if self.commandType() == 'C_COMMAND':
            if ';' in self.current_command:
                return self.current_command.split(';')[1]
            return 'null'
        return None
class Code:
    """Translate C-instruction mnemonics into their binary bit fields.

    The three lookup tables are built per instance and exposed as
    ``dest_codes``, ``jump_codes`` and ``comp_codes``.
    """

    def __init__(self):
        # 3-bit destination field (ddd)
        self.dest_codes = {
            'null': '000',
            'M': '001',
            'D': '010',
            'MD': '011',
            'A': '100',
            'AM': '101',
            'AD': '110',
            'AMD': '111'
        }
        # 3-bit jump field (jjj)
        self.jump_codes = {
            'null': '000',
            'JGT': '001',
            'JEQ': '010',
            'JGE': '011',
            'JLT': '100',
            'JNE': '101',
            'JLE': '110',
            'JMP': '111'
        }
        # 7-bit computation field (a + cccccc)
        self.comp_codes = {
            # a=0 computations
            '0': '0101010',
            '1': '0111111',
            '-1': '0111010',
            'D': '0001100',
            'A': '0110000',
            '!D': '0001101',
            '!A': '0110001',
            '-D': '0001111',
            '-A': '0110011',
            'D+1': '0011111',
            'A+1': '0110111',
            'D-1': '0001110',
            'A-1': '0110010',
            'D+A': '0000010',
            'D-A': '0010011',
            'A-D': '0000111',
            'D&A': '0000000',
            'D|A': '0010101',
            # a=1 computations (M versions)
            'M': '1110000',
            '!M': '1110001',
            '-M': '1110011',
            'M+1': '1110111',
            'M-1': '1110010',
            'D+M': '1000010',
            'D-M': '1010011',
            'M-D': '1000111',
            'D&M': '1000000',
            'D|M': '1010101'
        }

    @staticmethod
    def _lookup(table, mnemonic, field):
        # Shared lookup that refuses unknown mnemonics. Falling back to
        # all-zero bits would silently emit a different, valid-looking
        # instruction (e.g. comp '0000000' is D&A) for a simple typo.
        try:
            return table[mnemonic]
        except (KeyError, TypeError):
            raise ValueError(f"unknown {field} mnemonic: {mnemonic!r}") from None

    def dest(self, mnemonic):
        """Return the 3-bit dest code for *mnemonic*.

        Raises:
            ValueError: if *mnemonic* is not a known dest mnemonic.
        """
        return self._lookup(self.dest_codes, mnemonic, 'dest')

    def comp(self, mnemonic):
        """Return the 7-bit comp code for *mnemonic*.

        Raises:
            ValueError: if *mnemonic* is not a known comp mnemonic.
        """
        return self._lookup(self.comp_codes, mnemonic, 'comp')

    def jump(self, mnemonic):
        """Return the 3-bit jump code for *mnemonic*.

        Raises:
            ValueError: if *mnemonic* is not a known jump mnemonic.
        """
        return self._lookup(self.jump_codes, mnemonic, 'jump')
class SymbolTable:
    """Symbol-to-address mapping, pre-loaded with the predefined symbols."""

    def __init__(self):
        # pointer aliases for RAM[0..4]
        predefined = {'SP': 0, 'LCL': 1, 'ARG': 2, 'THIS': 3, 'THAT': 4}
        # general registers R0..R15 map to RAM[0..15]
        predefined.update((f'R{number}', number) for number in range(16))
        # memory-mapped I/O base addresses
        predefined['SCREEN'] = 16384
        predefined['KBD'] = 24576
        self.table = predefined

    def addEntry(self, symbol, address):
        """Bind *symbol* to *address*, replacing any existing binding."""
        self.table.update({symbol: address})

    def contains(self, symbol):
        """Return True if *symbol* already has an address."""
        return symbol in self.table

    def GetAddress(self, symbol):
        """Return the address bound to *symbol*, or None if it is unknown."""
        try:
            return self.table[symbol]
        except KeyError:
            return None
def assemble(input_file):
    """Assemble the Hack program in *input_file* into binary text lines.

    Works in two passes over the parsed commands:
      1. bind every ``(LABEL)`` to the ROM address of the instruction that
         follows it;
      2. translate each A- and C-instruction, giving symbols seen for the
         first time consecutive RAM addresses starting at 16.

    Returns:
        A list of 16-character strings of '0'/'1', one per instruction.

    Raises:
        ValueError: if an A-instruction resolves to a value above 32767,
            which does not fit the 15-bit value field.
    """
    max_address = 0x7FFF  # largest value an A-instruction can carry

    parser = Parser(input_file)
    code = Code()
    symbol_table = SymbolTable()

    # first pass: scan for labels
    rom_address = 0
    parser.command_index = -1
    while parser.hasMoreCommands():
        parser.advance()
        if parser.commandType() == 'L_COMMAND':
            # a label names the address of the next real instruction
            symbol_table.addEntry(parser.symbol(), rom_address)
        else:
            # only A- and C-commands occupy ROM
            rom_address += 1

    # second pass: generate binary code
    output_lines = []
    variable_address = 16  # variables start at RAM[16]
    parser.command_index = -1  # rewind the parser for the second sweep
    while parser.hasMoreCommands():
        parser.advance()
        command_type = parser.commandType()
        if command_type == 'A_COMMAND':
            symbol = parser.symbol()
            # resolve symbol to address
            if symbol.isdigit():
                address = int(symbol)
            elif symbol_table.contains(symbol):
                address = symbol_table.GetAddress(symbol)
            else:
                # new variable
                symbol_table.addEntry(symbol, variable_address)
                address = variable_address
                variable_address += 1
            # A larger value would spill into (or past) the leading bit,
            # so the emitted word would no longer be an A-instruction.
            if address > max_address:
                raise ValueError(
                    f"A-instruction value out of range (0-{max_address}): @{symbol}"
                )
            # leading 0 marks an A-instruction, followed by the 15-bit value
            output_lines.append(format(address, '016b'))
        elif command_type == 'C_COMMAND':
            # translate each field of the c-instruction to binary
            dest_code = code.dest(parser.dest())
            comp_code = code.comp(parser.comp())
            jump_code = code.jump(parser.jump())
            # assemble 16-bit instruction: 111accccccdddjjj
            output_lines.append('111' + comp_code + dest_code + jump_code)
        # L_COMMAND generates no code
    return output_lines
def main():
    """Command-line entry point: assemble one .asm file into a .hack file.

    Expects exactly one argument, the path of an existing file ending in
    '.asm'. The output is written beside it with the extension replaced by
    '.hack'. Prints a message and exits with status 1 on a usage error, an
    invalid input path, or any exception raised while assembling/writing.
    """
    argv = sys.argv
    if len(argv) != 2:
        print("Usage: python hasm.py <input_file.asm>")
        sys.exit(1)

    source_path = argv[1]

    # reject paths that are missing or carry the wrong extension
    if not os.path.exists(source_path):
        print(f"Error: File '{source_path}' not found")
        sys.exit(1)
    if not source_path.endswith('.asm'):
        print("Error: Input file must have .asm extension")
        sys.exit(1)

    # swap the 4-character '.asm' suffix for '.hack'
    target_path = source_path[:-4] + '.hack'

    try:
        words = assemble(source_path)
        # one instruction per line
        with open(target_path, 'w') as out:
            for word in words:
                out.write(word + '\n')
        print(f"Assembly completed. Output written to '{target_path}'")
        print(f"Generated {len(words)} instructions")
    except Exception as e:
        print(f"Error during assembly: {e}")
        sys.exit(1)
# run the assembler only when this file is executed as a script, not on import
if __name__ == "__main__":
    main()

78
06/hasm_SKELETON.py Normal file
View File

@@ -0,0 +1,78 @@
import argparse
class Parser():
    """Skeleton of the assembler's parser; every method is still a stub.

    Each method currently does nothing and returns None.
    """

    def __init__(self, data):
        # stub: `data` is accepted but not used yet
        pass

    def hasMoreCommands(self):
        # stub
        pass

    # The skeleton originally spelled this method "hasMoreCommnads"; the
    # misspelled name is kept as an alias so existing callers keep working.
    hasMoreCommnads = hasMoreCommands

    def advance(self):
        # stub
        pass

    def commandType(self):
        # stub
        pass

    def symbol(self):
        # stub
        pass

    def dest(self):
        # stub
        pass

    def comp(self):
        # stub
        pass

    def jump(self):
        # stub
        pass
class Code:
    """Skeleton of the mnemonic translator; every method is still a stub."""

    def dest(self, mnemonic):
        """Stub: ignores *mnemonic* and returns None."""
        return None

    def comp(self, mnemonic):
        """Stub: ignores *mnemonic* and returns None."""
        return None

    def jump(self, mnemonic):
        """Stub: ignores *mnemonic* and returns None."""
        return None
class SymbolTable:
    """Skeleton of the symbol table; every method is still a stub."""

    def __init__(self):
        """Stub: creates no state yet."""
        return None

    def addEntry(self, symbol, address):
        """Stub: ignores its arguments and returns None."""
        return None

    def contains(self, symbol):
        """Stub: ignores *symbol* and returns None."""
        return None

    def GetAddress(self, symbol):
        """Stub: ignores *symbol* and returns None."""
        return None
def main():
    """Skeleton entry point for the assembler.

    Prints a short notice, parses the command line (a required input file
    and an optional ``-o`` output file defaulting to 'Prog.hack'), builds a
    Parser from the input file name and then calls a placeholder method
    that does not exist, so running the skeleton unmodified fails there.
    """
    notice = (
        "You do not have to use this method to parse arguments.",
        "The example from project 5.5 works fine too.",
        "You will get an error soon if you are running this.",
        "You need to actually modify the code. What function",
        "do you want to run first?",
        "-------------------------",
    )
    for message in notice:
        print(message)

    # command-line interface: positional input file, optional -o output file
    cli = argparse.ArgumentParser(description='Assembler for the Hack CPU')
    cli.add_argument('input_file', type=str)
    cli.add_argument('-o', dest='output_file', default='Prog.hack', type=str)
    options = cli.parse_args()

    parser = Parser(options.input_file)
    # placeholder call: Parser defines no such method, so this line errors
    parser.DoYourThingButPleaseRenameThisMethod()
# Call the main function only when this file is run as a script (not on import)
if __name__ == "__main__":
    main()

18
06/readme.txt Normal file
View File

@@ -0,0 +1,18 @@
Project 6: The Assembler
The description for this project is found at:
https://www.nand2tetris.org/project06
You may _not_ work in pairs on this project. For now, please work individually. We'll start working with partners in the next projects after Fall Break.
The default name for your file should be 'hasm.py'. If you name your file something else, just edit the "PythonFileName.txt" file to have the correct name. For example, the posted text file would tell the grader to run 'hasm_SKELETON.py' for assembling files. Be sure to place your assembled .hack file into the *same location* as the .asm file that you are assembling. The name should be such that XXX.asm is compiled to XXX.hack.
Hints:
a) Use the proposed API in chapter 6 that splits the tasks across three "modules."
b) Start with creating an assembler that will assemble symbol-less files. You can test your assembler against the "MaxL.asm", "RectL.asm" and "PongL.asm" files provided in the project06 directory.
c) To develop a symbol table, you will need to i) pre-load your symbol table with the pre-defined symbols and ii) have your program take a two-pass approach that involves reading the ASM file twice. (The first pass reads in all the symbols; the second pass does the actual assembling.)