project10 - complete

2025-11-20 17:31:44 -05:00
parent 6b233eb9c9
commit 30d7936c6e
2 changed files with 44 additions and 32 deletions


@@ -241,13 +241,13 @@ class CompilationEngine:
     def writeOpenTag(self, tag):
         # write opening XML tag
-        self.output.write(" " * self.indent + f"<{tag}>\n")
+        self.output.write(" " * self.indent + f"<{tag}>\r\n")
         self.indent += 1

     def writeCloseTag(self, tag):
         # write closing XML tag
         self.indent -= 1
-        self.output.write(" " * self.indent + f"</{tag}>\n")
+        self.output.write(" " * self.indent + f"</{tag}>\r\n")

     def writeTerminal(self, tag, value):
         # write terminal (token) XML element

@@ -261,7 +261,7 @@ class CompilationEngine:
         elif value == "&":
             value = "&amp;"
-        self.output.write(" " * self.indent + f"<{tag}> {value} </{tag}>\n")
+        self.output.write(" " * self.indent + f"<{tag}> {value} </{tag}>\r\n")

     def writeCurrentToken(self):
         # write current token as XML
@@ -456,7 +456,10 @@ class CompilationEngine:
         self.writeOpenTag("statements")
         # process statements
-        while self.tokenizer.keyword() in ["let", "if", "while", "do", "return"]:
+        while (
+            self.tokenizer.getTokenType() == "KEYWORD"
+            and self.tokenizer.keyword() in ["let", "if", "while", "do", "return"]
+        ):
             keyword = self.tokenizer.keyword()
             if keyword == "let":

@@ -470,8 +473,6 @@ class CompilationEngine:
             elif keyword == "return":
                 self.compileReturn()
-            self.tokenizer.advance()
         self.writeCloseTag("statements")

     def compileLet(self):

@@ -510,6 +511,7 @@ class CompilationEngine:
         self.writeCurrentToken()
         self.writeCloseTag("letStatement")
+        self.tokenizer.advance()

     def compileIf(self):
         # compile if statement

@@ -556,10 +558,7 @@ class CompilationEngine:
             # closing brace
             self.writeCurrentToken()
-        else:
-            # no else, back up
-            return
+        self.tokenizer.advance()
         self.writeCloseTag("ifStatement")

     def compileWhile(self):

@@ -592,6 +591,7 @@ class CompilationEngine:
         self.writeCurrentToken()
         self.writeCloseTag("whileStatement")
+        self.tokenizer.advance()

     def compileDo(self):
         # compile do statement

@@ -630,6 +630,7 @@ class CompilationEngine:
         self.writeCurrentToken()
         self.writeCloseTag("doStatement")
+        self.tokenizer.advance()

     def compileReturn(self):
         # compile return statement

@@ -648,6 +649,7 @@ class CompilationEngine:
         self.writeCurrentToken()
         self.writeCloseTag("returnStatement")
+        self.tokenizer.advance()

     def compileExpression(self):
         # compile expression
@@ -775,7 +777,7 @@ def analyzeFile(jackFile, outputFile, tokenizeOnly=False):
     if tokenizeOnly:
         # tokenizer test output
         output = open(outputFile, "w")
-        output.write("<tokens>\n")
+        output.write("<tokens>\r\n")
         while tokenizer.hasMoreTokens():
             tokenizer.advance()

@@ -783,7 +785,7 @@ def analyzeFile(jackFile, outputFile, tokenizeOnly=False):
             if tokenType == "KEYWORD":
                 value = tokenizer.keyword()
-                output.write(f"<keyword> {value} </keyword>\n")
+                output.write(f"<keyword> {value} </keyword>\r\n")
             elif tokenType == "SYMBOL":
                 value = tokenizer.symbol()
                 # escape special characters

@@ -795,18 +797,18 @@ def analyzeFile(jackFile, outputFile, tokenizeOnly=False):
                     value = "&quot;"
                 elif value == "&":
                     value = "&amp;"
-                output.write(f"<symbol> {value} </symbol>\n")
+                output.write(f"<symbol> {value} </symbol>\r\n")
             elif tokenType == "IDENTIFIER":
                 value = tokenizer.identifier()
-                output.write(f"<identifier> {value} </identifier>\n")
+                output.write(f"<identifier> {value} </identifier>\r\n")
             elif tokenType == "INT_CONST":
                 value = tokenizer.intVal()
-                output.write(f"<integerConstant> {value} </integerConstant>\n")
+                output.write(f"<integerConstant> {value} </integerConstant>\r\n")
             elif tokenType == "STRING_CONST":
                 value = tokenizer.stringVal()
-                output.write(f"<stringConstant> {value} </stringConstant>\n")
+                output.write(f"<stringConstant> {value} </stringConstant>\r\n")
-        output.write("</tokens>\n")
+        output.write("</tokens>\r\n")
         output.close()
     else:
         # full compilation
@@ -817,12 +819,11 @@ def analyzeFile(jackFile, outputFile, tokenizeOnly=False):
 def main():
     # analyze Jack file or directory
-    if len(sys.argv) < 2:
-        print("Usage: python JackAnalyzer.py <file_or_directory> [-t]")
+    if len(sys.argv) != 2:
+        print("Usage: python hjc.py <file_or_directory>")
         sys.exit(1)

     inputPath = sys.argv[1]
-    tokenizeOnly = len(sys.argv) > 2 and sys.argv[2] == "-t"

     if not os.path.exists(inputPath):
         print(f"Error: Path '{inputPath}' not found")

@@ -834,13 +835,17 @@ def main():
             print("Error: Input file must have .jack extension")
             sys.exit(1)

-        if tokenizeOnly:
-            outputFile = inputPath[:-5] + "T.xml"
-        else:
-            outputFile = inputPath[:-5] + ".xml"
-        analyzeFile(inputPath, outputFile, tokenizeOnly)
-        print(f"Analyzed '{inputPath}' to '{outputFile}'")
+        # Generate tokenizer output
+        tokenizerFile = inputPath[:-5] + "T.xml"
+        analyzeFile(inputPath, tokenizerFile, True)
+
+        # Generate parser output
+        parserFile = inputPath[:-5] + ".xml"
+        analyzeFile(inputPath, parserFile, False)
+        print(
+            f"Analyzed '{inputPath}' - generated '{tokenizerFile}' and '{parserFile}'"
+        )

     elif os.path.isdir(inputPath):
         # directory mode

@@ -853,13 +858,15 @@ def main():
         for jackFile in jackFiles:
             inputFile = os.path.join(inputPath, jackFile)

-            if tokenizeOnly:
-                outputFile = os.path.join(inputPath, jackFile[:-5] + "T.xml")
-            else:
-                outputFile = os.path.join(inputPath, jackFile[:-5] + ".xml")
-            analyzeFile(inputFile, outputFile, tokenizeOnly)
-            print(f"Analyzed '{inputFile}' to '{outputFile}'")
+            # Generate tokenizer output
+            tokenizerFile = os.path.join(inputPath, jackFile[:-5] + "T.xml")
+            analyzeFile(inputFile, tokenizerFile, True)
+
+            # Generate parser output
+            parserFile = os.path.join(inputPath, jackFile[:-5] + ".xml")
+            analyzeFile(inputFile, parserFile, False)
+
+        print(f"Analyzed {len(jackFiles)} files in '{inputPath}'")

     else:
         print(f"Error: '{inputPath}' is neither file nor directory")

10/reflection.txt Normal file

@@ -0,0 +1,5 @@
Project 10 was a nice shift from the low-level system building I'd been doing - finally working with language structure and grammar. The modular design philosophy I'd been using since Project 6 carried over well: the JackTokenizer/CompilationEngine split follows the same Parser/CodeWriter pattern as my VM translator, just with a much richer set of tokens and grammar rules. Building the tokenizer was straightforward - it's essentially the kind of string parsing I've done plenty of times before. Comment handling was trickier than expected, though: block comments that span multiple lines require tracking state between advance() calls.
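
To make that concrete, here's a rough sketch of the kind of cross-line state I mean. The names are made up for illustration - this isn't my actual JackTokenizer, and a real version also has to avoid treating "/*" inside string constants as a comment:

class CommentStrippingReader:
    # Feeds source lines through one at a time while remembering whether a
    # /* ... */ block comment that started on an earlier line is still open.
    def __init__(self, lines):
        self.lines = lines
        self.in_block_comment = False   # state carried across lines

    def clean_lines(self):
        for line in self.lines:
            kept = []
            i = 0
            while i < len(line):
                if self.in_block_comment:
                    end = line.find("*/", i)
                    if end == -1:
                        i = len(line)              # comment continues on the next line
                    else:
                        self.in_block_comment = False
                        i = end + 2
                elif line.startswith("//", i):
                    break                          # rest of the line is a comment
                elif line.startswith("/*", i):
                    self.in_block_comment = True   # may or may not close on this line
                    i += 2
                else:
                    kept.append(line[i])
                    i += 1
            yield "".join(kept)

source = ["let x = 1; /* spans", "several lines */ let y = 2; // trailing", "let z = x + y;"]
for cleaned in CommentStrippingReader(source).clean_lines():
    print(cleaned)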
The compilation engine was where my algorithms, programming language design, and computer systems (CS 306) courses finally clicked into place. Recursive descent parsing is just grammar rules implemented as methods that call each other - elegant, but only once I saw it. Each production rule maps directly to a method, and the recursive calls naturally build the parse tree (which I happen to be studying right now in CS 308, programming language design!). The XML output requirement turned out to be great for debugging, since I could visually inspect the parse tree in a browser and catch parsing errors immediately. I still hit some tricky edge cases with expression parsing: operator precedence, unary operators, and making sure the tokenizer advanced at exactly the right moments for complex constructs like array access and method calls.
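
As a toy illustration of the rule-to-method mapping (not my CompilationEngine - the grammar here is just expression: term (('+' | '-') term)* and term: NUMBER | '(' expression ')'), each rule becomes a compile_* method and the emitted XML tags trace the parse tree:

class TinyParser:
    def __init__(self, tokens):
        self.tokens = tokens
        self.pos = 0
        self.xml = []

    def peek(self):
        return self.tokens[self.pos] if self.pos < len(self.tokens) else None

    def eat(self):
        # consume the current token and emit it as a terminal element
        tok = self.tokens[self.pos]
        self.pos += 1
        self.xml.append(f"<token> {tok} </token>")
        return tok

    # expression: term (('+' | '-') term)*
    def compile_expression(self):
        self.xml.append("<expression>")
        self.compile_term()
        while self.peek() in ("+", "-"):
            self.eat()
            self.compile_term()
        self.xml.append("</expression>")

    # term: NUMBER | '(' expression ')'
    def compile_term(self):
        self.xml.append("<term>")
        if self.peek() == "(":
            self.eat()                  # '('
            self.compile_expression()   # recursive call builds the nested subtree
            self.eat()                  # ')'
        else:
            self.eat()                  # NUMBER
        self.xml.append("</term>")

parser = TinyParser(["1", "+", "(", "2", "-", "3", ")"])
parser.compile_expression()
print("\n".join(parser.xml))

The real engine has the same shape, just with the full Jack grammar and the tokenizer feeding it.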
What really struck me was how this project revealed the hidden complexity of syntax analysis - something I'd always taken for granted as a programmer. Seeing how a parser actually breaks down source code according to grammar rules, handles precedence, and builds a structured representation gave me a new appreciation for what happens before compilation even starts. Again, a great complement to CS 308, since I'm learning the theory there and putting it into practice here.