Mirror of https://github.com/soconnor0919/eceg431.git
Commit: project10 - complete
@@ -241,13 +241,13 @@ class CompilationEngine:
 
     def writeOpenTag(self, tag):
         # write opening XML tag
-        self.output.write(" " * self.indent + f"<{tag}>\n")
+        self.output.write(" " * self.indent + f"<{tag}>\r\n")
         self.indent += 1
 
     def writeCloseTag(self, tag):
         # write closing XML tag
         self.indent -= 1
-        self.output.write(" " * self.indent + f"</{tag}>\n")
+        self.output.write(" " * self.indent + f"</{tag}>\r\n")
 
     def writeTerminal(self, tag, value):
         # write terminal (token) XML element
@@ -261,7 +261,7 @@ class CompilationEngine:
         elif value == "&":
             value = "&amp;"
 
-        self.output.write(" " * self.indent + f"<{tag}> {value} </{tag}>\n")
+        self.output.write(" " * self.indent + f"<{tag}> {value} </{tag}>\r\n")
 
     def writeCurrentToken(self):
         # write current token as XML
@@ -456,7 +456,10 @@ class CompilationEngine:
         self.writeOpenTag("statements")
 
         # process statements
-        while self.tokenizer.keyword() in ["let", "if", "while", "do", "return"]:
+        while (
+            self.tokenizer.getTokenType() == "KEYWORD"
+            and self.tokenizer.keyword() in ["let", "if", "while", "do", "return"]
+        ):
             keyword = self.tokenizer.keyword()
 
             if keyword == "let":
@@ -470,8 +473,6 @@ class CompilationEngine:
             elif keyword == "return":
                 self.compileReturn()
 
-            self.tokenizer.advance()
-
         self.writeCloseTag("statements")
 
     def compileLet(self):
@@ -510,6 +511,7 @@ class CompilationEngine:
         self.writeCurrentToken()
 
         self.writeCloseTag("letStatement")
+        self.tokenizer.advance()
 
     def compileIf(self):
         # compile if statement
@@ -556,10 +558,7 @@ class CompilationEngine:
 
             # closing brace
             self.writeCurrentToken()
-        else:
-            # no else, back up
-            return
-
+        self.tokenizer.advance()
         self.writeCloseTag("ifStatement")
 
     def compileWhile(self):
@@ -592,6 +591,7 @@ class CompilationEngine:
         self.writeCurrentToken()
 
         self.writeCloseTag("whileStatement")
+        self.tokenizer.advance()
 
     def compileDo(self):
         # compile do statement
@@ -630,6 +630,7 @@ class CompilationEngine:
         self.writeCurrentToken()
 
         self.writeCloseTag("doStatement")
+        self.tokenizer.advance()
 
     def compileReturn(self):
         # compile return statement
@@ -648,6 +649,7 @@ class CompilationEngine:
         self.writeCurrentToken()
 
         self.writeCloseTag("returnStatement")
+        self.tokenizer.advance()
 
     def compileExpression(self):
         # compile expression
@@ -775,7 +777,7 @@ def analyzeFile(jackFile, outputFile, tokenizeOnly=False):
     if tokenizeOnly:
         # tokenizer test output
         output = open(outputFile, "w")
-        output.write("<tokens>\n")
+        output.write("<tokens>\r\n")
 
         while tokenizer.hasMoreTokens():
             tokenizer.advance()
@@ -783,7 +785,7 @@ def analyzeFile(jackFile, outputFile, tokenizeOnly=False):
 
             if tokenType == "KEYWORD":
                 value = tokenizer.keyword()
-                output.write(f"<keyword> {value} </keyword>\n")
+                output.write(f"<keyword> {value} </keyword>\r\n")
             elif tokenType == "SYMBOL":
                 value = tokenizer.symbol()
                 # escape special characters
@@ -795,18 +797,18 @@ def analyzeFile(jackFile, outputFile, tokenizeOnly=False):
                     value = "&quot;"
                 elif value == "&":
                     value = "&amp;"
-                output.write(f"<symbol> {value} </symbol>\n")
+                output.write(f"<symbol> {value} </symbol>\r\n")
             elif tokenType == "IDENTIFIER":
                 value = tokenizer.identifier()
-                output.write(f"<identifier> {value} </identifier>\n")
+                output.write(f"<identifier> {value} </identifier>\r\n")
             elif tokenType == "INT_CONST":
                 value = tokenizer.intVal()
-                output.write(f"<integerConstant> {value} </integerConstant>\n")
+                output.write(f"<integerConstant> {value} </integerConstant>\r\n")
             elif tokenType == "STRING_CONST":
                 value = tokenizer.stringVal()
-                output.write(f"<stringConstant> {value} </stringConstant>\n")
+                output.write(f"<stringConstant> {value} </stringConstant>\r\n")
 
-        output.write("</tokens>\n")
+        output.write("</tokens>\r\n")
         output.close()
     else:
         # full compilation
@@ -817,12 +819,11 @@ def analyzeFile(jackFile, outputFile, tokenizeOnly=False):
 
 def main():
     # analyze Jack file or directory
-    if len(sys.argv) < 2:
-        print("Usage: python JackAnalyzer.py <file_or_directory> [-t]")
+    if len(sys.argv) != 2:
+        print("Usage: python hjc.py <file_or_directory>")
         sys.exit(1)
 
     inputPath = sys.argv[1]
-    tokenizeOnly = len(sys.argv) > 2 and sys.argv[2] == "-t"
 
     if not os.path.exists(inputPath):
         print(f"Error: Path '{inputPath}' not found")
@@ -834,13 +835,17 @@ def main():
             print("Error: Input file must have .jack extension")
             sys.exit(1)
 
-        if tokenizeOnly:
-            outputFile = inputPath[:-5] + "T.xml"
-        else:
-            outputFile = inputPath[:-5] + ".xml"
+        # Generate tokenizer output
+        tokenizerFile = inputPath[:-5] + "T.xml"
+        analyzeFile(inputPath, tokenizerFile, True)
 
-        analyzeFile(inputPath, outputFile, tokenizeOnly)
-        print(f"Analyzed '{inputPath}' to '{outputFile}'")
+        # Generate parser output
+        parserFile = inputPath[:-5] + ".xml"
+        analyzeFile(inputPath, parserFile, False)
+
+        print(
+            f"Analyzed '{inputPath}' - generated '{tokenizerFile}' and '{parserFile}'"
+        )
 
     elif os.path.isdir(inputPath):
         # directory mode
@@ -853,13 +858,15 @@ def main():
         for jackFile in jackFiles:
             inputFile = os.path.join(inputPath, jackFile)
 
-            if tokenizeOnly:
-                outputFile = os.path.join(inputPath, jackFile[:-5] + "T.xml")
-            else:
-                outputFile = os.path.join(inputPath, jackFile[:-5] + ".xml")
+            # Generate tokenizer output
+            tokenizerFile = os.path.join(inputPath, jackFile[:-5] + "T.xml")
+            analyzeFile(inputFile, tokenizerFile, True)
 
-            analyzeFile(inputFile, outputFile, tokenizeOnly)
-            print(f"Analyzed '{inputFile}' to '{outputFile}'")
+            # Generate parser output
+            parserFile = os.path.join(inputPath, jackFile[:-5] + ".xml")
+            analyzeFile(inputFile, parserFile, False)
+
+        print(f"Analyzed {len(jackFiles)} files in '{inputPath}'")
 
     else:
         print(f"Error: '{inputPath}' is neither file nor directory")

10/reflection.txt (new file, 5 lines added)

Project 10 was a nice shift from the low-level system building I'd been doing: finally working with language structure and grammar. The modular design philosophy I'd been using since Project 6 carried over well. The JackTokenizer and CompilationEngine split followed the same Parser/CodeWriter pattern from my VM translator, just dealing with a much richer set of tokens and grammar rules. Building the tokenizer was actually straightforward; it's essentially the kind of string parsing I've done plenty of times before. The comment handling was trickier than expected, though: block comments that span multiple lines require state tracking between advance() calls.
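
A minimal sketch of that kind of comment handling, assuming a line-by-line pre-pass with an in_block flag carried across lines (illustrative names only, not the code from this commit):

    def strip_comments(lines):
        # Remove // line comments and /* ... */ block comments from Jack source.
        # (String constants containing comment markers are ignored here for brevity.)
        cleaned = []
        in_block = False                      # are we inside a /* ... */ comment?
        for line in lines:
            kept = []
            i = 0
            while i < len(line):
                if in_block:
                    end = line.find("*/", i)
                    if end == -1:             # comment continues on the next line
                        i = len(line)
                    else:
                        in_block = False
                        i = end + 2
                elif line.startswith("//", i):
                    break                     # rest of the line is a comment
                elif line.startswith("/*", i):
                    in_block = True           # also covers /** ... */ doc comments
                    i += 2
                else:
                    kept.append(line[i])
                    i += 1
            cleaned.append("".join(kept))
        return cleaned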

The compilation engine was where my algorithms, programming language design, and computer systems (CS 306) courses finally clicked into place. Recursive descent parsing is just grammar rules implemented as methods that call each other; it was elegant, but only once I saw it. Each production rule maps directly to a method, and the recursive calls naturally build the parse tree (which I just so happen to be doing in CS 308, programming language design!). The XML output requirement was actually great for debugging, since I could visually inspect the parse tree structure in a browser and catch parsing errors immediately. I hit some tricky edge cases with expression parsing: operator precedence, unary operators, and making sure the tokenizer advanced at exactly the right moments for complex constructs like array access and method calls.
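
As a toy illustration of that structure (a made-up two-rule grammar, not the Jack grammar or this project's classes), each rule below becomes a compile method, the recursive calls mirror the nesting, and the emitted XML tags trace the parse tree:

    import re

    class TinyParser:
        def __init__(self, text):
            self.tokens = re.findall(r"\d+|[()+\-]", text)
            self.pos = 0
            self.indent = 0
            self.lines = []

        def emit(self, s):
            self.lines.append("  " * self.indent + s)

        def current(self):
            return self.tokens[self.pos] if self.pos < len(self.tokens) else None

        def eat(self):
            # write the current token as a terminal XML element and advance
            tok = self.tokens[self.pos]
            self.pos += 1
            kind = "integerConstant" if tok.isdigit() else "symbol"
            self.emit(f"<{kind}> {tok} </{kind}>")

        def compile_expr(self):
            # expr := term (('+' | '-') term)*
            self.emit("<expression>")
            self.indent += 1
            self.compile_term()
            while self.current() in ("+", "-"):
                self.eat()
                self.compile_term()
            self.indent -= 1
            self.emit("</expression>")

        def compile_term(self):
            # term := NUMBER | '(' expr ')'
            self.emit("<term>")
            self.indent += 1
            if self.current() == "(":
                self.eat()              # '('
                self.compile_expr()     # recursion builds the nested subtree
                self.eat()              # ')'
            else:
                self.eat()              # NUMBER
            self.indent -= 1
            self.emit("</term>")

    p = TinyParser("1 + (2 - 3)")
    p.compile_expr()
    print("\n".join(p.lines))

The project's compileExpression and compileStatements methods follow the same shape, just over the full Jack grammar.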

What really struck me was how this project revealed the hidden complexity of syntax analysis, something I'd always taken for granted as a programmer. Seeing how a parser actually breaks down source code according to grammar rules, handles precedence, and builds a structured representation gave me new appreciation for what happens before compilation even starts. Again, a great complement to CS 308, as I'm learning the theory and putting it into practice.