diff --git a/10/JackAnalyzer.py b/10/hjc.py
similarity index 93%
rename from 10/JackAnalyzer.py
rename to 10/hjc.py
index d09266e..1aeeecf 100644
--- a/10/JackAnalyzer.py
+++ b/10/hjc.py
@@ -241,13 +241,13 @@ class CompilationEngine:
def writeOpenTag(self, tag):
# write opening XML tag
- self.output.write(" " * self.indent + f"<{tag}>\n")
+ self.output.write(" " * self.indent + f"<{tag}>\r\n")
self.indent += 1
def writeCloseTag(self, tag):
# write closing XML tag
self.indent -= 1
-        self.output.write(" " * self.indent + f"</{tag}>\n")
+        self.output.write(" " * self.indent + f"</{tag}>\r\n")
def writeTerminal(self, tag, value):
# write terminal (token) XML element
@@ -261,7 +261,7 @@ class CompilationEngine:
elif value == "&":
            value = "&amp;"
-        self.output.write(" " * self.indent + f"<{tag}> {value} </{tag}>\n")
+        self.output.write(" " * self.indent + f"<{tag}> {value} </{tag}>\r\n")
def writeCurrentToken(self):
# write current token as XML
@@ -456,7 +456,10 @@ class CompilationEngine:
self.writeOpenTag("statements")
# process statements
- while self.tokenizer.keyword() in ["let", "if", "while", "do", "return"]:
+ while (
+ self.tokenizer.getTokenType() == "KEYWORD"
+ and self.tokenizer.keyword() in ["let", "if", "while", "do", "return"]
+ ):
keyword = self.tokenizer.keyword()
if keyword == "let":
@@ -470,8 +473,6 @@ class CompilationEngine:
elif keyword == "return":
self.compileReturn()
- self.tokenizer.advance()
-
self.writeCloseTag("statements")
def compileLet(self):
@@ -510,6 +511,7 @@ class CompilationEngine:
self.writeCurrentToken()
self.writeCloseTag("letStatement")
+ self.tokenizer.advance()
def compileIf(self):
# compile if statement
@@ -556,10 +558,7 @@ class CompilationEngine:
# closing brace
self.writeCurrentToken()
- else:
- # no else, back up
- return
-
+ self.tokenizer.advance()
self.writeCloseTag("ifStatement")
def compileWhile(self):
@@ -592,6 +591,7 @@ class CompilationEngine:
self.writeCurrentToken()
self.writeCloseTag("whileStatement")
+ self.tokenizer.advance()
def compileDo(self):
# compile do statement
@@ -630,6 +630,7 @@ class CompilationEngine:
self.writeCurrentToken()
self.writeCloseTag("doStatement")
+ self.tokenizer.advance()
def compileReturn(self):
# compile return statement
@@ -648,6 +649,7 @@ class CompilationEngine:
self.writeCurrentToken()
self.writeCloseTag("returnStatement")
+ self.tokenizer.advance()
def compileExpression(self):
# compile expression
@@ -775,7 +777,7 @@ def analyzeFile(jackFile, outputFile, tokenizeOnly=False):
if tokenizeOnly:
# tokenizer test output
output = open(outputFile, "w")
-        output.write("<tokens>\n")
+        output.write("<tokens>\r\n")
while tokenizer.hasMoreTokens():
tokenizer.advance()
@@ -783,7 +785,7 @@ def analyzeFile(jackFile, outputFile, tokenizeOnly=False):
if tokenType == "KEYWORD":
value = tokenizer.keyword()
-                output.write(f"<keyword> {value} </keyword>\n")
+                output.write(f"<keyword> {value} </keyword>\r\n")
elif tokenType == "SYMBOL":
value = tokenizer.symbol()
# escape special characters
@@ -795,18 +797,18 @@ def analyzeFile(jackFile, outputFile, tokenizeOnly=False):
                value = "&quot;"
elif value == "&":
                value = "&amp;"
-                output.write(f"<symbol> {value} </symbol>\n")
+                output.write(f"<symbol> {value} </symbol>\r\n")
elif tokenType == "IDENTIFIER":
value = tokenizer.identifier()
-                output.write(f"<identifier> {value} </identifier>\n")
+                output.write(f"<identifier> {value} </identifier>\r\n")
elif tokenType == "INT_CONST":
value = tokenizer.intVal()
-                output.write(f"<integerConstant> {value} </integerConstant>\n")
+                output.write(f"<integerConstant> {value} </integerConstant>\r\n")
elif tokenType == "STRING_CONST":
value = tokenizer.stringVal()
-                output.write(f"<stringConstant> {value} </stringConstant>\n")
+                output.write(f"<stringConstant> {value} </stringConstant>\r\n")
-        output.write("</tokens>\n")
+        output.write("</tokens>\r\n")
output.close()
else:
# full compilation
@@ -817,12 +819,11 @@ def analyzeFile(jackFile, outputFile, tokenizeOnly=False):
def main():
# analyze Jack file or directory
- if len(sys.argv) < 2:
-        print("Usage: python JackAnalyzer.py <input path> [-t]")
+ if len(sys.argv) != 2:
+        print("Usage: python hjc.py <input path>")
sys.exit(1)
inputPath = sys.argv[1]
- tokenizeOnly = len(sys.argv) > 2 and sys.argv[2] == "-t"
if not os.path.exists(inputPath):
print(f"Error: Path '{inputPath}' not found")
@@ -834,13 +835,17 @@ def main():
print("Error: Input file must have .jack extension")
sys.exit(1)
- if tokenizeOnly:
- outputFile = inputPath[:-5] + "T.xml"
- else:
- outputFile = inputPath[:-5] + ".xml"
+ # Generate tokenizer output
+ tokenizerFile = inputPath[:-5] + "T.xml"
+ analyzeFile(inputPath, tokenizerFile, True)
- analyzeFile(inputPath, outputFile, tokenizeOnly)
- print(f"Analyzed '{inputPath}' to '{outputFile}'")
+ # Generate parser output
+ parserFile = inputPath[:-5] + ".xml"
+ analyzeFile(inputPath, parserFile, False)
+
+ print(
+ f"Analyzed '{inputPath}' - generated '{tokenizerFile}' and '{parserFile}'"
+ )
elif os.path.isdir(inputPath):
# directory mode
@@ -853,13 +858,15 @@ def main():
for jackFile in jackFiles:
inputFile = os.path.join(inputPath, jackFile)
- if tokenizeOnly:
- outputFile = os.path.join(inputPath, jackFile[:-5] + "T.xml")
- else:
- outputFile = os.path.join(inputPath, jackFile[:-5] + ".xml")
+ # Generate tokenizer output
+ tokenizerFile = os.path.join(inputPath, jackFile[:-5] + "T.xml")
+ analyzeFile(inputFile, tokenizerFile, True)
- analyzeFile(inputFile, outputFile, tokenizeOnly)
- print(f"Analyzed '{inputFile}' to '{outputFile}'")
+ # Generate parser output
+ parserFile = os.path.join(inputPath, jackFile[:-5] + ".xml")
+ analyzeFile(inputFile, parserFile, False)
+
+ print(f"Analyzed {len(jackFiles)} files in '{inputPath}'")
else:
print(f"Error: '{inputPath}' is neither file nor directory")
diff --git a/10/reflection.txt b/10/reflection.txt
new file mode 100644
index 0000000..dd30d58
--- /dev/null
+++ b/10/reflection.txt
@@ -0,0 +1,5 @@
+Project 10 was a nice shift from the low-level system building I'd been doing: finally, working with language structure and grammar. The modular design philosophy I'd been using since Project 6 carried over well. The JackTokenizer and CompilationEngine split followed the same Parser/CodeWriter pattern from my VM translator, just with a much richer set of tokens and grammar rules. Building the tokenizer was actually straightforward: it's essentially the kind of string parsing I've done plenty of times before. Comment handling was trickier than expected, though, since multi-line /* ... */ comments require state tracking between advance() calls.
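+
+A minimal sketch of the state tracking that last sentence describes (hypothetical names, not the actual JackTokenizer code): a comment opened on one line keeps the tokenizer in "inside comment" mode until a later line closes it. It deliberately ignores comment markers inside string constants to stay short.
+
+```python
+# Sketch only: CommentSkipper and stripComments are illustrative names.
+class CommentSkipper:
+    def __init__(self):
+        self.inComment = False  # persists between per-line calls
+
+    def stripComments(self, line):
+        # Return `line` with // and /* */ comments removed, remembering
+        # whether a /* ... */ comment is still open at the end of the line.
+        result = []
+        i = 0
+        while i < len(line):
+            if self.inComment:
+                end = line.find("*/", i)
+                if end == -1:
+                    break  # comment continues on the next line
+                self.inComment = False
+                i = end + 2
+            elif line.startswith("//", i):
+                break  # rest of the line is a comment
+            elif line.startswith("/*", i):
+                self.inComment = True
+                i += 2
+            else:
+                result.append(line[i])
+                i += 1
+        return "".join(result)
+
+skipper = CommentSkipper()
+print(skipper.stripComments("let x = 1; /* start"))  # -> "let x = 1; "
+print(skipper.stripComments("still a comment"))      # -> ""
+print(skipper.stripComments("end */ let y = 2;"))    # -> " let y = 2;"
+```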
+
+The compilation engine was where my algorithms, programming language design, and computer systems (CS 306) courses finally clicked into place. Recursive descent parsing is just grammar rules implemented as methods that call each other: elegant, once I finally saw it. Each production rule maps directly to a method, and the recursive calls naturally build the parse tree (which I happen to be studying right now in CS 308, programming language design!). The XML output requirement was actually great for debugging, since I could visually inspect the parse tree structure in a browser and catch parsing errors immediately. I hit some tricky edge cases in expression parsing: operator precedence, unary operators, and making sure the tokenizer advanced at exactly the right moments for complex constructs like array access and method calls.
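+
+To make the "one method per production rule" point concrete, here is a self-contained sketch rather than the project's actual CompilationEngine; MiniParser, eat, expr, and term are all invented for illustration. The mutual recursion between expr and term is exactly what builds the nested tree.
+
+```python
+# Recursive descent sketch (hypothetical, not the project code) for:
+#   expr := term (('+'|'-') term)*
+#   term := NUMBER | '(' expr ')'
+class MiniParser:
+    def __init__(self, tokens):
+        self.tokens = tokens
+        self.pos = 0
+
+    def peek(self):
+        return self.tokens[self.pos] if self.pos < len(self.tokens) else None
+
+    def eat(self, expected=None):
+        tok = self.tokens[self.pos]
+        if expected is not None and tok != expected:
+            raise SyntaxError(f"expected {expected!r}, got {tok!r}")
+        self.pos += 1
+        return tok
+
+    def expr(self):
+        # expr rule: a term, then any number of (+|-) term repetitions
+        node = self.term()
+        while self.peek() in ("+", "-"):
+            op = self.eat()
+            node = (op, node, self.term())  # recursion builds the tree
+        return node
+
+    def term(self):
+        # term rule: parenthesized sub-expression or a number
+        if self.peek() == "(":
+            self.eat("(")
+            node = self.expr()  # mutual recursion mirrors grammar nesting
+            self.eat(")")
+            return node
+        return int(self.eat())
+
+print(MiniParser(["1", "+", "(", "2", "-", "3", ")"]).expr())
+# -> ('+', 1, ('-', 2, 3))
+```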
+
+What really struck me was how this project revealed the hidden complexity of syntax analysis, something I'd always taken for granted as a programmer. Seeing how a parser actually breaks down source code according to grammar rules, handles precedence, and builds a structured representation gave me a new appreciation for what happens before compilation even starts. Again, a great complement to CS 308, as I'm learning the theory there and putting it into practice here.