project10 - complete

2025-11-20 17:31:44 -05:00
parent 6b233eb9c9
commit 30d7936c6e
2 changed files with 44 additions and 32 deletions


@@ -241,13 +241,13 @@ class CompilationEngine:
     def writeOpenTag(self, tag):
         # write opening XML tag
-        self.output.write(" " * self.indent + f"<{tag}>\n")
+        self.output.write(" " * self.indent + f"<{tag}>\r\n")
         self.indent += 1

     def writeCloseTag(self, tag):
         # write closing XML tag
         self.indent -= 1
-        self.output.write(" " * self.indent + f"</{tag}>\n")
+        self.output.write(" " * self.indent + f"</{tag}>\r\n")

     def writeTerminal(self, tag, value):
         # write terminal (token) XML element

@@ -261,7 +261,7 @@ class CompilationEngine:
         elif value == "&":
             value = "&amp;"
-        self.output.write(" " * self.indent + f"<{tag}> {value} </{tag}>\n")
+        self.output.write(" " * self.indent + f"<{tag}> {value} </{tag}>\r\n")

     def writeCurrentToken(self):
         # write current token as XML
@@ -456,7 +456,10 @@ class CompilationEngine:
         self.writeOpenTag("statements")
         # process statements
-        while self.tokenizer.keyword() in ["let", "if", "while", "do", "return"]:
+        while (
+            self.tokenizer.getTokenType() == "KEYWORD"
+            and self.tokenizer.keyword() in ["let", "if", "while", "do", "return"]
+        ):
             keyword = self.tokenizer.keyword()
             if keyword == "let":

@@ -470,8 +473,6 @@ class CompilationEngine:
             elif keyword == "return":
                 self.compileReturn()
-            self.tokenizer.advance()
         self.writeCloseTag("statements")

     def compileLet(self):

@@ -510,6 +511,7 @@ class CompilationEngine:
         self.writeCurrentToken()
         self.writeCloseTag("letStatement")
+        self.tokenizer.advance()

     def compileIf(self):
         # compile if statement

@@ -556,10 +558,7 @@ class CompilationEngine:
             # closing brace
             self.writeCurrentToken()
-        else:
-            # no else, back up
-            return
+        self.tokenizer.advance()
         self.writeCloseTag("ifStatement")

     def compileWhile(self):

@@ -592,6 +591,7 @@ class CompilationEngine:
         self.writeCurrentToken()
         self.writeCloseTag("whileStatement")
+        self.tokenizer.advance()

     def compileDo(self):
         # compile do statement

@@ -630,6 +630,7 @@ class CompilationEngine:
         self.writeCurrentToken()
         self.writeCloseTag("doStatement")
+        self.tokenizer.advance()

     def compileReturn(self):
         # compile return statement

@@ -648,6 +649,7 @@ class CompilationEngine:
         self.writeCurrentToken()
         self.writeCloseTag("returnStatement")
+        self.tokenizer.advance()

     def compileExpression(self):
         # compile expression
@@ -775,7 +777,7 @@ def analyzeFile(jackFile, outputFile, tokenizeOnly=False):
     if tokenizeOnly:
         # tokenizer test output
         output = open(outputFile, "w")
-        output.write("<tokens>\n")
+        output.write("<tokens>\r\n")
         while tokenizer.hasMoreTokens():
             tokenizer.advance()

@@ -783,7 +785,7 @@ def analyzeFile(jackFile, outputFile, tokenizeOnly=False):
             if tokenType == "KEYWORD":
                 value = tokenizer.keyword()
-                output.write(f"<keyword> {value} </keyword>\n")
+                output.write(f"<keyword> {value} </keyword>\r\n")
             elif tokenType == "SYMBOL":
                 value = tokenizer.symbol()
                 # escape special characters

@@ -795,18 +797,18 @@ def analyzeFile(jackFile, outputFile, tokenizeOnly=False):
                     value = "&quot;"
                 elif value == "&":
                     value = "&amp;"
-                output.write(f"<symbol> {value} </symbol>\n")
+                output.write(f"<symbol> {value} </symbol>\r\n")
             elif tokenType == "IDENTIFIER":
                 value = tokenizer.identifier()
-                output.write(f"<identifier> {value} </identifier>\n")
+                output.write(f"<identifier> {value} </identifier>\r\n")
             elif tokenType == "INT_CONST":
                 value = tokenizer.intVal()
-                output.write(f"<integerConstant> {value} </integerConstant>\n")
+                output.write(f"<integerConstant> {value} </integerConstant>\r\n")
             elif tokenType == "STRING_CONST":
                 value = tokenizer.stringVal()
-                output.write(f"<stringConstant> {value} </stringConstant>\n")
+                output.write(f"<stringConstant> {value} </stringConstant>\r\n")
-        output.write("</tokens>\n")
+        output.write("</tokens>\r\n")
         output.close()
     else:
         # full compilation
@@ -817,12 +819,11 @@ def analyzeFile(jackFile, outputFile, tokenizeOnly=False):
 def main():
     # analyze Jack file or directory
-    if len(sys.argv) < 2:
-        print("Usage: python JackAnalyzer.py <file_or_directory> [-t]")
+    if len(sys.argv) != 2:
+        print("Usage: python hjc.py <file_or_directory>")
         sys.exit(1)

     inputPath = sys.argv[1]
-    tokenizeOnly = len(sys.argv) > 2 and sys.argv[2] == "-t"

     if not os.path.exists(inputPath):
         print(f"Error: Path '{inputPath}' not found")

@@ -834,13 +835,17 @@ def main():
             print("Error: Input file must have .jack extension")
             sys.exit(1)

-        if tokenizeOnly:
-            outputFile = inputPath[:-5] + "T.xml"
-        else:
-            outputFile = inputPath[:-5] + ".xml"
-        analyzeFile(inputPath, outputFile, tokenizeOnly)
-        print(f"Analyzed '{inputPath}' to '{outputFile}'")
+        # Generate tokenizer output
+        tokenizerFile = inputPath[:-5] + "T.xml"
+        analyzeFile(inputPath, tokenizerFile, True)
+
+        # Generate parser output
+        parserFile = inputPath[:-5] + ".xml"
+        analyzeFile(inputPath, parserFile, False)
+        print(
+            f"Analyzed '{inputPath}' - generated '{tokenizerFile}' and '{parserFile}'"
+        )

     elif os.path.isdir(inputPath):
         # directory mode

@@ -853,13 +858,15 @@ def main():
         for jackFile in jackFiles:
             inputFile = os.path.join(inputPath, jackFile)

-            if tokenizeOnly:
-                outputFile = os.path.join(inputPath, jackFile[:-5] + "T.xml")
-            else:
-                outputFile = os.path.join(inputPath, jackFile[:-5] + ".xml")
-            analyzeFile(inputFile, outputFile, tokenizeOnly)
-            print(f"Analyzed '{inputFile}' to '{outputFile}'")
+            # Generate tokenizer output
+            tokenizerFile = os.path.join(inputPath, jackFile[:-5] + "T.xml")
+            analyzeFile(inputFile, tokenizerFile, True)
+
+            # Generate parser output
+            parserFile = os.path.join(inputPath, jackFile[:-5] + ".xml")
+            analyzeFile(inputFile, parserFile, False)
+
+        print(f"Analyzed {len(jackFiles)} files in '{inputPath}'")

     else:
         print(f"Error: '{inputPath}' is neither file nor directory")

10/reflection.txt Normal file

@@ -0,0 +1,5 @@
Project 10 was a nice shift from the low-level system building I'd been doing - finally working with language structure and grammar. The modular design philosophy I'd been using since Project 6 carried over well: the JackTokenizer/CompilationEngine split follows the same Parser/CodeWriter pattern as my VM translator, just with a much richer set of tokens and grammar rules. Building the tokenizer was straightforward - it's essentially the kind of string parsing I've done plenty of times before. Comment handling was trickier than expected, though: block comments that span multiple lines require tracking state between advance() calls.
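
To make that concrete, here's a rough sketch of the kind of cross-line state I mean. The names are made up for illustration - this isn't my actual JackTokenizer, and a real version also has to avoid treating "/*" inside string constants as a comment:

class CommentStrippingReader:
    # Feeds source lines through one at a time while remembering whether a
    # /* ... */ block comment that started on an earlier line is still open.
    def __init__(self, lines):
        self.lines = lines
        self.in_block_comment = False   # state carried across lines

    def clean_lines(self):
        for line in self.lines:
            kept = []
            i = 0
            while i < len(line):
                if self.in_block_comment:
                    end = line.find("*/", i)
                    if end == -1:
                        i = len(line)              # comment continues on the next line
                    else:
                        self.in_block_comment = False
                        i = end + 2
                elif line.startswith("//", i):
                    break                          # rest of the line is a comment
                elif line.startswith("/*", i):
                    self.in_block_comment = True   # may or may not close on this line
                    i += 2
                else:
                    kept.append(line[i])
                    i += 1
            yield "".join(kept)

source = ["let x = 1; /* spans", "several lines */ let y = 2; // trailing", "let z = x + y;"]
for cleaned in CommentStrippingReader(source).clean_lines():
    print(cleaned)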
The compilation engine was where my algorithms, programming language design, and computer systems (CS 306) courses finally clicked into place. Recursive descent parsing is just grammar rules implemented as methods that call each other - elegant, but only once I saw it. Each production rule maps directly to a method, and the recursive calls naturally build the parse tree (which I happen to be studying right now in CS 308, programming language design!). The XML output requirement turned out to be great for debugging, since I could visually inspect the parse tree in a browser and catch parsing errors immediately. I still hit some tricky edge cases with expression parsing: operator precedence, unary operators, and making sure the tokenizer advanced at exactly the right moments for complex constructs like array access and method calls.
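
As a toy illustration of the rule-to-method mapping (not my CompilationEngine - the grammar here is just expression: term (('+' | '-') term)* and term: NUMBER | '(' expression ')'), each rule becomes a compile_* method and the emitted XML tags trace the parse tree:

class TinyParser:
    def __init__(self, tokens):
        self.tokens = tokens
        self.pos = 0
        self.xml = []

    def peek(self):
        return self.tokens[self.pos] if self.pos < len(self.tokens) else None

    def eat(self):
        # consume the current token and emit it as a terminal element
        tok = self.tokens[self.pos]
        self.pos += 1
        self.xml.append(f"<token> {tok} </token>")
        return tok

    # expression: term (('+' | '-') term)*
    def compile_expression(self):
        self.xml.append("<expression>")
        self.compile_term()
        while self.peek() in ("+", "-"):
            self.eat()
            self.compile_term()
        self.xml.append("</expression>")

    # term: NUMBER | '(' expression ')'
    def compile_term(self):
        self.xml.append("<term>")
        if self.peek() == "(":
            self.eat()                  # '('
            self.compile_expression()   # recursive call builds the nested subtree
            self.eat()                  # ')'
        else:
            self.eat()                  # NUMBER
        self.xml.append("</term>")

parser = TinyParser(["1", "+", "(", "2", "-", "3", ")"])
parser.compile_expression()
print("\n".join(parser.xml))

The real engine has the same shape, just with the full Jack grammar and the tokenizer feeding it.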
What really struck me was how this project revealed the hidden complexity of syntax analysis - something I'd always taken for granted as a programmer. Seeing how a parser actually breaks down source code according to grammar rules, handles precedence, and builds a structured representation gave me a new appreciation for what happens before compilation even starts. Again, a great complement to CS 308, since I'm learning the theory there and putting it into practice here.