project10 - complete

2025-11-20 17:31:44 -05:00
parent 6b233eb9c9
commit 30d7936c6e
2 changed files with 44 additions and 32 deletions


@@ -241,13 +241,13 @@ class CompilationEngine:
    def writeOpenTag(self, tag):
        # write opening XML tag
-        self.output.write(" " * self.indent + f"<{tag}>\n")
+        self.output.write(" " * self.indent + f"<{tag}>\r\n")
        self.indent += 1

    def writeCloseTag(self, tag):
        # write closing XML tag
        self.indent -= 1
-        self.output.write(" " * self.indent + f"</{tag}>\n")
+        self.output.write(" " * self.indent + f"</{tag}>\r\n")

    def writeTerminal(self, tag, value):
        # write terminal (token) XML element
@@ -261,7 +261,7 @@ class CompilationEngine:
        elif value == "&":
            value = "&amp;"
-        self.output.write(" " * self.indent + f"<{tag}> {value} </{tag}>\n")
+        self.output.write(" " * self.indent + f"<{tag}> {value} </{tag}>\r\n")

    def writeCurrentToken(self):
        # write current token as XML
@@ -456,7 +456,10 @@ class CompilationEngine:
        self.writeOpenTag("statements")
        # process statements
-        while self.tokenizer.keyword() in ["let", "if", "while", "do", "return"]:
+        while (
+            self.tokenizer.getTokenType() == "KEYWORD"
+            and self.tokenizer.keyword() in ["let", "if", "while", "do", "return"]
+        ):
            keyword = self.tokenizer.keyword()
            if keyword == "let":
@@ -470,8 +473,6 @@ class CompilationEngine:
            elif keyword == "return":
                self.compileReturn()
-            self.tokenizer.advance()
        self.writeCloseTag("statements")

    def compileLet(self):
@@ -510,6 +511,7 @@ class CompilationEngine:
        self.writeCurrentToken()
        self.writeCloseTag("letStatement")
+        self.tokenizer.advance()

    def compileIf(self):
        # compile if statement
@@ -556,10 +558,7 @@ class CompilationEngine:
            # closing brace
            self.writeCurrentToken()
        else:
-            # no else, back up
-            return
        self.tokenizer.advance()
        self.writeCloseTag("ifStatement")

    def compileWhile(self):
@@ -592,6 +591,7 @@ class CompilationEngine:
        self.writeCurrentToken()
        self.writeCloseTag("whileStatement")
+        self.tokenizer.advance()

    def compileDo(self):
        # compile do statement
@@ -630,6 +630,7 @@ class CompilationEngine:
        self.writeCurrentToken()
        self.writeCloseTag("doStatement")
+        self.tokenizer.advance()

    def compileReturn(self):
        # compile return statement
@@ -648,6 +649,7 @@ class CompilationEngine:
        self.writeCurrentToken()
        self.writeCloseTag("returnStatement")
+        self.tokenizer.advance()

    def compileExpression(self):
        # compile expression
@@ -775,7 +777,7 @@ def analyzeFile(jackFile, outputFile, tokenizeOnly=False):
    if tokenizeOnly:
        # tokenizer test output
        output = open(outputFile, "w")
-        output.write("<tokens>\n")
+        output.write("<tokens>\r\n")
        while tokenizer.hasMoreTokens():
            tokenizer.advance()
@@ -783,7 +785,7 @@ def analyzeFile(jackFile, outputFile, tokenizeOnly=False):
            if tokenType == "KEYWORD":
                value = tokenizer.keyword()
-                output.write(f"<keyword> {value} </keyword>\n")
+                output.write(f"<keyword> {value} </keyword>\r\n")
            elif tokenType == "SYMBOL":
                value = tokenizer.symbol()
                # escape special characters
@@ -795,18 +797,18 @@ def analyzeFile(jackFile, outputFile, tokenizeOnly=False):
value = "&quot;"
elif value == "&":
value = "&amp;"
output.write(f"<symbol> {value} </symbol>\n")
output.write(f"<symbol> {value} </symbol>\r\n")
elif tokenType == "IDENTIFIER":
value = tokenizer.identifier()
output.write(f"<identifier> {value} </identifier>\n")
output.write(f"<identifier> {value} </identifier>\r\n")
elif tokenType == "INT_CONST":
value = tokenizer.intVal()
output.write(f"<integerConstant> {value} </integerConstant>\n")
output.write(f"<integerConstant> {value} </integerConstant>\r\n")
elif tokenType == "STRING_CONST":
value = tokenizer.stringVal()
output.write(f"<stringConstant> {value} </stringConstant>\n")
output.write(f"<stringConstant> {value} </stringConstant>\r\n")
output.write("</tokens>\n")
output.write("</tokens>\r\n")
output.close()
else:
# full compilation
@@ -817,12 +819,11 @@ def analyzeFile(jackFile, outputFile, tokenizeOnly=False):
def main():
    # analyze Jack file or directory
-    if len(sys.argv) < 2:
-        print("Usage: python JackAnalyzer.py <file_or_directory> [-t]")
+    if len(sys.argv) != 2:
+        print("Usage: python hjc.py <file_or_directory>")
        sys.exit(1)
    inputPath = sys.argv[1]
-    tokenizeOnly = len(sys.argv) > 2 and sys.argv[2] == "-t"
    if not os.path.exists(inputPath):
        print(f"Error: Path '{inputPath}' not found")
@@ -834,13 +835,17 @@ def main():
print("Error: Input file must have .jack extension")
sys.exit(1)
if tokenizeOnly:
outputFile = inputPath[:-5] + "T.xml"
else:
outputFile = inputPath[:-5] + ".xml"
# Generate tokenizer output
tokenizerFile = inputPath[:-5] + "T.xml"
analyzeFile(inputPath, tokenizerFile, True)
analyzeFile(inputPath, outputFile, tokenizeOnly)
print(f"Analyzed '{inputPath}' to '{outputFile}'")
# Generate parser output
parserFile = inputPath[:-5] + ".xml"
analyzeFile(inputPath, parserFile, False)
print(
f"Analyzed '{inputPath}' - generated '{tokenizerFile}' and '{parserFile}'"
)
elif os.path.isdir(inputPath):
# directory mode
@@ -853,13 +858,15 @@ def main():
        for jackFile in jackFiles:
            inputFile = os.path.join(inputPath, jackFile)
-            if tokenizeOnly:
-                outputFile = os.path.join(inputPath, jackFile[:-5] + "T.xml")
-            else:
-                outputFile = os.path.join(inputPath, jackFile[:-5] + ".xml")
+            # Generate tokenizer output
+            tokenizerFile = os.path.join(inputPath, jackFile[:-5] + "T.xml")
+            analyzeFile(inputFile, tokenizerFile, True)
-            analyzeFile(inputFile, outputFile, tokenizeOnly)
-            print(f"Analyzed '{inputFile}' to '{outputFile}'")
+            # Generate parser output
+            parserFile = os.path.join(inputPath, jackFile[:-5] + ".xml")
+            analyzeFile(inputFile, parserFile, False)
        print(f"Analyzed {len(jackFiles)} files in '{inputPath}'")
    else:
        print(f"Error: '{inputPath}' is neither file nor directory")

10/reflection.txt

@@ -0,0 +1,5 @@
Project 10 was a nice shift from the low-level system building I'd been doing: finally, working with language structure and grammar. The modular design philosophy I'd been using since Project 6 carried over well. The JackTokenizer and CompilationEngine split followed the same Parser/CodeWriter pattern from my VM translator, just dealing with a much richer set of tokens and grammar rules. Building the tokenizer was actually straightforward; it's essentially the kind of string parsing I've done plenty of times before. Comment handling was trickier than expected, though: multi-line comments require tracking state between advance() calls.
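
A rough sketch of that comment-stripping idea (a hypothetical helper with simplified names, not the actual JackTokenizer code):

# Rough sketch: removing // and /* ... */ comments from Jack source.
# in_block_comment is the state that has to survive from one line (one
# advance() call) to the next; string-constant edge cases are ignored here.
def strip_comments(source):
    chars = []
    in_block_comment = False
    for line in source.splitlines():
        i = 0
        while i < len(line):
            if in_block_comment:
                end = line.find("*/", i)
                if end == -1:
                    i = len(line)  # comment continues on the next line
                else:
                    in_block_comment = False
                    i = end + 2
            elif line.startswith("//", i):
                break  # rest of the line is a comment
            elif line.startswith("/*", i):
                in_block_comment = True
                i += 2
            else:
                chars.append(line[i])
                i += 1
        chars.append("\n")
    return "".join(chars)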
The compilation engine was where my algorithms, programming language design, and computer systems (CS 306) courses finally clicked into place. Recursive descent parsing is just grammar rules implemented as methods that call each other; it was elegant, but only once I saw it. Each production rule maps directly to a method, and the recursive calls naturally build the parse tree (which I happen to be studying right now in CS 308, programming language design!). The XML output requirement was actually great for debugging, since I could visually inspect the parse tree structure in a browser and catch parsing errors immediately. I hit some tricky edge cases in expression parsing: operator precedence, unary operators, and making sure the tokenizer advanced at exactly the right moment for complex constructs like array accesses and method calls.
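
As an illustration, the grammar rule whileStatement: 'while' '(' expression ')' '{' statements '}' becomes a method shaped roughly like the compileWhile in the diff above (a simplified sketch of that shape, not the exact code):

# Sketch: one production rule, one method. Terminals are written directly;
# non-terminals become recursive calls into their own compile methods.
# Error handling and token-type checks are omitted for brevity.
def compileWhile(self):
    self.writeOpenTag("whileStatement")
    self.writeCurrentToken()  # 'while'
    self.tokenizer.advance()
    self.writeCurrentToken()  # '('
    self.tokenizer.advance()
    self.compileExpression()  # recurse into the expression rule
    self.writeCurrentToken()  # ')'
    self.tokenizer.advance()
    self.writeCurrentToken()  # '{'
    self.tokenizer.advance()
    self.compileStatements()  # recurse into the statements rule
    self.writeCurrentToken()  # '}'
    self.writeCloseTag("whileStatement")
    self.tokenizer.advance()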
What really struck me was how this project revealed the hidden complexity of syntax analysis, something I'd always taken for granted as a programmer. Seeing how a parser actually breaks down source code according to grammar rules, handles precedence, and builds a structured representation gave me a new appreciation for what happens before compilation even starts. Again, a great complement to CS 308: I'm learning the theory there and putting it into practice here.