diff --git a/10/JackAnalyzer.py b/10/hjc.py
similarity index 93%
rename from 10/JackAnalyzer.py
rename to 10/hjc.py
index d09266e..1aeeecf 100644
--- a/10/JackAnalyzer.py
+++ b/10/hjc.py
@@ -241,13 +241,13 @@ class CompilationEngine:
 
     def writeOpenTag(self, tag):
         # write opening XML tag
-        self.output.write(" " * self.indent + f"<{tag}>\n")
+        self.output.write(" " * self.indent + f"<{tag}>\r\n")
         self.indent += 1
 
     def writeCloseTag(self, tag):
         # write closing XML tag
         self.indent -= 1
-        self.output.write(" " * self.indent + f"</{tag}>\n")
+        self.output.write(" " * self.indent + f"</{tag}>\r\n")
 
     def writeTerminal(self, tag, value):
         # write terminal (token) XML element
@@ -261,7 +261,7 @@ class CompilationEngine:
         elif value == "&":
             value = "&amp;"
 
-        self.output.write(" " * self.indent + f"<{tag}> {value} </{tag}>\n")
+        self.output.write(" " * self.indent + f"<{tag}> {value} </{tag}>\r\n")
 
     def writeCurrentToken(self):
         # write current token as XML
@@ -456,7 +456,10 @@ class CompilationEngine:
         self.writeOpenTag("statements")
 
         # process statements
-        while self.tokenizer.keyword() in ["let", "if", "while", "do", "return"]:
+        while (
+            self.tokenizer.getTokenType() == "KEYWORD"
+            and self.tokenizer.keyword() in ["let", "if", "while", "do", "return"]
+        ):
             keyword = self.tokenizer.keyword()
 
             if keyword == "let":
@@ -470,8 +473,6 @@
             elif keyword == "return":
                 self.compileReturn()
 
-            self.tokenizer.advance()
-
         self.writeCloseTag("statements")
 
     def compileLet(self):
@@ -510,6 +511,7 @@
         self.writeCurrentToken()
 
         self.writeCloseTag("letStatement")
+        self.tokenizer.advance()
 
     def compileIf(self):
         # compile if statement
@@ -556,10 +558,7 @@
 
             # closing brace
             self.writeCurrentToken()
-        else:
-            # no else, back up
-            return
-
+        self.tokenizer.advance()
         self.writeCloseTag("ifStatement")
 
     def compileWhile(self):
@@ -592,6 +591,7 @@
         self.writeCurrentToken()
 
         self.writeCloseTag("whileStatement")
+        self.tokenizer.advance()
 
     def compileDo(self):
         # compile do statement
@@ -630,6 +630,7 @@
         self.writeCurrentToken()
 
         self.writeCloseTag("doStatement")
+        self.tokenizer.advance()
 
     def compileReturn(self):
         # compile return statement
@@ -648,6 +649,7 @@
         self.writeCurrentToken()
 
         self.writeCloseTag("returnStatement")
+        self.tokenizer.advance()
 
     def compileExpression(self):
         # compile expression
@@ -775,7 +777,7 @@ def analyzeFile(jackFile, outputFile, tokenizeOnly=False):
     if tokenizeOnly:
         # tokenizer test output
        output = open(outputFile, "w")
-        output.write("<tokens>\n")
+        output.write("<tokens>\r\n")
 
        while tokenizer.hasMoreTokens():
            tokenizer.advance()
@@ -783,7 +785,7 @@ def analyzeFile(jackFile, outputFile, tokenizeOnly=False):
 
            if tokenType == "KEYWORD":
                value = tokenizer.keyword()
-                output.write(f"<keyword> {value} </keyword>\n")
+                output.write(f"<keyword> {value} </keyword>\r\n")
            elif tokenType == "SYMBOL":
                value = tokenizer.symbol()
                # escape special characters
@@ -795,18 +797,18 @@ def analyzeFile(jackFile, outputFile, tokenizeOnly=False):
                    value = "&quot;"
                elif value == "&":
                    value = "&amp;"
-                output.write(f"<symbol> {value} </symbol>\n")
+                output.write(f"<symbol> {value} </symbol>\r\n")
            elif tokenType == "IDENTIFIER":
                value = tokenizer.identifier()
-                output.write(f"<identifier> {value} </identifier>\n")
+                output.write(f"<identifier> {value} </identifier>\r\n")
            elif tokenType == "INT_CONST":
                value = tokenizer.intVal()
-                output.write(f"<integerConstant> {value} </integerConstant>\n")
+                output.write(f"<integerConstant> {value} </integerConstant>\r\n")
            elif tokenType == "STRING_CONST":
                value = tokenizer.stringVal()
-                output.write(f"<stringConstant> {value} </stringConstant>\n")
+                output.write(f"<stringConstant> {value} </stringConstant>\r\n")
 
-        output.write("</tokens>\n")
+        output.write("</tokens>\r\n")
        output.close()
    else:
        # full compilation
@@ -817,12 +819,11 @@ def analyzeFile(jackFile, outputFile, tokenizeOnly=False):
 
 def main():
     # analyze Jack file or directory
-    if len(sys.argv) < 2:
-        print("Usage: python JackAnalyzer.py <input> [-t]")
+    if len(sys.argv) != 2:
+        print("Usage: python hjc.py <input>")
         sys.exit(1)
 
     inputPath = sys.argv[1]
-    tokenizeOnly = len(sys.argv) > 2 and sys.argv[2] == "-t"
 
     if not os.path.exists(inputPath):
         print(f"Error: Path '{inputPath}' not found")
@@ -834,13 +835,17 @@ def main():
            print("Error: Input file must have .jack extension")
            sys.exit(1)
 
-        if tokenizeOnly:
-            outputFile = inputPath[:-5] + "T.xml"
-        else:
-            outputFile = inputPath[:-5] + ".xml"
+        # Generate tokenizer output
+        tokenizerFile = inputPath[:-5] + "T.xml"
+        analyzeFile(inputPath, tokenizerFile, True)
 
-        analyzeFile(inputPath, outputFile, tokenizeOnly)
-        print(f"Analyzed '{inputPath}' to '{outputFile}'")
+        # Generate parser output
+        parserFile = inputPath[:-5] + ".xml"
+        analyzeFile(inputPath, parserFile, False)
+
+        print(
+            f"Analyzed '{inputPath}' - generated '{tokenizerFile}' and '{parserFile}'"
+        )
 
    elif os.path.isdir(inputPath):
        # directory mode
@@ -853,13 +858,15 @@
 
        for jackFile in jackFiles:
            inputFile = os.path.join(inputPath, jackFile)
-            if tokenizeOnly:
-                outputFile = os.path.join(inputPath, jackFile[:-5] + "T.xml")
-            else:
-                outputFile = os.path.join(inputPath, jackFile[:-5] + ".xml")
+            # Generate tokenizer output
+            tokenizerFile = os.path.join(inputPath, jackFile[:-5] + "T.xml")
+            analyzeFile(inputFile, tokenizerFile, True)
 
-            analyzeFile(inputFile, outputFile, tokenizeOnly)
-            print(f"Analyzed '{inputFile}' to '{outputFile}'")
+            # Generate parser output
+            parserFile = os.path.join(inputPath, jackFile[:-5] + ".xml")
+            analyzeFile(inputFile, parserFile, False)
+
+        print(f"Analyzed {len(jackFiles)} files in '{inputPath}'")
 
    else:
        print(f"Error: '{inputPath}' is neither file nor directory")
diff --git a/10/reflection.txt b/10/reflection.txt
new file mode 100644
index 0000000..dd30d58
--- /dev/null
+++ b/10/reflection.txt
@@ -0,0 +1,5 @@
+Project 10 was a nice shift from the low-level system building I'd been doing: finally working with language structure and grammar. The modular design philosophy I'd been using since Project 6 carried over well. The JackTokenizer/CompilationEngine split followed the same Parser/CodeWriter pattern as my VM translator, just with a much richer set of tokens and grammar rules. Building the tokenizer was straightforward; it's essentially the kind of string parsing I've done plenty of times before. Comment handling was trickier than expected, though: block comments can span multiple lines, which means the tokenizer has to track state between advance() calls.
+
+The compilation engine was where my algorithms, programming language design, and computer systems (CS 306) courses finally clicked into place. Recursive descent parsing is just grammar rules implemented as methods that call each other; it's elegant, but I only saw that once I'd built it. Each production rule maps directly to a method, and the recursive calls naturally build the parse tree (which happens to be exactly what I'm studying in CS 308, programming language design!). The XML output requirement turned out to be great for debugging, since I could visually inspect the parse tree in a browser and catch parsing errors immediately. I hit some tricky edge cases in expression parsing: operator precedence, unary operators, and making sure the tokenizer advanced at exactly the right moment for complex constructs like array accesses and method calls.
+
+What really struck me was how this project revealed the hidden complexity of syntax analysis, something I'd always taken for granted as a programmer. Seeing how a parser actually breaks source code down according to grammar rules, handles precedence, and builds a structured representation gave me a new appreciation for everything that happens before code generation even starts. Again, a great complement to CS 308: I'm learning the theory there and putting it into practice here.
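
A note on the block-comment point in the reflection: the state that has to survive between advance() calls is just an "am I inside a /* ... */ comment" flag. The following is a minimal sketch of the idea, not the hjc.py implementation; the names (CommentStripper, stripLine, inComment) are invented for illustration, and it ignores comment markers inside string constants.

class CommentStripper:
    # Hypothetical helper; the single piece of state, inComment,
    # persists across lines and therefore across advance() calls.
    def __init__(self):
        self.inComment = False

    def stripLine(self, line):
        # Remove // and /* ... */ comments from one source line.
        result = []
        i = 0
        while i < len(line):
            if self.inComment:
                end = line.find("*/", i)
                if end == -1:
                    return "".join(result)  # comment continues on the next line
                self.inComment = False
                i = end + 2
            elif line.startswith("//", i):
                break  # rest of the line is a comment
            elif line.startswith("/*", i):
                self.inComment = True  # may close on this line or a later one
                i += 2
            else:
                result.append(line[i])
                i += 1
        return "".join(result)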
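
To make the "each production rule maps directly to a method" point concrete: the Jack rule whileStatement: 'while' '(' expression ')' '{' statements '}' becomes a method whose body transcribes the rule token by token. This sketch follows the conventions visible in the diff above (writeCurrentToken plus advance, with every compileX leaving the token after its construct current), but it is illustrative, not the author's exact code.

def compileWhile(self):
    # whileStatement: 'while' '(' expression ')' '{' statements '}'
    self.writeOpenTag("whileStatement")
    self.writeCurrentToken()   # 'while'
    self.tokenizer.advance()
    self.writeCurrentToken()   # '('
    self.tokenizer.advance()
    self.compileExpression()   # recurse into the expression rule
    self.writeCurrentToken()   # ')'
    self.tokenizer.advance()
    self.writeCurrentToken()   # '{'
    self.tokenizer.advance()
    self.compileStatements()   # recurse; a while can nest inside a while
    self.writeCurrentToken()   # '}'
    self.writeCloseTag("whileStatement")
    self.tokenizer.advance()   # leave the next token current for the caller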
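
The "advanced at exactly the right moment" cases live mostly in term parsing, where an identifier alone is ambiguous: it could be a plain variable, an array entry foo[i], or a call foo.bar(...), and you only find out by consuming the identifier and peeking at the next token. A sketch of that lookahead (parenthesized expressions omitted for brevity, and compileSubroutineCallRest is a hypothetical helper, not a method from the diff):

def compileTerm(self):
    self.writeOpenTag("term")
    if self.tokenizer.getTokenType() == "SYMBOL" and self.tokenizer.symbol() in ("-", "~"):
        self.writeCurrentToken()   # unary op; term: unaryOp term
        self.tokenizer.advance()
        self.compileTerm()
    elif self.tokenizer.getTokenType() == "IDENTIFIER":
        self.writeCurrentToken()   # varName, or the start of a call
        self.tokenizer.advance()   # advance so we can peek at what follows
        if self.tokenizer.getTokenType() == "SYMBOL" and self.tokenizer.symbol() == "[":
            self.writeCurrentToken()   # '['
            self.tokenizer.advance()
            self.compileExpression()   # array index
            self.writeCurrentToken()   # ']'
            self.tokenizer.advance()
        elif self.tokenizer.getTokenType() == "SYMBOL" and self.tokenizer.symbol() in ("(", "."):
            self.compileSubroutineCallRest()   # hypothetical: finish the call
        # otherwise it was a plain varName; nothing left to consume
    else:
        self.writeCurrentToken()   # integer, string, or keyword constant
        self.tokenizer.advance()
    self.writeCloseTag("term")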