import os
import sys


class JackTokenizer:
    # tokenizes Jack source code

    def __init__(self, filename):
        # load and clean Jack file
        self.lines = []
        self.currentLine = ""
        self.lineNumber = 0
        self.inComment = False

        # current token info
        self.currentToken = ""
        self.tokenType = ""

        # Jack language keywords
        self.keywords = {
            "class", "constructor", "function", "method", "field", "static",
            "var", "int", "char", "boolean", "void", "true", "false", "null",
            "this", "let", "do", "if", "else", "while", "return",
        }

        # Jack language symbols
        self.symbols = {
            "{", "}", "(", ")", "[", "]", ".", ",", ";",
            "+", "-", "*", "/", "&", "|", "<", ">", "=", "~",
        }

        # read file
        with open(filename, "r") as file:
            self.lines = file.readlines()

    def hasMoreTokens(self):
        # check if more tokens available:
        # still have content on current line or more lines to process
        return len(self.currentLine) > 0 or self.lineNumber < len(self.lines)

    def advance(self):
        # get next token from input
        while True:
            # check if current line is empty
            if len(self.currentLine) == 0:
                # get new line
                if self.lineNumber >= len(self.lines):
                    # end of file
                    return False

                self.currentLine = self.lines[self.lineNumber]
                self.lineNumber += 1

                # remove newline
                if self.currentLine.endswith("\n"):
                    self.currentLine = self.currentLine[:-1]

                # handle comments
                # remove inline comments
                if "//" in self.currentLine:
                    self.currentLine = self.currentLine[: self.currentLine.index("//")]

                # handle multi-line comments
                if self.inComment:
                    # check for comment end
                    if "*/" in self.currentLine:
                        endIdx = self.currentLine.index("*/") + 2
                        self.currentLine = self.currentLine[endIdx:]
                        self.inComment = False
                    else:
                        # still in comment, skip this line
                        self.currentLine = ""
                        continue

                # check for comment start
                if "/*" in self.currentLine:
                    startIdx = self.currentLine.index("/*")
                    # check if comment ends on same line
                    if "*/" in self.currentLine[startIdx:]:
                        endIdx = self.currentLine.index("*/", startIdx) + 2
                        self.currentLine = (
                            self.currentLine[:startIdx]
                            + " "
                            + self.currentLine[endIdx:]
                        )
                    else:
                        # comment continues to next line
                        self.currentLine = self.currentLine[:startIdx]
                        self.inComment = True

                # replace tabs with spaces and strip
                self.currentLine = self.currentLine.replace("\t", " ").strip()

                # if line is empty after cleaning, get next line
                if len(self.currentLine) == 0:
                    continue

            # parse token from current line
            # skip leading spaces
            self.currentLine = self.currentLine.lstrip()

            if len(self.currentLine) == 0:
                continue

            # check first character
            firstChar = self.currentLine[0]

            # check if symbol
            if firstChar in self.symbols:
                self.currentToken = firstChar
                self.tokenType = "SYMBOL"
                self.currentLine = self.currentLine[1:]
                return True

            # check if string constant
            if firstChar == '"':
                # find closing quote
                endIdx = self.currentLine.index('"', 1)
                self.currentToken = self.currentLine[1:endIdx]
                self.tokenType = "STRING_CONST"
                self.currentLine = self.currentLine[endIdx + 1:]
                return True

            # check if integer constant
            if firstChar.isdigit():
                # parse integer
                endIdx = 0
                while endIdx < len(self.currentLine) and self.currentLine[endIdx].isdigit():
                    endIdx += 1
                self.currentToken = self.currentLine[:endIdx]
                self.tokenType = "INT_CONST"
                self.currentLine = self.currentLine[endIdx:]
                return True

            # must be identifier or keyword
            if firstChar.isalpha() or firstChar == "_":
                # parse identifier
                endIdx = 0
                while endIdx < len(self.currentLine):
                    char = self.currentLine[endIdx]
                    if char.isalnum() or char == "_":
                        endIdx += 1
                    else:
                        break

                self.currentToken = self.currentLine[:endIdx]
                self.currentLine = self.currentLine[endIdx:]

                # check if keyword
                if self.currentToken in self.keywords:
                    self.tokenType = "KEYWORD"
                else:
                    self.tokenType = "IDENTIFIER"

                return True

            # shouldn't reach here with valid Jack code; skip the character
            self.currentLine = self.currentLine[1:]

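    # advance() consumes self.currentLine from the left: given the line
    # `x + 1;`, successive calls peel off IDENTIFIER "x", SYMBOL "+",
    # INT_CONST "1", then SYMBOL ";" before refilling from the next line.
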
    def getTokenType(self):
        # return current token type
        return self.tokenType

    def keyword(self):
        # return keyword (only if token is keyword)
        if self.tokenType == "KEYWORD":
            return self.currentToken
        return None

    def symbol(self):
        # return symbol (only if token is symbol)
        if self.tokenType == "SYMBOL":
            return self.currentToken
        return None

    def identifier(self):
        # return identifier (only if token is identifier)
        if self.tokenType == "IDENTIFIER":
            return self.currentToken
        return None

    def intVal(self):
        # return integer value (only if token is int)
        if self.tokenType == "INT_CONST":
            return int(self.currentToken)
        return None

    def stringVal(self):
        # return string value (only if token is string)
        if self.tokenType == "STRING_CONST":
            return self.currentToken
        return None

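# Usage sketch (illustrative only; "Main.jack" is a hypothetical input file):
#
#     tok = JackTokenizer("Main.jack")
#     while tok.hasMoreTokens():
#         if tok.advance():
#             print(tok.getTokenType(), tok.currentToken)
#
# Each advance() call yields one token; `let x = 1;` produces KEYWORD,
# IDENTIFIER, SYMBOL, INT_CONST, SYMBOL in order.

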
class CompilationEngine:
    # generates XML from Jack code

    def __init__(self, tokenizer, output_file):
        # init compilation engine
        self.tokenizer = tokenizer
        self.output = open(output_file, "w")
        self.indent = 0

    def writeOpenTag(self, tag):
        # write opening XML tag
        self.output.write(" " * self.indent + f"<{tag}>\r\n")
        self.indent += 1

    def writeCloseTag(self, tag):
        # write closing XML tag
        self.indent -= 1
        self.output.write(" " * self.indent + f"</{tag}>\r\n")

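    # writeOpenTag and writeCloseTag keep self.indent in lockstep, so each
    # construct opened by a compileXxx method is indented one level deeper
    # than its parent; the \r\n line endings are presumably there to match
    # the course comparison files byte-for-byte.
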
    def writeTerminal(self, tag, value):
        # write terminal (token) XML element
        # escape special characters
        if value == "<":
            value = "&lt;"
        elif value == ">":
            value = "&gt;"
        elif value == '"':
            value = "&quot;"
        elif value == "&":
            value = "&amp;"

        self.output.write(" " * self.indent + f"<{tag}> {value} </{tag}>\r\n")

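    # Example: writeTerminal("symbol", "<") emits `<symbol> &lt; </symbol>`
    # at the current indent; the escaping keeps the output well-formed XML.
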
    def writeCurrentToken(self):
        # write current token as XML
        tokenType = self.tokenizer.getTokenType()

        if tokenType == "KEYWORD":
            self.writeTerminal("keyword", self.tokenizer.keyword())
        elif tokenType == "SYMBOL":
            self.writeTerminal("symbol", self.tokenizer.symbol())
        elif tokenType == "IDENTIFIER":
            self.writeTerminal("identifier", self.tokenizer.identifier())
        elif tokenType == "INT_CONST":
            self.writeTerminal("integerConstant", str(self.tokenizer.intVal()))
        elif tokenType == "STRING_CONST":
            self.writeTerminal("stringConstant", self.tokenizer.stringVal())

    def compileClass(self):
        # compile complete class
        self.writeOpenTag("class")

        # class keyword
        self.tokenizer.advance()
        self.writeCurrentToken()

        # class name
        self.tokenizer.advance()
        self.writeCurrentToken()

        # opening brace
        self.tokenizer.advance()
        self.writeCurrentToken()

        # class var declarations
        self.tokenizer.advance()
        while self.tokenizer.keyword() in ["static", "field"]:
            self.compileClassVarDec()
            self.tokenizer.advance()

        # subroutine declarations
        while self.tokenizer.keyword() in ["constructor", "function", "method"]:
            self.compileSubroutine()
            self.tokenizer.advance()

        # closing brace
        self.writeCurrentToken()

        self.writeCloseTag("class")

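    # For the minimal class `class Main { }` compileClass produces:
    #     <class>
    #       <keyword> class </keyword>
    #       <identifier> Main </identifier>
    #       <symbol> { </symbol>
    #       <symbol> } </symbol>
    #     </class>
    # (nesting shown schematically; the actual indent is one space per level)
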
    def compileClassVarDec(self):
        # compile static or field declaration
        self.writeOpenTag("classVarDec")

        # static or field
        self.writeCurrentToken()

        # type
        self.tokenizer.advance()
        self.writeCurrentToken()

        # var name
        self.tokenizer.advance()
        self.writeCurrentToken()

        # additional var names
        self.tokenizer.advance()
        while self.tokenizer.symbol() == ",":
            # comma
            self.writeCurrentToken()
            # var name
            self.tokenizer.advance()
            self.writeCurrentToken()
            self.tokenizer.advance()

        # semicolon
        self.writeCurrentToken()

        self.writeCloseTag("classVarDec")

    def compileSubroutine(self):
        # compile method, function, or constructor
        self.writeOpenTag("subroutineDec")

        # constructor, function, or method
        self.writeCurrentToken()

        # return type
        self.tokenizer.advance()
        self.writeCurrentToken()

        # subroutine name
        self.tokenizer.advance()
        self.writeCurrentToken()

        # opening paren
        self.tokenizer.advance()
        self.writeCurrentToken()

        # parameter list
        self.tokenizer.advance()
        self.compileParameterList()

        # closing paren
        self.writeCurrentToken()

        # subroutine body
        self.tokenizer.advance()
        self.compileSubroutineBody()

        self.writeCloseTag("subroutineDec")

    def compileParameterList(self):
        # compile parameter list (possibly empty)
        self.writeOpenTag("parameterList")

        # check if empty
        if self.tokenizer.symbol() != ")":
            # type
            self.writeCurrentToken()

            # var name
            self.tokenizer.advance()
            self.writeCurrentToken()

            # additional parameters
            self.tokenizer.advance()
            while self.tokenizer.symbol() == ",":
                # comma
                self.writeCurrentToken()
                # type
                self.tokenizer.advance()
                self.writeCurrentToken()
                # var name
                self.tokenizer.advance()
                self.writeCurrentToken()
                self.tokenizer.advance()

        self.writeCloseTag("parameterList")

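    # Grammar handled above: parameterList = ((type varName) ("," type varName)*)?
    # e.g. `(int x, boolean flag)` emits alternating type/name tokens; the
    # surrounding parentheses are written by compileSubroutine, not here.
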
    def compileSubroutineBody(self):
        # compile subroutine body
        self.writeOpenTag("subroutineBody")

        # opening brace
        self.writeCurrentToken()

        # var declarations
        self.tokenizer.advance()
        while self.tokenizer.keyword() == "var":
            self.compileVarDec()
            self.tokenizer.advance()

        # statements
        self.compileStatements()

        # closing brace
        self.writeCurrentToken()

        self.writeCloseTag("subroutineBody")

    def compileVarDec(self):
        # compile var declaration
        self.writeOpenTag("varDec")

        # var keyword
        self.writeCurrentToken()

        # type
        self.tokenizer.advance()
        self.writeCurrentToken()

        # var name
        self.tokenizer.advance()
        self.writeCurrentToken()

        # additional var names
        self.tokenizer.advance()
        while self.tokenizer.symbol() == ",":
            # comma
            self.writeCurrentToken()
            # var name
            self.tokenizer.advance()
            self.writeCurrentToken()
            self.tokenizer.advance()

        # semicolon
        self.writeCurrentToken()

        self.writeCloseTag("varDec")

    def compileStatements(self):
        # compile sequence of statements
        self.writeOpenTag("statements")

        # process statements
        while (
            self.tokenizer.getTokenType() == "KEYWORD"
            and self.tokenizer.keyword() in ["let", "if", "while", "do", "return"]
        ):
            keyword = self.tokenizer.keyword()

            if keyword == "let":
                self.compileLet()
            elif keyword == "if":
                self.compileIf()
            elif keyword == "while":
                self.compileWhile()
            elif keyword == "do":
                self.compileDo()
            elif keyword == "return":
                self.compileReturn()

        self.writeCloseTag("statements")

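    # Each statement handler ends having already advanced past its statement,
    # which is why this loop re-checks the current token without an advance()
    # of its own.
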
    def compileLet(self):
        # compile let statement
        self.writeOpenTag("letStatement")

        # let keyword
        self.writeCurrentToken()

        # var name
        self.tokenizer.advance()
        self.writeCurrentToken()

        # check for array indexing
        self.tokenizer.advance()
        if self.tokenizer.symbol() == "[":
            # opening bracket
            self.writeCurrentToken()

            # expression
            self.tokenizer.advance()
            self.compileExpression()

            # closing bracket
            self.writeCurrentToken()
            self.tokenizer.advance()

        # equals sign
        self.writeCurrentToken()

        # expression
        self.tokenizer.advance()
        self.compileExpression()

        # semicolon
        self.writeCurrentToken()

        self.writeCloseTag("letStatement")
        self.tokenizer.advance()

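    # e.g. `let a[i] = x + 1;` takes the bracket branch above, while
    # `let x = 5;` goes straight from the variable name to the equals sign.
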
    def compileIf(self):
        # compile if statement
        self.writeOpenTag("ifStatement")

        # if keyword
        self.writeCurrentToken()

        # opening paren
        self.tokenizer.advance()
        self.writeCurrentToken()

        # expression
        self.tokenizer.advance()
        self.compileExpression()

        # closing paren
        self.writeCurrentToken()

        # opening brace
        self.tokenizer.advance()
        self.writeCurrentToken()

        # statements
        self.tokenizer.advance()
        self.compileStatements()

        # closing brace
        self.writeCurrentToken()

        # check for else
        self.tokenizer.advance()
        if self.tokenizer.keyword() == "else":
            # else keyword
            self.writeCurrentToken()

            # opening brace
            self.tokenizer.advance()
            self.writeCurrentToken()

            # statements
            self.tokenizer.advance()
            self.compileStatements()

            # closing brace
            self.writeCurrentToken()
            self.tokenizer.advance()

        self.writeCloseTag("ifStatement")

    def compileWhile(self):
        # compile while statement
        self.writeOpenTag("whileStatement")

        # while keyword
        self.writeCurrentToken()

        # opening paren
        self.tokenizer.advance()
        self.writeCurrentToken()

        # expression
        self.tokenizer.advance()
        self.compileExpression()

        # closing paren
        self.writeCurrentToken()

        # opening brace
        self.tokenizer.advance()
        self.writeCurrentToken()

        # statements
        self.tokenizer.advance()
        self.compileStatements()

        # closing brace
        self.writeCurrentToken()

        self.writeCloseTag("whileStatement")
        self.tokenizer.advance()

    def compileDo(self):
        # compile do statement
        self.writeOpenTag("doStatement")

        # do keyword
        self.writeCurrentToken()

        # subroutine call (identifier)
        self.tokenizer.advance()
        self.writeCurrentToken()

        # check for class/var name or direct call
        self.tokenizer.advance()
        if self.tokenizer.symbol() == ".":
            # class or object method call
            # dot
            self.writeCurrentToken()
            # method name
            self.tokenizer.advance()
            self.writeCurrentToken()
            self.tokenizer.advance()

        # opening paren
        self.writeCurrentToken()

        # expression list
        self.tokenizer.advance()
        self.compileExpressionList()

        # closing paren
        self.writeCurrentToken()

        # semicolon
        self.tokenizer.advance()
        self.writeCurrentToken()

        self.writeCloseTag("doStatement")
        self.tokenizer.advance()

    def compileReturn(self):
        # compile return statement
        self.writeOpenTag("returnStatement")

        # return keyword
        self.writeCurrentToken()

        # check for return value
        self.tokenizer.advance()
        if self.tokenizer.symbol() != ";":
            # expression
            self.compileExpression()

        # semicolon
        self.writeCurrentToken()

        self.writeCloseTag("returnStatement")
        self.tokenizer.advance()

    def compileExpression(self):
        # compile expression
        self.writeOpenTag("expression")

        # term
        self.compileTerm()

        # check for op term
        ops = {"+", "-", "*", "/", "&", "|", "<", ">", "="}
        while self.tokenizer.symbol() in ops:
            # operator
            self.writeCurrentToken()
            # term
            self.tokenizer.advance()
            self.compileTerm()

        self.writeCloseTag("expression")

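    # Jack defines no operator precedence: an expression is a flat
    # term (op term)* sequence, so `2 + 3 * 4` becomes five sibling elements
    # inside one <expression> rather than a nested tree.
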
    def compileTerm(self):
        # compile term
        self.writeOpenTag("term")

        tokenType = self.tokenizer.getTokenType()

        if tokenType == "INT_CONST":
            # integer constant
            self.writeCurrentToken()
            self.tokenizer.advance()
        elif tokenType == "STRING_CONST":
            # string constant
            self.writeCurrentToken()
            self.tokenizer.advance()
        elif tokenType == "KEYWORD":
            # keyword constant (true, false, null, this)
            self.writeCurrentToken()
            self.tokenizer.advance()
        elif self.tokenizer.symbol() == "(":
            # opening paren
            self.writeCurrentToken()
            # expression
            self.tokenizer.advance()
            self.compileExpression()
            # closing paren
            self.writeCurrentToken()
            self.tokenizer.advance()
        elif self.tokenizer.symbol() in ["-", "~"]:
            # unary operator
            self.writeCurrentToken()
            # term
            self.tokenizer.advance()
            self.compileTerm()
        elif tokenType == "IDENTIFIER":
            # var name, array access, or subroutine call
            self.writeCurrentToken()
            self.tokenizer.advance()

            if self.tokenizer.symbol() == "[":
                # array access: opening bracket
                self.writeCurrentToken()
                # expression
                self.tokenizer.advance()
                self.compileExpression()
                # closing bracket
                self.writeCurrentToken()
                self.tokenizer.advance()
            elif self.tokenizer.symbol() == "(":
                # subroutine call: opening paren
                self.writeCurrentToken()
                # expression list
                self.tokenizer.advance()
                self.compileExpressionList()
                # closing paren
                self.writeCurrentToken()
                self.tokenizer.advance()
            elif self.tokenizer.symbol() == ".":
                # method call: dot
                self.writeCurrentToken()
                # method name
                self.tokenizer.advance()
                self.writeCurrentToken()
                # opening paren
                self.tokenizer.advance()
                self.writeCurrentToken()
                # expression list
                self.tokenizer.advance()
                self.compileExpressionList()
                # closing paren
                self.writeCurrentToken()
                self.tokenizer.advance()

        self.writeCloseTag("term")

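    # An identifier alone does not determine the term form: the token after
    # it distinguishes a plain variable, array access `a[i]`, direct call
    # `f(...)`, and qualified call `obj.f(...)`. This is the one place the
    # otherwise LL(1) Jack grammar needs a second token of lookahead.
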
    def compileExpressionList(self):
        # compile expression list (possibly empty)
        self.writeOpenTag("expressionList")

        # check if empty
        if self.tokenizer.symbol() != ")":
            # expression
            self.compileExpression()

            # additional expressions
            while self.tokenizer.symbol() == ",":
                # comma
                self.writeCurrentToken()
                # expression
                self.tokenizer.advance()
                self.compileExpression()

        self.writeCloseTag("expressionList")

    def close(self):
        # close output file
        self.output.close()


def analyzeFile(jackFile, outputFile, tokenizeOnly=False):
    # analyze single Jack file
    tokenizer = JackTokenizer(jackFile)

    if tokenizeOnly:
        # tokenizer test output
        output = open(outputFile, "w")
        output.write("<tokens>\r\n")

        while tokenizer.hasMoreTokens():
            tokenizer.advance()
            tokenType = tokenizer.getTokenType()

            if tokenType == "KEYWORD":
                value = tokenizer.keyword()
                output.write(f"<keyword> {value} </keyword>\r\n")
            elif tokenType == "SYMBOL":
                value = tokenizer.symbol()
                # escape special characters
                if value == "<":
                    value = "&lt;"
                elif value == ">":
                    value = "&gt;"
                elif value == '"':
                    value = "&quot;"
                elif value == "&":
                    value = "&amp;"
                output.write(f"<symbol> {value} </symbol>\r\n")
            elif tokenType == "IDENTIFIER":
                value = tokenizer.identifier()
                output.write(f"<identifier> {value} </identifier>\r\n")
            elif tokenType == "INT_CONST":
                value = tokenizer.intVal()
                output.write(f"<integerConstant> {value} </integerConstant>\r\n")
            elif tokenType == "STRING_CONST":
                value = tokenizer.stringVal()
                output.write(f"<stringConstant> {value} </stringConstant>\r\n")

        output.write("</tokens>\r\n")
        output.close()
    else:
        # full compilation
        engine = CompilationEngine(tokenizer, outputFile)
        engine.compileClass()
        engine.close()

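# Tokenizer-mode output sketch for `let x = 1;`:
#     <tokens>
#     <keyword> let </keyword>
#     <identifier> x </identifier>
#     <symbol> = </symbol>
#     <integerConstant> 1 </integerConstant>
#     <symbol> ; </symbol>
#     </tokens>

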
def main():
    # analyze Jack file or directory
    if len(sys.argv) != 2:
        print("Usage: python hjc.py <file_or_directory>")
        sys.exit(1)

    inputPath = sys.argv[1]

    if not os.path.exists(inputPath):
        print(f"Error: Path '{inputPath}' not found")
        sys.exit(1)

    if os.path.isfile(inputPath):
        # single file mode
        if not inputPath.endswith(".jack"):
            print("Error: Input file must have .jack extension")
            sys.exit(1)

        # Generate tokenizer output
        tokenizerFile = inputPath[:-5] + "T.xml"
        analyzeFile(inputPath, tokenizerFile, True)

        # Generate parser output
        parserFile = inputPath[:-5] + ".xml"
        analyzeFile(inputPath, parserFile, False)

        print(
            f"Analyzed '{inputPath}' - generated '{tokenizerFile}' and '{parserFile}'"
        )

    elif os.path.isdir(inputPath):
        # directory mode
        jackFiles = [f for f in os.listdir(inputPath) if f.endswith(".jack")]

        if not jackFiles:
            print(f"Error: No .jack files found in directory '{inputPath}'")
            sys.exit(1)

        for jackFile in jackFiles:
            inputFile = os.path.join(inputPath, jackFile)

            # Generate tokenizer output
            tokenizerFile = os.path.join(inputPath, jackFile[:-5] + "T.xml")
            analyzeFile(inputFile, tokenizerFile, True)

            # Generate parser output
            parserFile = os.path.join(inputPath, jackFile[:-5] + ".xml")
            analyzeFile(inputFile, parserFile, False)

        print(f"Analyzed {len(jackFiles)} files in '{inputPath}'")

    else:
        print(f"Error: '{inputPath}' is neither file nor directory")
        sys.exit(1)


if __name__ == "__main__":
    main()
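
# Invocation sketch (hypothetical paths):
#     python hjc.py Main.jack     -> writes MainT.xml and Main.xml
#     python hjc.py someProject/  -> writes the same pair for every .jack
#                                    file in the directory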