From d408c5082708fa53cacbbfdd951086a484818e09 Mon Sep 17 00:00:00 2001
From: Sean O'Connor <sean@soconnor.dev>
Date: Wed, 10 Sep 2025 23:50:42 -0400
Subject: [PATCH] Initial commit: Tree-sitter grammar for Hack Binary language

---
 grammar.js               |  30 ++++
 parser.dylib             | Bin 0 -> 33824 bytes
 src/grammar.json         |  74 +++++++++
 src/node-types.json      |  46 ++++++
 src/parser.c             | 335 +++++++++++++++++++++++++++++++++++++++
 src/tree_sitter/alloc.h  |  54 +++++++
 src/tree_sitter/array.h  | 291 ++++++++++++++++++++++++++++++++++
 src/tree_sitter/parser.h | 286 +++++++++++++++++++++++++++++++++
 tree-sitter.json         |  37 +++++
 9 files changed, 1153 insertions(+)
 create mode 100644 grammar.js
 create mode 100755 parser.dylib
 create mode 100644 src/grammar.json
 create mode 100644 src/node-types.json
 create mode 100644 src/parser.c
 create mode 100644 src/tree_sitter/alloc.h
 create mode 100644 src/tree_sitter/array.h
 create mode 100644 src/tree_sitter/parser.h
 create mode 100644 tree-sitter.json
diff --git a/grammar.js b/grammar.js
new file mode 100644
index 0000000..0e980c0
--- /dev/null
+++ b/grammar.js
@@ -0,0 +1,30 @@
+module.exports = grammar({  // Tree-sitter grammar for the Hack Binary language: a file is a sequence of 16-bit words.
+  name: 'hack_binary',
+
+  rules: {
+    source_file: $ => repeat($._item),  // zero or more items, so an empty file is accepted
+
+    _item: $ => choice(
+      $.binary_instruction,
+      $.comment,  // NOTE(review): also listed in `extras` below — looks redundant here; confirm before removing
+      $._whitespace  // NOTE(review): overlaps the /\s/ entry in `extras` below — confirm whether it is needed
+    ),
+
+    // One instruction: wraps the raw bit_sequence token in its own named node
+    binary_instruction: $ => $.bit_sequence,
+
+    // Exactly 16 consecutive characters, each of them '0' or '1'
+    bit_sequence: $ => /[01]{16}/,
+
+    // Line comment: '//' followed by the rest of the line, lexed as one token
+    comment: $ => token(seq('//', /.*/)),
+
+    // One or more whitespace characters (rule name starts with '_', i.e. a hidden rule)
+    _whitespace: $ => /\s+/
+  },
+
+  extras: $ => [  // tokens that may appear between any other tokens
+    /\s/,
+    $.comment
+  ]
+});
diff --git a/parser.dylib b/parser.dylib
new file mode 100755
index 0000000000000000000000000000000000000000..7881b8398d0c6b96ba1bf1885086b2f577d23a69
GIT binary patch
literal 33824
zcmeI5e{38_702J+`FCQ+O&k)`6rWoZ($M;7Y}5i$a&8kyMAd}k5?oYRukY6O;k!Ni
zc3s<XMUNIk+6wBlB~S^eaZ#bRT#1$xfe?@iihw_;Dzzp-6eTJuicl+|M4?y;=KE%5
za~mgVr9|Q%-q>?*-<vmYW<N8#cmK@HD+_P`XE~8YNHxeSkk?ibO;R(P=!3`yP-@%h
zZQW^Qy6_`)p-M)thM6-$sbt%MI~EuPVrjf?<5(BhKCH(o?i{EbRc$*{^!!S|rSW=d
zwRg+}Vf})5(O|enJU=MqynbU}8n3rr7ktT-hq_TOif!8?-e}It+BvVU2&bj-j+%Hq
zCIIX2)juAG@2WUkyV70u9Xs#ZqemlU0$fHiPR=~QJfatKyL7xBH6u_VrR(GBl8O7Z
z9oe?;+;P`uw{6`K&XKZlPZ??Sgc)ZJA2z`rjmTrjYmuY&<X+z``ANTs;r5E9<kn(l
zq~LmipWIa{e%a07!p*q-ogKF%J;%##3S5k$fAfAfo9r(+g@S|115Re}=H8s=l*W@o
zFu0}m?07!ci>9cPsH|psnP?aE4x{gaz5zOq3HPZCy$Q1Ct3(NOa2c-IpA+|;GxwW)
zt|VQHO5DynADbL|>+c<F5B>A(zUN+?*tZF`1Tx2_YCN6De61dn>PM?K;#&2e%aQOb
z({0EcOV6jz;4!o)IpUX+xM$9L#{Ix8wBKH#K(rqI9A~UdR0nyZ(fPhPU5Cu&<p+ok
ztIn|X@+m<yh0=pZ<M#YQ|IcUMwXK=6pO4L)*%_ZXH&HcnE^Zw>6Srnh$7yyom3Qw$
z9z{mKr`A4r-9fyIS}f@4Sb6tAP3qq#;;UxK+A=>LOZ{x%0~P(iW?I?0mxh10iqZ!*
zryhF<-E0^fxb4RJhDQ=s>->pxwYP5h6%C(gru2r1)|IU<(eO!JbL<wxKDD^GCbYkF
zjkce$%Ddl;*ezo}0{a`Vk2cWwKtrVu9P0>TeH?PbN9c2{t7$m1u$bQRAPpxV$8axU
z8`Dj?jWlBYA=1XlMr{ue<2Mm|TWE*>FC%tmxw!|}e;Tn*FEeq_pBEzblMUMb8vN%X
z_BLZrqun1w>`uM$hy5_@ieWXSx8Yt=r)zHw@AGNc+ThcP_6|ks*XsIDplm8)pRO@>
z^yA@({UqWjj`06l#NKA?o$&uk#O_pS|1<C(gZ<tmW5Q!`cbq!6e3XXqu&CHippOOk
z^0@Ty_+VbN#dKLG{QFQAj^($wE!gl3wbVflTp~V~@!UNeqm}0-wqkDIfjNFV=6cu6
zLjTLp^4zE0pZxTOdj{?dI=$=Ldsn3D_a1z7|7r_QWL!PR`U$G0TB^r8r5evH|20Bi
zLD!%#uKrfhYCO^Wj?j9HJ_Ik;5|EavbG?f9VWsJ)>MI6KsejmUVLrK>YsIMwZPuW*
z%O~zd)I0PHUjpU}8}TZMuemi&7TVBK34AQo`A#bS;d|?ReQT+|ZZRx3@4`xZF?}~W
zD&iVlwTnJoY)3q{hy6Y8ukB$wmc_r1l&hAZZt9(nYxL09^|I8P3vAyV9&x>lOPOMU
zzrn~J9moZ)KjgsX7e`7N*Y3;ZU9w><pbCFq;pYQ>mW*VAT+xd<*(G<#b%I+I^>wO0
zL{Z7T;ondxNY4EGhf_*@Rz8^JaxiH@=45^g#_&y}u`8>52mv7=1cZPP5CTF#2nYco
zAOwVf5D)@FKnMr{As_^VfDjM@LO=)z0U;m+gn$qb0zyCt2mv7=1cZPP5CTF#2nYco
zAOwVf5D)@FKnMr{As_^V!2g;6uUF>vzMR4ZU%bwZufv6GyfW-wwSKIbeq!V?BR8qa
z&6MbZ(?zSx*rwA7<NvOab*5f}kxfQsRVNOI)vj8uS8>Z%97nJ7Z>B!An6jCCBPWdf
zhLMjO`J|C^N?viZQc(y90U;m+gn$qb0zyCt2mv7=1cZPP5CTF#2nYcoAOwVf5D)@F
zKnMr{As_^VfDjM@LO=)z0U;m+gn$qb0zyCt2mv7=1cZPP5CTF#2nYcoAOwVf5D)@F
zKnMr{As__)4+$i2&)ZGr+Yi_M^R+%}w{pffpDRu&>=2OR3!N-=QhKP^LCJM(KNkdU
z$>yWiZGDt2CD<cQ$JC1aV|9E|IjJ*^o6w+M_fK>KY)#m3K<_u8&SO`)-gkg#hbh<l
z4-jRHuJ;!p8a2A!XMpJIM%Vid5KS9h?>j*BqS5vK14O?zy52W{=(y4KJ_khqU>zr_
zhxgxzzO%0PKhUQOuYiru-sO~_%kr=T-_E;ZaPVz)EWI5#y?p*Jbid%_^Y3r93(k=C
zA20M4^LD`voU9W#7ubx8SM+QQNde~y=WQpSbNutQwH6*w?5o=7nO!JmU9An3as@t)
z*bkh*wR2fxbxOW#JL+^|wp@DP+w+dsKjQS8JJUV&oB~1<E&O%)QOvj}aS3WFwZMiM
zU`<jC5cxQ4A{V(^7vrF8dZ3($siW?V>Agf;%{TT?kW1#&`y=1%czWZf-abA5#f3vt
z?cbZcd;L9&ach3(wyB}-f8nNY4?cJQ+zZ{s-=69_TJ@(_Umc9??5<wC@5p`oC;$A5
zN47t7(+{t<{F&$1(I4-BXLHNOH9^O-Gq0Z=eEFF*f4kv3-YYfvYquYN<7(r-U;Wv)
px_&e}x2|<r`T4)hzBbpsC)1cc{^nCZo<6nyU$1}S_fHw5{{@IU;Q;^u

literal 0
HcmV?d00001

diff --git a/src/grammar.json b/src/grammar.json
new file mode 100644
index 0000000..d445730
--- /dev/null
+++ b/src/grammar.json
@@ -0,0 +1,74 @@
+{
+  "$schema": "https://tree-sitter.github.io/tree-sitter/assets/schemas/grammar.schema.json",
+  "name": "hack_binary",
+  "rules": {
+    "source_file": {
+      "type": "REPEAT",
+      "content": {
+        "type": "SYMBOL",
+        "name": "_item"
+      }
+    },
+    "_item": {
+      "type": "CHOICE",
+      "members": [
+        {
+          "type": "SYMBOL",
+          "name": "binary_instruction"
+        },
+        {
+          "type": "SYMBOL",
+          "name": "comment"
+        },
+        {
+          "type": "SYMBOL",
+          "name": "_whitespace"
+        }
+      ]
+    },
+    "binary_instruction": {
+      "type": "SYMBOL",
+      "name": "bit_sequence"
+    },
+    "bit_sequence": {
+      "type": "PATTERN",
+      "value": "[01]{16}"
+    },
+    "comment": {
+      "type": "TOKEN",
+      "content": {
+        "type": "SEQ",
+        "members": [
+          {
+            "type": "STRING",
+            "value": "//"
+          },
+          {
+            "type": "PATTERN",
+            "value": ".*"
+          }
+        ]
+      }
+    },
+    "_whitespace": {
+      "type": "PATTERN",
+      "value": "\\s+"
+    }
+  },
+  "extras": [
+    {
+      "type": "PATTERN",
+      "value": "\\s"
+    },
+    {
+      "type": "SYMBOL",
+      "name": "comment"
+    }
+  ],
+  "conflicts": [],
+  "precedences": [],
+  "externals": [],
+  "inline": [],
+  "supertypes": [],
+  "reserved": {}
+}
\ No newline at end of file
diff --git a/src/node-types.json b/src/node-types.json
new file mode 100644
index 0000000..2c66106
--- /dev/null
+++ b/src/node-types.json
@@ -0,0 +1,46 @@
+[
+  {
+    "type": "binary_instruction",
+    "named": true,
+    "fields": {},
+    "children": {
+      "multiple": false,
+      "required": true,
+      "types": [
+        {
+          "type": "bit_sequence",
+          "named": true
+        }
+      ]
+    }
+  },
+  {
+    "type": "source_file",
+    "named": true,
+    "root": true,
+    "fields": {},
+    "children": {
+      "multiple": true,
+      "required": false,
+      "types": [
+        {
+          "type": "binary_instruction",
+          "named": true
+        },
+        {
+          "type": "comment",
+          "named": true
+        }
+      ]
+    }
+  },
+  {
+    "type": "bit_sequence",
+    "named": true
+  },
+  {
+    "type": "comment",
+    "named": true,
+    "extra": true
+  }
+]
\ No newline at end of file
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..0538c5e
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,335 @@
+/* Automatically @generated by tree-sitter v0.25.8 */
+
+#include "tree_sitter/parser.h"
+
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+#endif
+
+#define LANGUAGE_VERSION 14
+#define STATE_COUNT 6
+#define LARGE_STATE_COUNT 4
+#define SYMBOL_COUNT 8
+#define ALIAS_COUNT 0
+#define TOKEN_COUNT 4
+#define EXTERNAL_TOKEN_COUNT 0
+#define FIELD_COUNT 0
+#define MAX_ALIAS_SEQUENCE_LENGTH 2
+#define MAX_RESERVED_WORD_SET_SIZE 0
+#define PRODUCTION_ID_COUNT 1
+#define SUPERTYPE_COUNT 0
+
+enum ts_symbol_identifiers {  /* symbol ids; id 0 is ts_builtin_sym_end from tree_sitter/parser.h */
+  sym_bit_sequence = 1,  /* ids 1-3: tokens accepted by ts_lex (TOKEN_COUNT is 4, counting id 0) */
+  sym_comment = 2,
+  sym__whitespace = 3,
+  sym_source_file = 4,  /* ids 4-7: non-terminal symbols */
+  sym__item = 5,
+  sym_binary_instruction = 6,
+  aux_sym_source_file_repeat1 = 7,
+};
+
+static const char * const ts_symbol_names[] = {
+  [ts_builtin_sym_end] = "end",
+  [sym_bit_sequence] = "bit_sequence",
+  [sym_comment] = "comment",
+  [sym__whitespace] = "_whitespace",
+  [sym_source_file] = "source_file",
+  [sym__item] = "_item",
+  [sym_binary_instruction] = "binary_instruction",
+  [aux_sym_source_file_repeat1] = "source_file_repeat1",
+};
+
+static const TSSymbol ts_symbol_map[] = {
+  [ts_builtin_sym_end] = ts_builtin_sym_end,
+  [sym_bit_sequence] = sym_bit_sequence,
+  [sym_comment] = sym_comment,
+  [sym__whitespace] = sym__whitespace,
+  [sym_source_file] = sym_source_file,
+  [sym__item] = sym__item,
+  [sym_binary_instruction] = sym_binary_instruction,
+  [aux_sym_source_file_repeat1] = aux_sym_source_file_repeat1,
+};
+
+static const TSSymbolMetadata ts_symbol_metadata[] = {
+  [ts_builtin_sym_end] = {
+    .visible = false,
+    .named = true,
+  },
+  [sym_bit_sequence] = {
+    .visible = true,
+    .named = true,
+  },
+  [sym_comment] = {
+    .visible = true,
+    .named = true,
+  },
+  [sym__whitespace] = {
+    .visible = false,
+    .named = true,
+  },
+  [sym_source_file] = {
+    .visible = true,
+    .named = true,
+  },
+  [sym__item] = {
+    .visible = false,
+    .named = true,
+  },
+  [sym_binary_instruction] = {
+    .visible = true,
+    .named = true,
+  },
+  [aux_sym_source_file_repeat1] = {
+    .visible = false,
+    .named = false,
+  },
+};
+
+static const TSSymbol ts_alias_sequences[PRODUCTION_ID_COUNT][MAX_ALIAS_SEQUENCE_LENGTH] = {
+  [0] = {0},
+};
+
+static const uint16_t ts_non_terminal_alias_map[] = {
+  0,
+};
+
+static const TSStateId ts_primary_state_ids[STATE_COUNT] = {
+  [0] = 0,
+  [1] = 1,
+  [2] = 2,
+  [3] = 3,
+  [4] = 4,
+  [5] = 5,
+};
+
+static bool ts_lex(TSLexer *lexer, TSStateId state) {  /* Generated lexer state machine; `state` is the start state (0 or 17, see ts_lex_modes). */
+  START_LEXER();  /* NOTE(review): START_LEXER/ADVANCE/SKIP/ACCEPT_TOKEN/END_STATE are macros from tree_sitter/parser.h; the notes below assume ADVANCE(n) consumes the lookahead and continues in state n — confirm there. */
+  eof = lexer->eof(lexer);
+  switch (state) {
+    case 0:  /* start state: dispatch on EOF, '/', a bit, or whitespace */
+      if (eof) ADVANCE(18);
+      if (lookahead == '/') ADVANCE(1);
+      if (lookahead == '0' ||
+          lookahead == '1') ADVANCE(16);
+      if (('\t' <= lookahead && lookahead <= '\r') ||
+          lookahead == ' ') ADVANCE(21);
+      END_STATE();
+    case 1:  /* one '/' seen; a second '/' begins a comment */
+      if (lookahead == '/') ADVANCE(20);
+      END_STATE();
+    case 2:  /* 15 of 16 bits read; one more completes the token */
+      if (lookahead == '0' ||
+          lookahead == '1') ADVANCE(19);
+      END_STATE();
+    case 3:  /* 14 of 16 bits read */
+      if (lookahead == '0' ||
+          lookahead == '1') ADVANCE(2);
+      END_STATE();
+    case 4:  /* 13 of 16 bits read */
+      if (lookahead == '0' ||
+          lookahead == '1') ADVANCE(3);
+      END_STATE();
+    case 5:  /* 12 of 16 bits read */
+      if (lookahead == '0' ||
+          lookahead == '1') ADVANCE(4);
+      END_STATE();
+    case 6:  /* 11 of 16 bits read */
+      if (lookahead == '0' ||
+          lookahead == '1') ADVANCE(5);
+      END_STATE();
+    case 7:  /* 10 of 16 bits read */
+      if (lookahead == '0' ||
+          lookahead == '1') ADVANCE(6);
+      END_STATE();
+    case 8:  /* 9 of 16 bits read */
+      if (lookahead == '0' ||
+          lookahead == '1') ADVANCE(7);
+      END_STATE();
+    case 9:  /* 8 of 16 bits read */
+      if (lookahead == '0' ||
+          lookahead == '1') ADVANCE(8);
+      END_STATE();
+    case 10:  /* 7 of 16 bits read */
+      if (lookahead == '0' ||
+          lookahead == '1') ADVANCE(9);
+      END_STATE();
+    case 11:  /* 6 of 16 bits read */
+      if (lookahead == '0' ||
+          lookahead == '1') ADVANCE(10);
+      END_STATE();
+    case 12:  /* 5 of 16 bits read */
+      if (lookahead == '0' ||
+          lookahead == '1') ADVANCE(11);
+      END_STATE();
+    case 13:  /* 4 of 16 bits read */
+      if (lookahead == '0' ||
+          lookahead == '1') ADVANCE(12);
+      END_STATE();
+    case 14:  /* 3 of 16 bits read */
+      if (lookahead == '0' ||
+          lookahead == '1') ADVANCE(13);
+      END_STATE();
+    case 15:  /* 2 of 16 bits read */
+      if (lookahead == '0' ||
+          lookahead == '1') ADVANCE(14);
+      END_STATE();
+    case 16:  /* 1 of 16 bits read */
+      if (lookahead == '0' ||
+          lookahead == '1') ADVANCE(15);
+      END_STATE();
+    case 17:  /* alternate start state (parse state 5 in ts_lex_modes): whitespace is skipped, no bit sequence is recognised */
+      if (eof) ADVANCE(18);
+      if (lookahead == '/') ADVANCE(1);
+      if (('\t' <= lookahead && lookahead <= '\r') ||
+          lookahead == ' ') SKIP(17);
+      END_STATE();
+    case 18:  /* end of input */
+      ACCEPT_TOKEN(ts_builtin_sym_end);
+      END_STATE();
+    case 19:  /* all 16 bits read */
+      ACCEPT_TOKEN(sym_bit_sequence);
+      END_STATE();
+    case 20:  /* inside a comment: keep going until '\n' or NUL */
+      ACCEPT_TOKEN(sym_comment);
+      if (lookahead != 0 &&
+          lookahead != '\n') ADVANCE(20);
+      END_STATE();
+    case 21:  /* run of whitespace ('\t'..'\r' or ' ') */
+      ACCEPT_TOKEN(sym__whitespace);
+      if (('\t' <= lookahead && lookahead <= '\r') ||
+          lookahead == ' ') ADVANCE(21);
+      END_STATE();
+    default:  /* unknown state id */
+      return false;
+  }
+}
+
+static const TSLexMode ts_lex_modes[STATE_COUNT] = {  /* one entry per parse state; lex_state values are case labels in ts_lex */
+  [0] = {.lex_state = 0},
+  [1] = {.lex_state = 0},
+  [2] = {.lex_state = 0},
+  [3] = {.lex_state = 0},
+  [4] = {.lex_state = 0},
+  [5] = {.lex_state = 17},  /* state 5 follows sym_source_file (see STATE(1) in ts_parse_table); lex state 17 skips whitespace */
+};
+
+static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = {
+  [STATE(0)] = {
+    [ts_builtin_sym_end] = ACTIONS(1),
+    [sym_bit_sequence] = ACTIONS(1),
+    [sym_comment] = ACTIONS(3),
+    [sym__whitespace] = ACTIONS(1),
+  },
+  [STATE(1)] = {
+    [sym_source_file] = STATE(5),
+    [sym__item] = STATE(2),
+    [sym_binary_instruction] = STATE(2),
+    [aux_sym_source_file_repeat1] = STATE(2),
+    [ts_builtin_sym_end] = ACTIONS(5),
+    [sym_bit_sequence] = ACTIONS(7),
+    [sym_comment] = ACTIONS(9),
+    [sym__whitespace] = ACTIONS(11),
+  },
+  [STATE(2)] = {
+    [sym__item] = STATE(3),
+    [sym_binary_instruction] = STATE(3),
+    [aux_sym_source_file_repeat1] = STATE(3),
+    [ts_builtin_sym_end] = ACTIONS(13),
+    [sym_bit_sequence] = ACTIONS(7),
+    [sym_comment] = ACTIONS(15),
+    [sym__whitespace] = ACTIONS(17),
+  },
+  [STATE(3)] = {
+    [sym__item] = STATE(3),
+    [sym_binary_instruction] = STATE(3),
+    [aux_sym_source_file_repeat1] = STATE(3),
+    [ts_builtin_sym_end] = ACTIONS(19),
+    [sym_bit_sequence] = ACTIONS(21),
+    [sym_comment] = ACTIONS(24),
+    [sym__whitespace] = ACTIONS(27),
+  },
+};
+
+static const uint16_t ts_small_parse_table[] = {
+  [0] = 2,
+    ACTIONS(30), 2,
+      ts_builtin_sym_end,
+      sym__whitespace,
+    ACTIONS(32), 2,
+      sym_bit_sequence,
+      sym_comment,
+  [9] = 2,
+    ACTIONS(34), 1,
+      ts_builtin_sym_end,
+    ACTIONS(36), 1,
+      sym_comment,
+};
+
+static const uint32_t ts_small_parse_table_map[] = {
+  [SMALL_STATE(4)] = 0,
+  [SMALL_STATE(5)] = 9,
+};
+
+static const TSParseActionEntry ts_parse_actions[] = {
+  [0] = {.entry = {.count = 0, .reusable = false}},
+  [1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(),
+  [3] = {.entry = {.count = 1, .reusable = false}}, SHIFT_EXTRA(),
+  [5] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 0, 0, 0),
+  [7] = {.entry = {.count = 1, .reusable = false}}, SHIFT(4),
+  [9] = {.entry = {.count = 1, .reusable = false}}, SHIFT(2),
+  [11] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2),
+  [13] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1, 0, 0),
+  [15] = {.entry = {.count = 1, .reusable = false}}, SHIFT(3),
+  [17] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3),
+  [19] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2, 0, 0),
+  [21] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_source_file_repeat1, 2, 0, 0), SHIFT_REPEAT(4),
+  [24] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_source_file_repeat1, 2, 0, 0), SHIFT_REPEAT(3),
+  [27] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2, 0, 0), SHIFT_REPEAT(3),
+  [30] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_binary_instruction, 1, 0, 0),
+  [32] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_binary_instruction, 1, 0, 0),
+  [34] = {.entry = {.count = 1, .reusable = true}},  ACCEPT_INPUT(),
+  [36] = {.entry = {.count = 1, .reusable = true}}, SHIFT_EXTRA(),
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#ifdef TREE_SITTER_HIDE_SYMBOLS
+#define TS_PUBLIC
+#elif defined(_WIN32)
+#define TS_PUBLIC __declspec(dllexport)
+#else
+#define TS_PUBLIC __attribute__((visibility("default")))
+#endif
+
+TS_PUBLIC const TSLanguage *tree_sitter_hack_binary(void) {  /* Exported entry point: returns the address of a function-local static TSLanguage wired to the tables and lexer in this file. */
+  static const TSLanguage language = {
+    .abi_version = LANGUAGE_VERSION,  /* 14, per the #define near the top of this file */
+    .symbol_count = SYMBOL_COUNT,
+    .alias_count = ALIAS_COUNT,
+    .token_count = TOKEN_COUNT,
+    .external_token_count = EXTERNAL_TOKEN_COUNT,
+    .state_count = STATE_COUNT,
+    .large_state_count = LARGE_STATE_COUNT,
+    .production_id_count = PRODUCTION_ID_COUNT,
+    .field_count = FIELD_COUNT,
+    .max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH,
+    .parse_table = &ts_parse_table[0][0],  /* 2-D table passed as a flat pointer to its first cell */
+    .small_parse_table = ts_small_parse_table,
+    .small_parse_table_map = ts_small_parse_table_map,
+    .parse_actions = ts_parse_actions,
+    .symbol_names = ts_symbol_names,
+    .symbol_metadata = ts_symbol_metadata,
+    .public_symbol_map = ts_symbol_map,
+    .alias_map = ts_non_terminal_alias_map,
+    .alias_sequences = &ts_alias_sequences[0][0],
+    .lex_modes = (const void*)ts_lex_modes,  /* NOTE(review): cast presumably reconciles TSLexMode with the field's declared type — confirm against TSLanguage in parser.h */
+    .lex_fn = ts_lex,
+    .primary_state_ids = ts_primary_state_ids,
+  };
+  return &language;  /* same pointer on every call */
+}
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/tree_sitter/alloc.h b/src/tree_sitter/alloc.h
new file mode 100644
index 0000000..1abdd12
--- /dev/null
+++ b/src/tree_sitter/alloc.h
@@ -0,0 +1,54 @@
+#ifndef TREE_SITTER_ALLOC_H_
+#define TREE_SITTER_ALLOC_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+// Allow clients to override allocation functions: any ts_* macro that is already defined is left as-is.
+#ifdef TREE_SITTER_REUSE_ALLOCATOR
+// Route allocations through function pointers that are only declared here (defined elsewhere).
+extern void *(*ts_current_malloc)(size_t size);
+extern void *(*ts_current_calloc)(size_t count, size_t size);
+extern void *(*ts_current_realloc)(void *ptr, size_t size);
+extern void (*ts_current_free)(void *ptr);
+
+#ifndef ts_malloc
+#define ts_malloc  ts_current_malloc
+#endif
+#ifndef ts_calloc
+#define ts_calloc  ts_current_calloc
+#endif
+#ifndef ts_realloc
+#define ts_realloc ts_current_realloc
+#endif
+#ifndef ts_free
+#define ts_free    ts_current_free
+#endif
+
+#else  // TREE_SITTER_REUSE_ALLOCATOR not defined
+// Default: map each ts_* macro straight onto the C standard library allocator.
+#ifndef ts_malloc
+#define ts_malloc  malloc
+#endif
+#ifndef ts_calloc
+#define ts_calloc  calloc
+#endif
+#ifndef ts_realloc
+#define ts_realloc realloc
+#endif
+#ifndef ts_free
+#define ts_free    free
+#endif
+
+#endif  // TREE_SITTER_REUSE_ALLOCATOR
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TREE_SITTER_ALLOC_H_
diff --git a/src/tree_sitter/array.h b/src/tree_sitter/array.h
new file mode 100644
index 0000000..a17a574
--- /dev/null
+++ b/src/tree_sitter/array.h
@@ -0,0 +1,291 @@
+#ifndef TREE_SITTER_ARRAY_H_
+#define TREE_SITTER_ARRAY_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "./alloc.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4101)
+#elif defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#define Array(T)       \
+  struct {             \
+    T *contents;       \
+    uint32_t size;     \
+    uint32_t capacity; \
+  }
+
+/// Initialize an array.
+#define array_init(self) \
+  ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)
+
+/// Create an empty array.
+#define array_new() \
+  { NULL, 0, 0 }
+
+/// Get a pointer to the element at a given `index` in the array.
+#define array_get(self, _index) \
+  (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])
+
+/// Get a pointer to the first element in the array.
+#define array_front(self) array_get(self, 0)
+
+/// Get a pointer to the last element in the array.
+#define array_back(self) array_get(self, (self)->size - 1)
+
+/// Clear the array, setting its size to zero. Note that this does not free any
+/// memory allocated for the array's contents.
+#define array_clear(self) ((self)->size = 0)
+
+/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
+/// less than the array's current capacity, this function has no effect.
+#define array_reserve(self, new_capacity) \
+  _array__reserve((Array *)(self), array_elem_size(self), new_capacity)
+
+/// Free the array's contents buffer and reset the array to empty. Note that this
+/// does not free any memory that the individual elements themselves point to.
+#define array_delete(self) _array__delete((Array *)(self))
+
+/// Push a new `element` onto the end of the array.
+#define array_push(self, element)                            \
+  (_array__grow((Array *)(self), 1, array_elem_size(self)), \
+   (self)->contents[(self)->size++] = (element))
+
+/// Increase the array's size by `count` elements.
+/// New elements are zero-initialized.
+#define array_grow_by(self, count) \
+  do { \
+    if ((count) == 0) break; \
+    _array__grow((Array *)(self), count, array_elem_size(self)); \
+    memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \
+    (self)->size += (count); \
+  } while (0)
+
+/// Append all elements from one array to the end of another.
+#define array_push_all(self, other)                                       \
+  array_extend((self), (other)->size, (other)->contents)
+
+/// Append `count` elements to the end of the array, reading their values from the
+/// `contents` pointer.
+#define array_extend(self, count, contents)                    \
+  _array__splice(                                               \
+    (Array *)(self), array_elem_size(self), (self)->size, \
+    0, count,  contents                                        \
+  )
+
+/// Remove `old_count` elements from the array starting at the given `index`. At
+/// the same index, insert `new_count` new elements, reading their values from the
+/// `new_contents` pointer.
+#define array_splice(self, _index, old_count, new_count, new_contents)  \
+  _array__splice(                                                       \
+    (Array *)(self), array_elem_size(self), _index,                \
+    old_count, new_count, new_contents                                 \
+  )
+
+/// Insert one `element` into the array at the given `index`.
+#define array_insert(self, _index, element) \
+  _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))
+
+/// Remove one element from the array at the given `index`.
+#define array_erase(self, _index) \
+  _array__erase((Array *)(self), array_elem_size(self), _index)
+
+/// Pop the last element off the array, returning the element by value.
+#define array_pop(self) ((self)->contents[--(self)->size])
+
+/// Assign the contents of one array to another, reallocating if necessary.
+#define array_assign(self, other) \
+  _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))
+
+/// Swap one array with another
+#define array_swap(self, other) \
+  _array__swap((Array *)(self), (Array *)(other))
+
+/// Get the size in bytes of a single element of the array
+#define array_elem_size(self) (sizeof *(self)->contents)
+
+/// Search a sorted array for a given `needle` value, using the given `compare`
+/// callback to determine the order.
+///
+/// If an existing element is found to be equal to `needle`, then the `index`
+/// out-parameter is set to the existing value's index, and the `exists`
+/// out-parameter is set to true. Otherwise, `index` is set to an index where
+/// `needle` should be inserted in order to preserve the sorting, and `exists`
+/// is set to false.
+#define array_search_sorted_with(self, compare, needle, _index, _exists) \
+  _array__search_sorted(self, 0, compare, , needle, _index, _exists)
+
+/// Search a sorted array for a given `needle` value, using integer comparisons
+/// of a given struct field (specified with a leading dot) to determine the order.
+///
+/// See also `array_search_sorted_with`.
+#define array_search_sorted_by(self, field, needle, _index, _exists) \
+  _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)
+
+/// Insert a given `value` into a sorted array, using the given `compare`
+/// callback to determine the order.
+#define array_insert_sorted_with(self, compare, value) \
+  do { \
+    unsigned _index, _exists; \
+    array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
+    if (!_exists) array_insert(self, _index, value); \
+  } while (0)
+
+/// Insert a given `value` into a sorted array, using integer comparisons of
+/// a given struct field (specified with a leading dot) to determine the order.
+///
+/// See also `array_search_sorted_by`.
+#define array_insert_sorted_by(self, field, value) \
+  do { \
+    unsigned _index, _exists; \
+    array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
+    if (!_exists) array_insert(self, _index, value); \
+  } while (0)
+
+// Private
+
+typedef Array(void) Array;
+
+/// Release the contents buffer, if there is one, and zero the array's fields. Internal helper: use `array_delete` instead.
+static inline void _array__delete(Array *self) {
+  if (self->contents) {  // with no buffer, the fields are left untouched
+    ts_free(self->contents);
+    self->contents = NULL;  // guards against a double free on a repeated call
+    self->size = 0;
+    self->capacity = 0;
+  }
+}
+
+/// Remove the element at `index` by shifting every later element down one slot. Internal helper: use `array_erase` instead.
+static inline void _array__erase(Array *self, size_t element_size,
+                                uint32_t index) {
+  assert(index < self->size);  // also rejects erasing from an empty array
+  char *contents = (char *)self->contents;  // byte pointer, so offsets can be scaled by element_size
+  memmove(contents + index * element_size, contents + (index + 1) * element_size,
+          (self->size - index - 1) * element_size);  // zero bytes are moved when erasing the last element
+  self->size--;  // capacity is unchanged
+}
+
+/// Grow the buffer so it can hold `new_capacity` elements; never shrinks and never changes `size`. Internal helper: use `array_reserve` instead.
+static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) {
+  if (new_capacity > self->capacity) {  // equal or smaller requests are no-ops
+    if (self->contents) {
+      self->contents = ts_realloc(self->contents, new_capacity * element_size);  // NOTE(review): result stored unchecked; alloc.h maps this to plain realloc by default
+    } else {
+      self->contents = ts_malloc(new_capacity * element_size);  // first allocation for this array
+    }
+    self->capacity = new_capacity;
+  }
+}
+
+/// Overwrite `self` with a copy of `other`'s elements, growing `self` first if needed. Internal helper: use `array_assign` instead.
+static inline void _array__assign(Array *self, const Array *other, size_t element_size) {
+  _array__reserve(self, element_size, other->size);  // reserves exactly other->size, not other->capacity
+  self->size = other->size;
+  memcpy(self->contents, other->contents, self->size * element_size);  // bytewise copy of the elements
+}
+
+/// Exchange the contents pointer, size and capacity of two arrays; no element data is copied. Internal helper: use `array_swap` instead.
+static inline void _array__swap(Array *self, Array *other) {
+  Array swap = *other;  // temporary copy of the three header fields
+  *other = *self;
+  *self = swap;
+}
+
+/// Ensure there is capacity for `count` more elements without changing `size`. Internal helper: use `array_push` or `array_grow_by` instead.
+static inline void _array__grow(Array *self, uint32_t count, size_t element_size) {
+  uint32_t new_size = self->size + count;
+  if (new_size > self->capacity) {
+    uint32_t new_capacity = self->capacity * 2;  // start from double the current capacity
+    if (new_capacity < 8) new_capacity = 8;  // never allocate fewer than 8 slots
+    if (new_capacity < new_size) new_capacity = new_size;  // doubling was not enough for this request
+    _array__reserve(self, element_size, new_capacity);
+  }
+}
+
+/// Replace `old_count` elements at `index` with `new_count` elements read from `elements` (zero-filled if NULL). Internal helper: use `array_splice` instead.
+static inline void _array__splice(Array *self, size_t element_size,
+                                 uint32_t index, uint32_t old_count,
+                                 uint32_t new_count, const void *elements) {
+  uint32_t new_size = self->size + new_count - old_count;
+  uint32_t old_end = index + old_count;  // one past the last element being removed
+  uint32_t new_end = index + new_count;  // one past the last element being inserted
+  assert(old_end <= self->size);
+
+  _array__reserve(self, element_size, new_size);  // exact fit: no doubling as in _array__grow
+
+  char *contents = (char *)self->contents;  // read after the reserve, which may have replaced the buffer
+  if (self->size > old_end) {  // there is a tail after the removed range: move it to follow the new range
+    memmove(
+      contents + new_end * element_size,
+      contents + old_end * element_size,
+      (self->size - old_end) * element_size
+    );
+  }
+  if (new_count > 0) {
+    if (elements) {  // caller supplied values for the new slots
+      memcpy(
+        (contents + index * element_size),
+        elements,
+        new_count * element_size
+      );
+    } else {  // no source given: new slots are zeroed
+      memset(
+        (contents + index * element_size),
+        0,
+        new_count * element_size
+      );
+    }
+  }
+  self->size += new_count - old_count;  // ends up equal to new_size computed above
+}
+
+/// Binary search (modelled on Rust's `std::slice::binary_search_by`) from `start`: sets `*_exists`, and `*_index` to the match or the insertion point.
+/// `compare(&element suffix, needle)` returns <0, 0 or >0. Internal helper: use `array_search_sorted_with` or `array_search_sorted_by` instead.
+#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
+  do { \
+    *(_index) = start; \
+    *(_exists) = false; \
+    uint32_t size = (self)->size - *(_index); \
+    if (size == 0) break; \
+    int comparison; \
+    while (size > 1) { \
+      uint32_t half_size = size / 2; \
+      uint32_t mid_index = *(_index) + half_size; \
+      comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
+      if (comparison <= 0) *(_index) = mid_index; \
+      size -= half_size; \
+    } \
+    comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
+    if (comparison == 0) *(_exists) = true; \
+    else if (comparison < 0) *(_index) += 1; \
+  } while (0)
+
+/// Helper macro for the `_sorted_by` routines above. This takes the left (existing)
+/// parameter by reference in order to work with the generic search macro above.
+#define _compare_int(a, b) ((int)*(a) - (int)(b))
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#elif defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // TREE_SITTER_ARRAY_H_
diff --git a/src/tree_sitter/parser.h b/src/tree_sitter/parser.h
new file mode 100644
index 0000000..858107d
--- /dev/null
+++ b/src/tree_sitter/parser.h
@@ -0,0 +1,286 @@
+#ifndef TREE_SITTER_PARSER_H_
+#define TREE_SITTER_PARSER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#define ts_builtin_sym_error ((TSSymbol)-1)  /* -1 converted to TSSymbol, i.e. 0xFFFF with the uint16_t typedef in this header */
+#define ts_builtin_sym_end 0  /* presumably the symbol id reserved for end of input — confirm against the runtime */
+#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024  /* presumably the size of the buffer handed to external_scanner.serialize — confirm */
+
+#ifndef TREE_SITTER_API_H_  // fallback definitions, skipped when TREE_SITTER_API_H_ is already defined (presumably by tree_sitter/api.h — confirm)
+typedef uint16_t TSStateId;
+typedef uint16_t TSSymbol;
+typedef uint16_t TSFieldId;
+typedef struct TSLanguage TSLanguage;
+typedef struct TSLanguageMetadata {  // three-part version number; stored in TSLanguage.metadata
+  uint8_t major_version;
+  uint8_t minor_version;
+  uint8_t patch_version;
+} TSLanguageMetadata;
+#endif
+
+typedef struct {  // element type of TSLanguage.field_map_entries
+  TSFieldId field_id;
+  uint8_t child_index;
+  bool inherited;
+} TSFieldMapEntry;
+
+// Used to index the field and supertype maps.
+typedef struct {  // element type of TSLanguage.field_map_slices and TSLanguage.supertype_map_slices
+  uint16_t index;  // presumably the offset of the first entry in the matching *_map_entries array — confirm
+  uint16_t length;  // presumably the number of consecutive entries — confirm
+} TSMapSlice;
+
+typedef struct {  // three boolean flags per symbol; element type of TSLanguage.symbol_metadata
+  bool visible;
+  bool named;
+  bool supertype;
+} TSSymbolMetadata;
+
+typedef struct TSLexer TSLexer;
+
+struct TSLexer {  // lexer state and callbacks used by the lexer macros defined further down in this header
+  int32_t lookahead;  // value the lexer macros dispatch on; START_LEXER re-reads it after every advance
+  TSSymbol result_symbol;  // written by ACCEPT_TOKEN
+  void (*advance)(TSLexer *, bool);  // START_LEXER passes its `skip` flag as the second argument (true only when reached via SKIP)
+  void (*mark_end)(TSLexer *);  // called by ACCEPT_TOKEN right after result_symbol is stored
+  uint32_t (*get_column)(TSLexer *);
+  bool (*is_at_included_range_start)(const TSLexer *);
+  bool (*eof)(const TSLexer *);
+  void (*log)(const TSLexer *, const char *, ...);  // printf-style signature (format string plus varargs)
+};
+
+typedef enum {  // tag values stored in the `type` member of TSParseAction
+  TSParseActionTypeShift,  // used by SHIFT, SHIFT_REPEAT and SHIFT_EXTRA
+  TSParseActionTypeReduce,  // used by REDUCE
+  TSParseActionTypeAccept,  // used by ACCEPT_INPUT
+  TSParseActionTypeRecover,  // used by RECOVER
+} TSParseActionType;
+
+typedef union {  // one parse action; every member starts with a uint8_t `type` that the macros below fill with a TSParseActionType value
+  struct {
+    uint8_t type;
+    TSStateId state;  // set by SHIFT and SHIFT_REPEAT; not set by SHIFT_EXTRA
+    bool extra;  // set to true only by SHIFT_EXTRA
+    bool repetition;  // set to true only by SHIFT_REPEAT
+  } shift;
+  struct {
+    uint8_t type;
+    uint8_t child_count;
+    TSSymbol symbol;
+    int16_t dynamic_precedence;
+    uint16_t production_id;
+  } reduce;
+  uint8_t type;  // bare tag; the only field RECOVER and ACCEPT_INPUT initialize
+} TSParseAction;
+
+typedef struct {  // NOTE(review): not referenced elsewhere in this header; TSLanguage.lex_modes uses TSLexerMode instead
+  uint16_t lex_state;
+  uint16_t external_lex_state;
+} TSLexMode;
+
+typedef struct {  // element type of TSLanguage.lex_modes; same fields as TSLexMode plus reserved_word_set_id
+  uint16_t lex_state;
+  uint16_t external_lex_state;
+  uint16_t reserved_word_set_id;
+} TSLexerMode;
+
+typedef union {  // element type of TSLanguage.parse_actions: either a TSParseAction or an `entry` pair (count, reusable)
+  TSParseAction action;  // first member, so it is the one the SHIFT/REDUCE/RECOVER/ACCEPT_INPUT brace initializers fill
+  struct {
+    uint8_t count;
+    bool reusable;
+  } entry;
+} TSParseActionEntry;
+
+typedef struct {  // range of lookahead values; set_contains() treats both bounds as inclusive
+  int32_t start;
+  int32_t end;
+} TSCharacterRange;
+
+struct TSLanguage {  // NOTE(review): presumably must match the layout the tree-sitter runtime expects for this abi_version — do not reorder or resize fields without confirming
+  uint32_t abi_version;
+  uint32_t symbol_count;
+  uint32_t alias_count;
+  uint32_t token_count;
+  uint32_t external_token_count;
+  uint32_t state_count;
+  uint32_t large_state_count;
+  uint32_t production_id_count;
+  uint32_t field_count;
+  uint16_t max_alias_sequence_length;
+  const uint16_t *parse_table;
+  const uint16_t *small_parse_table;
+  const uint32_t *small_parse_table_map;
+  const TSParseActionEntry *parse_actions;
+  const char * const *symbol_names;
+  const char * const *field_names;
+  const TSMapSlice *field_map_slices;
+  const TSFieldMapEntry *field_map_entries;
+  const TSSymbolMetadata *symbol_metadata;
+  const TSSymbol *public_symbol_map;
+  const uint16_t *alias_map;
+  const TSSymbol *alias_sequences;
+  const TSLexerMode *lex_modes;
+  bool (*lex_fn)(TSLexer *, TSStateId);  // lexing callback: receives the lexer and a state id, returns bool
+  bool (*keyword_lex_fn)(TSLexer *, TSStateId);  // same signature as lex_fn
+  TSSymbol keyword_capture_token;
+  struct {  // callbacks and tables for an external scanner; the void * first argument is presumably the value returned by create — confirm
+    const bool *states;
+    const TSSymbol *symbol_map;
+    void *(*create)(void);
+    void (*destroy)(void *);
+    bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
+    unsigned (*serialize)(void *, char *);
+    void (*deserialize)(void *, const char *, unsigned);
+  } external_scanner;
+  const TSStateId *primary_state_ids;
+  const char *name;
+  const TSSymbol *reserved_words;
+  uint16_t max_reserved_word_set_size;
+  uint32_t supertype_count;
+  const TSSymbol *supertype_symbols;
+  const TSMapSlice *supertype_map_slices;
+  const TSSymbol *supertype_map_entries;
+  TSLanguageMetadata metadata;
+};
+
+static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {  // returns true if `lookahead` falls inside one of the `len` ranges
+  uint32_t index = 0;  // lower bound of the search window
+  uint32_t size = len - index;  // number of ranges still under consideration
+  while (size > 1) {  // binary search; assumes `ranges` is sorted ascending and non-overlapping — TODO confirm with the generator
+    uint32_t half_size = size / 2;
+    uint32_t mid_index = index + half_size;
+    const TSCharacterRange *range = &ranges[mid_index];
+    if (lookahead >= range->start && lookahead <= range->end) {  // both bounds are inclusive
+      return true;
+    } else if (lookahead > range->end) {
+      index = mid_index;  // discard everything before mid
+    }
+    size -= half_size;  // window shrinks to ceil(size / 2)
+  }
+  const TSCharacterRange *range = &ranges[index];  // NOTE(review): with len == 0 the loop is skipped and ranges[0] is still read below, so callers must pass at least one range
+  return (lookahead >= range->start && lookahead <= range->end);
+}
+
+/*
+ *  Lexer Macros
+ */
+
+#ifdef _MSC_VER
+#define UNUSED __pragma(warning(suppress : 4101))  /* MSVC: suppress warning C4101 for the declaration that follows */
+#else
+#define UNUSED __attribute__((unused))  /* GCC/Clang-style attribute applied to the declaration that follows */
+#endif
+
+#define START_LEXER() /* prologue for a lex function that has `lexer` (TSLexer *) and `state` in scope; declares result/skip/eof/lookahead and the next_state/start labels */ \
+  bool result = false;          \
+  bool skip = false;            \
+  UNUSED                        \
+  bool eof = false;             \
+  int32_t lookahead;            \
+  goto start;                   \
+  next_state: /* target of ADVANCE, ADVANCE_MAP and SKIP: advance first, then fall through to start */ \
+  lexer->advance(lexer, skip);  \
+  start: /* first entry jumps here directly, so no advance happens before the first lookahead read */ \
+  skip = false;                 \
+  lookahead = lexer->lookahead;
+
+#define ADVANCE(state_value) /* set state and jump to START_LEXER's next_state label, which calls lexer->advance(lexer, skip) with skip == false */ \
+  {                          \
+    state = state_value;     \
+    goto next_state;         \
+  }
+
+#define ADVANCE_MAP(...) /* arguments are a flat list of (lookahead value, next state) pairs; the first pair whose value equals `lookahead` triggers an ADVANCE-style jump, otherwise control falls through */ \
+  {                                                                   \
+    static const uint16_t map[] = { __VA_ARGS__ }; /* NOTE(review): entries are stored as uint16_t, so values above 0xFFFF cannot be represented */ \
+    for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) {  \
+      if (map[i] == lookahead) {                                      \
+        state = map[i + 1];                                           \
+        goto next_state;                                              \
+      }                                                               \
+    }                                                                 \
+  }
+
+#define SKIP(state_value) /* like ADVANCE, but sets skip = true first so lexer->advance is called with skip == true */ \
+  {                       \
+    skip = true;          \
+    state = state_value;  \
+    goto next_state;      \
+  }
+
+#define ACCEPT_TOKEN(symbol_value) /* records symbol_value as the result and calls mark_end; does not return by itself. NOTE(review): expands to three bare statements, so it is unsafe as the body of a braceless if/else */ \
+  result = true;                       \
+  lexer->result_symbol = symbol_value; \
+  lexer->mark_end(lexer);
+
+#define END_STATE() return result;  /* returns the `result` flag declared by START_LEXER; it is true only after ACCEPT_TOKEN has run */
+
+/*
+ *  Parse Table Macros
+ */
+
+#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)  /* LARGE_STATE_COUNT is not defined in this header; the including file must define it before using this macro */
+
+#define STATE(id) id  /* expands to its argument unchanged */
+
+#define ACTIONS(id) id  /* expands to its argument unchanged */
+
+#define SHIFT(state_value) /* brace initializer for a shift action to state_value; extra and repetition are not set explicitly */ \
+  {{                                  \
+    .shift = {                        \
+      .type = TSParseActionTypeShift, \
+      .state = (state_value)          \
+    }                                 \
+  }}
+
+#define SHIFT_REPEAT(state_value) /* same initializer as SHIFT, with repetition = true added */ \
+  {{                                  \
+    .shift = {                        \
+      .type = TSParseActionTypeShift, \
+      .state = (state_value),         \
+      .repetition = true              \
+    }                                 \
+  }}
+
+#define SHIFT_EXTRA() /* brace initializer for a shift action with extra = true; no target state is given */ \
+  {{                                  \
+    .shift = {                        \
+      .type = TSParseActionTypeShift, \
+      .extra = true                   \
+    }                                 \
+  }}
+
+#define REDUCE(symbol_name, children, precedence, prod_id) /* brace initializer for a reduce action: symbol_name -> .symbol, children -> .child_count, precedence -> .dynamic_precedence, prod_id -> .production_id */ \
+  {{                                                       \
+    .reduce = {                                            \
+      .type = TSParseActionTypeReduce,                     \
+      .symbol = symbol_name,                               \
+      .child_count = children,                             \
+      .dynamic_precedence = precedence,                    \
+      .production_id = prod_id                             \
+    },                                                     \
+  }}
+
+#define RECOVER() /* brace initializer that sets only the bare `type` tag, to TSParseActionTypeRecover */ \
+  {{                                 \
+    .type = TSParseActionTypeRecover \
+  }}
+
+#define ACCEPT_INPUT() /* brace initializer that sets only the bare `type` tag, to TSParseActionTypeAccept */ \
+  {{                                \
+    .type = TSParseActionTypeAccept \
+  }}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // TREE_SITTER_PARSER_H_
diff --git a/tree-sitter.json b/tree-sitter.json
new file mode 100644
index 0000000..90c0efa
--- /dev/null
+++ b/tree-sitter.json
@@ -0,0 +1,37 @@
+{
+    "$schema": "https://tree-sitter.github.io/tree-sitter/assets/schemas/config.schema.json",
+    "grammars": [
+        {
+            "name": "hack_binary",
+            "camelcase": "HackBinary",
+            "title": "Hack Binary",
+            "scope": "source.hack_binary",
+            "file-types": ["hack"],
+            "injection-regex": "^hack[-_]?binary$",
+            "class-name": "TreeSitterHackBinary"
+        }
+    ],
+    "metadata": {
+        "version": "1.0.0",
+        "license": "MIT",
+        "description": "A Tree-sitter grammar for parsing Hack Binary machine code (nand2tetris)",
+        "authors": [
+            {
+                "name": "Sean O'Connor",
+                "email": "sean@soconnor.dev"
+            }
+        ],
+        "links": {
+            "repository": "https://github.com/soconnor0919/nand2tetris-zed"
+        }
+    },
+    "bindings": {
+        "c": true,
+        "go": false,
+        "node": true,
+        "python": false,
+        "rust": true,
+        "swift": false,
+        "zig": false
+    }
+}