From d408c5082708fa53cacbbfdd951086a484818e09 Mon Sep 17 00:00:00 2001 From: Sean O'Connor Date: Wed, 10 Sep 2025 23:50:42 -0400 Subject: [PATCH] Initial commit: Tree-sitter grammar for Hack Binary language --- grammar.js | 30 ++++ parser.dylib | Bin 0 -> 33824 bytes src/grammar.json | 74 +++++++++ src/node-types.json | 46 ++++++ src/parser.c | 335 +++++++++++++++++++++++++++++++++++++++ src/tree_sitter/alloc.h | 54 +++++++ src/tree_sitter/array.h | 291 ++++++++++++++++++++++++++++++++++ src/tree_sitter/parser.h | 286 +++++++++++++++++++++++++++++++++ tree-sitter.json | 37 +++++ 9 files changed, 1153 insertions(+) create mode 100644 grammar.js create mode 100755 parser.dylib create mode 100644 src/grammar.json create mode 100644 src/node-types.json create mode 100644 src/parser.c create mode 100644 src/tree_sitter/alloc.h create mode 100644 src/tree_sitter/array.h create mode 100644 src/tree_sitter/parser.h create mode 100644 tree-sitter.json diff --git a/grammar.js b/grammar.js new file mode 100644 index 0000000..0e980c0 --- /dev/null +++ b/grammar.js @@ -0,0 +1,30 @@ +module.exports = grammar({ + name: 'hack_binary', + + rules: { + source_file: $ => repeat($._item), + + _item: $ => choice( + $.binary_instruction, + $.comment, + $._whitespace + ), + + // 16-bit binary instruction + binary_instruction: $ => $.bit_sequence, + + // 16-bit sequence of 0s and 1s + bit_sequence: $ => /[01]{16}/, + + // Comments + comment: $ => token(seq('//', /.*/)), + + // Whitespace + _whitespace: $ => /\s+/ + }, + + extras: $ => [ + /\s/, + $.comment + ] +}); diff --git a/parser.dylib b/parser.dylib new file mode 100755 index 0000000000000000000000000000000000000000..7881b8398d0c6b96ba1bf1885086b2f577d23a69 GIT binary patch literal 33824 zcmeI5e{38_702J+`FCQ+O&k)`6rWoZ($M;7Y}5i$a&8kyMAd}k5?oYRukY6O;k!Ni zc3s?*-9cPsH|psnP?aE4x{gaz5zOq3HPZCy$Q1Ct3(NOa2c-IpA+|;GxwW) zt|VQHO5DynADbL|>+cA(zUN+?*tZF`1Tx2_YCN6De61dn>PM?K;#&2e%aQOb z({0EcOV6jz;4!o)IpUX+xM$9L#{Ix8wBKH#K(rqI9A~UdR0nyZ(fPhPU5Cu&6Srnh$7yyom3Qw$ z9z{mKr`A4r-9fyIS}f@4Sb6tAP3qq#;;UxK+A=>LOZ{x%0~P(iW?I?0mxh10iqZ!* zryhF<-E0^fxb4RJhDQ=s>->pxwYP5h6%C(gru2r1)|IU<(eO!JbLTeH?PbN9c2{t7$m1u$bQRAPpxV$8axU z8`Dj?jWlBYA=1XlMr{ue<2Mm|TWE*>FC%tmxw!|}e;Tn*FEeq_pBEzblMUMb8vN%X z_BLZrqun1w>`uM$hy5_@ieWXSx8Yt=r)zHw@AGNc+ThcP_6|ks*XsIDplm8)pRO@> z^yA@({UqWjj`06l#NKA?o$&uk#O_pS|1MI6KsejmUVLrK>YsIMwZPuW* z%O~zd)I0PHUjpU}8}TZMuemi&7TVBK34AQo`A#bS;d|?ReQT+|ZZRx3@4`xZF?}~W zD&iVlwTnJoY)3q{hy6Y8ukB$wmc_r1l&hAZZt9(nYxL09^|I8P3vAyV9&x>lOPOMU zzrn~J9moZ)KjgsX7e`7N*Y3;ZU9w>mW*VAT+xd<*(G<#b%I+I^>wO0 zL{Z7T;ondxNY4EGhf_*@Rz8^JaxiH@=45^g#_&y}u`8>52mv7=1cZPP5CTF#2nYco zAOwVf5D)@FKnMr{As_^VfDjM@LO=)z0U;m+gn$qb0zyCt2mv7=1cZPP5CTF#2nYco zAOwVf5D)@FKnMr{As_^V!2g;6uUF>vzMR4ZU%bwZufv6GyfW-wwSKIbeq!V?BR8qa z&6MbZ(?zSx*rwA7Z%97nJ7Z>B!An6jCCBPWdf zhLMjO`J|C^N?viZQc(y90U;m+gn$qb0zyCt2mv7=1cZPP5CTF#2nYcoAOwVf5D)@F zKnMr{As_^VfDjM@LO=)z0U;m+gn$qb0zyCt2mv7=1cZPP5CTF#2nYcoAOwVf5D)@F zKnMr{As__)4+$i2&)ZGr+Yi_M^R+%}w{pffpDRu&>=2OR3!N-=QhKP^LCJM(KNkdU z$>yWiZGDt2CDKY)#m3K<_u8&SO`)-gkg#hbhj*BqS5vK14O?zy52W{=(y4KJ_khqU>zr_ zhxgxzzO%0PKhUQOuYiru-sO~_%kr=T-_E;ZaPVz)EWI5#y?p*Jbid%_^Y3r93(k=C zA20M4^LD`voU9W#7ubx8SM+QQNde~y=WQpSbNutQwH6*w?5o=7nO!JmU9An3as@t) z*bkh*wR2fxbxOW#JL+^|wp@DP+w+dsKjQS8JJUV&oB~1Agf;%{TT?kW1#&`y=1%czWZf-abA5#f3vt z?cbZcd;L9&ach3(wyB}-f8nNY4?cJQ+zZ{s-=69_TJ@(_Umc9??5eof(lexer); + switch (state) { + case 0: + if (eof) ADVANCE(18); + if (lookahead == '/') ADVANCE(1); + if (lookahead == '0' || + lookahead == '1') ADVANCE(16); + if (('\t' <= lookahead && lookahead <= '\r') || + lookahead == ' ') ADVANCE(21); + END_STATE(); + case 1: + if (lookahead == '/') ADVANCE(20); + END_STATE(); + case 2: + if (lookahead == '0' || + lookahead == '1') ADVANCE(19); + END_STATE(); + case 3: + if (lookahead == '0' || + lookahead == '1') ADVANCE(2); + END_STATE(); + case 4: + if (lookahead == '0' || + lookahead == '1') ADVANCE(3); + END_STATE(); + case 5: + if (lookahead == '0' || + lookahead == '1') ADVANCE(4); + END_STATE(); + case 6: + if (lookahead == '0' || + lookahead == '1') ADVANCE(5); + END_STATE(); + case 7: + if (lookahead == '0' || + lookahead == '1') ADVANCE(6); + END_STATE(); + case 8: + if (lookahead == '0' || + lookahead == '1') ADVANCE(7); + END_STATE(); + case 9: + if (lookahead == '0' || + lookahead == '1') ADVANCE(8); + END_STATE(); + case 10: + if (lookahead == '0' || + lookahead == '1') ADVANCE(9); + END_STATE(); + case 11: + if (lookahead == '0' || + lookahead == '1') ADVANCE(10); + END_STATE(); + case 12: + if (lookahead == '0' || + lookahead == '1') ADVANCE(11); + END_STATE(); + case 13: + if (lookahead == '0' || + lookahead == '1') ADVANCE(12); + END_STATE(); + case 14: + if (lookahead == '0' || + lookahead == '1') ADVANCE(13); + END_STATE(); + case 15: + if (lookahead == '0' || + lookahead == '1') ADVANCE(14); + END_STATE(); + case 16: + if (lookahead == '0' || + lookahead == '1') ADVANCE(15); + END_STATE(); + case 17: + if (eof) ADVANCE(18); + if (lookahead == '/') ADVANCE(1); + if (('\t' <= lookahead && lookahead <= '\r') || + lookahead == ' ') SKIP(17); + END_STATE(); + case 18: + ACCEPT_TOKEN(ts_builtin_sym_end); + END_STATE(); + case 19: + ACCEPT_TOKEN(sym_bit_sequence); + END_STATE(); + case 20: + ACCEPT_TOKEN(sym_comment); + if (lookahead != 0 && + lookahead != '\n') ADVANCE(20); + END_STATE(); + case 21: + ACCEPT_TOKEN(sym__whitespace); + if (('\t' <= lookahead && lookahead <= '\r') || + lookahead == ' ') ADVANCE(21); + END_STATE(); + default: + return false; + } +} + +static const TSLexMode ts_lex_modes[STATE_COUNT] = { + [0] = {.lex_state = 0}, + [1] = {.lex_state = 0}, + [2] = {.lex_state = 0}, + [3] = {.lex_state = 0}, + [4] = {.lex_state = 0}, + [5] = {.lex_state = 17}, +}; + +static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { + [STATE(0)] = { + [ts_builtin_sym_end] = ACTIONS(1), + [sym_bit_sequence] = ACTIONS(1), + [sym_comment] = ACTIONS(3), + [sym__whitespace] = ACTIONS(1), + }, + [STATE(1)] = { + [sym_source_file] = STATE(5), + [sym__item] = STATE(2), + [sym_binary_instruction] = STATE(2), + [aux_sym_source_file_repeat1] = STATE(2), + [ts_builtin_sym_end] = ACTIONS(5), + [sym_bit_sequence] = ACTIONS(7), + [sym_comment] = ACTIONS(9), + [sym__whitespace] = ACTIONS(11), + }, + [STATE(2)] = { + [sym__item] = STATE(3), + [sym_binary_instruction] = STATE(3), + [aux_sym_source_file_repeat1] = STATE(3), + [ts_builtin_sym_end] = ACTIONS(13), + [sym_bit_sequence] = ACTIONS(7), + [sym_comment] = ACTIONS(15), + [sym__whitespace] = ACTIONS(17), + }, + [STATE(3)] = { + [sym__item] = STATE(3), + [sym_binary_instruction] = STATE(3), + [aux_sym_source_file_repeat1] = STATE(3), + [ts_builtin_sym_end] = ACTIONS(19), + [sym_bit_sequence] = ACTIONS(21), + [sym_comment] = ACTIONS(24), + [sym__whitespace] = ACTIONS(27), + }, +}; + +static const uint16_t ts_small_parse_table[] = { + [0] = 2, + ACTIONS(30), 2, + ts_builtin_sym_end, + sym__whitespace, + ACTIONS(32), 2, + sym_bit_sequence, + sym_comment, + [9] = 2, + ACTIONS(34), 1, + ts_builtin_sym_end, + ACTIONS(36), 1, + sym_comment, +}; + +static const uint32_t ts_small_parse_table_map[] = { + [SMALL_STATE(4)] = 0, + [SMALL_STATE(5)] = 9, +}; + +static const TSParseActionEntry ts_parse_actions[] = { + [0] = {.entry = {.count = 0, .reusable = false}}, + [1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(), + [3] = {.entry = {.count = 1, .reusable = false}}, SHIFT_EXTRA(), + [5] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 0, 0, 0), + [7] = {.entry = {.count = 1, .reusable = false}}, SHIFT(4), + [9] = {.entry = {.count = 1, .reusable = false}}, SHIFT(2), + [11] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2), + [13] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1, 0, 0), + [15] = {.entry = {.count = 1, .reusable = false}}, SHIFT(3), + [17] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3), + [19] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2, 0, 0), + [21] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_source_file_repeat1, 2, 0, 0), SHIFT_REPEAT(4), + [24] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_source_file_repeat1, 2, 0, 0), SHIFT_REPEAT(3), + [27] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2, 0, 0), SHIFT_REPEAT(3), + [30] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_binary_instruction, 1, 0, 0), + [32] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_binary_instruction, 1, 0, 0), + [34] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), + [36] = {.entry = {.count = 1, .reusable = true}}, SHIFT_EXTRA(), +}; + +#ifdef __cplusplus +extern "C" { +#endif +#ifdef TREE_SITTER_HIDE_SYMBOLS +#define TS_PUBLIC +#elif defined(_WIN32) +#define TS_PUBLIC __declspec(dllexport) +#else +#define TS_PUBLIC __attribute__((visibility("default"))) +#endif + +TS_PUBLIC const TSLanguage *tree_sitter_hack_binary(void) { + static const TSLanguage language = { + .abi_version = LANGUAGE_VERSION, + .symbol_count = SYMBOL_COUNT, + .alias_count = ALIAS_COUNT, + .token_count = TOKEN_COUNT, + .external_token_count = EXTERNAL_TOKEN_COUNT, + .state_count = STATE_COUNT, + .large_state_count = LARGE_STATE_COUNT, + .production_id_count = PRODUCTION_ID_COUNT, + .field_count = FIELD_COUNT, + .max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH, + .parse_table = &ts_parse_table[0][0], + .small_parse_table = ts_small_parse_table, + .small_parse_table_map = ts_small_parse_table_map, + .parse_actions = ts_parse_actions, + .symbol_names = ts_symbol_names, + .symbol_metadata = ts_symbol_metadata, + .public_symbol_map = ts_symbol_map, + .alias_map = ts_non_terminal_alias_map, + .alias_sequences = &ts_alias_sequences[0][0], + .lex_modes = (const void*)ts_lex_modes, + .lex_fn = ts_lex, + .primary_state_ids = ts_primary_state_ids, + }; + return &language; +} +#ifdef __cplusplus +} +#endif diff --git a/src/tree_sitter/alloc.h b/src/tree_sitter/alloc.h new file mode 100644 index 0000000..1abdd12 --- /dev/null +++ b/src/tree_sitter/alloc.h @@ -0,0 +1,54 @@ +#ifndef TREE_SITTER_ALLOC_H_ +#define TREE_SITTER_ALLOC_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +// Allow clients to override allocation functions +#ifdef TREE_SITTER_REUSE_ALLOCATOR + +extern void *(*ts_current_malloc)(size_t size); +extern void *(*ts_current_calloc)(size_t count, size_t size); +extern void *(*ts_current_realloc)(void *ptr, size_t size); +extern void (*ts_current_free)(void *ptr); + +#ifndef ts_malloc +#define ts_malloc ts_current_malloc +#endif +#ifndef ts_calloc +#define ts_calloc ts_current_calloc +#endif +#ifndef ts_realloc +#define ts_realloc ts_current_realloc +#endif +#ifndef ts_free +#define ts_free ts_current_free +#endif + +#else + +#ifndef ts_malloc +#define ts_malloc malloc +#endif +#ifndef ts_calloc +#define ts_calloc calloc +#endif +#ifndef ts_realloc +#define ts_realloc realloc +#endif +#ifndef ts_free +#define ts_free free +#endif + +#endif + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_ALLOC_H_ diff --git a/src/tree_sitter/array.h b/src/tree_sitter/array.h new file mode 100644 index 0000000..a17a574 --- /dev/null +++ b/src/tree_sitter/array.h @@ -0,0 +1,291 @@ +#ifndef TREE_SITTER_ARRAY_H_ +#define TREE_SITTER_ARRAY_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "./alloc.h" + +#include +#include +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4101) +#elif defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif + +#define Array(T) \ + struct { \ + T *contents; \ + uint32_t size; \ + uint32_t capacity; \ + } + +/// Initialize an array. +#define array_init(self) \ + ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL) + +/// Create an empty array. +#define array_new() \ + { NULL, 0, 0 } + +/// Get a pointer to the element at a given `index` in the array. +#define array_get(self, _index) \ + (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index]) + +/// Get a pointer to the first element in the array. +#define array_front(self) array_get(self, 0) + +/// Get a pointer to the last element in the array. +#define array_back(self) array_get(self, (self)->size - 1) + +/// Clear the array, setting its size to zero. Note that this does not free any +/// memory allocated for the array's contents. +#define array_clear(self) ((self)->size = 0) + +/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is +/// less than the array's current capacity, this function has no effect. +#define array_reserve(self, new_capacity) \ + _array__reserve((Array *)(self), array_elem_size(self), new_capacity) + +/// Free any memory allocated for this array. Note that this does not free any +/// memory allocated for the array's contents. +#define array_delete(self) _array__delete((Array *)(self)) + +/// Push a new `element` onto the end of the array. +#define array_push(self, element) \ + (_array__grow((Array *)(self), 1, array_elem_size(self)), \ + (self)->contents[(self)->size++] = (element)) + +/// Increase the array's size by `count` elements. +/// New elements are zero-initialized. +#define array_grow_by(self, count) \ + do { \ + if ((count) == 0) break; \ + _array__grow((Array *)(self), count, array_elem_size(self)); \ + memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \ + (self)->size += (count); \ + } while (0) + +/// Append all elements from one array to the end of another. +#define array_push_all(self, other) \ + array_extend((self), (other)->size, (other)->contents) + +/// Append `count` elements to the end of the array, reading their values from the +/// `contents` pointer. +#define array_extend(self, count, contents) \ + _array__splice( \ + (Array *)(self), array_elem_size(self), (self)->size, \ + 0, count, contents \ + ) + +/// Remove `old_count` elements from the array starting at the given `index`. At +/// the same index, insert `new_count` new elements, reading their values from the +/// `new_contents` pointer. +#define array_splice(self, _index, old_count, new_count, new_contents) \ + _array__splice( \ + (Array *)(self), array_elem_size(self), _index, \ + old_count, new_count, new_contents \ + ) + +/// Insert one `element` into the array at the given `index`. +#define array_insert(self, _index, element) \ + _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element)) + +/// Remove one element from the array at the given `index`. +#define array_erase(self, _index) \ + _array__erase((Array *)(self), array_elem_size(self), _index) + +/// Pop the last element off the array, returning the element by value. +#define array_pop(self) ((self)->contents[--(self)->size]) + +/// Assign the contents of one array to another, reallocating if necessary. +#define array_assign(self, other) \ + _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self)) + +/// Swap one array with another +#define array_swap(self, other) \ + _array__swap((Array *)(self), (Array *)(other)) + +/// Get the size of the array contents +#define array_elem_size(self) (sizeof *(self)->contents) + +/// Search a sorted array for a given `needle` value, using the given `compare` +/// callback to determine the order. +/// +/// If an existing element is found to be equal to `needle`, then the `index` +/// out-parameter is set to the existing value's index, and the `exists` +/// out-parameter is set to true. Otherwise, `index` is set to an index where +/// `needle` should be inserted in order to preserve the sorting, and `exists` +/// is set to false. +#define array_search_sorted_with(self, compare, needle, _index, _exists) \ + _array__search_sorted(self, 0, compare, , needle, _index, _exists) + +/// Search a sorted array for a given `needle` value, using integer comparisons +/// of a given struct field (specified with a leading dot) to determine the order. +/// +/// See also `array_search_sorted_with`. +#define array_search_sorted_by(self, field, needle, _index, _exists) \ + _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists) + +/// Insert a given `value` into a sorted array, using the given `compare` +/// callback to determine the order. +#define array_insert_sorted_with(self, compare, value) \ + do { \ + unsigned _index, _exists; \ + array_search_sorted_with(self, compare, &(value), &_index, &_exists); \ + if (!_exists) array_insert(self, _index, value); \ + } while (0) + +/// Insert a given `value` into a sorted array, using integer comparisons of +/// a given struct field (specified with a leading dot) to determine the order. +/// +/// See also `array_search_sorted_by`. +#define array_insert_sorted_by(self, field, value) \ + do { \ + unsigned _index, _exists; \ + array_search_sorted_by(self, field, (value) field, &_index, &_exists); \ + if (!_exists) array_insert(self, _index, value); \ + } while (0) + +// Private + +typedef Array(void) Array; + +/// This is not what you're looking for, see `array_delete`. +static inline void _array__delete(Array *self) { + if (self->contents) { + ts_free(self->contents); + self->contents = NULL; + self->size = 0; + self->capacity = 0; + } +} + +/// This is not what you're looking for, see `array_erase`. +static inline void _array__erase(Array *self, size_t element_size, + uint32_t index) { + assert(index < self->size); + char *contents = (char *)self->contents; + memmove(contents + index * element_size, contents + (index + 1) * element_size, + (self->size - index - 1) * element_size); + self->size--; +} + +/// This is not what you're looking for, see `array_reserve`. +static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) { + if (new_capacity > self->capacity) { + if (self->contents) { + self->contents = ts_realloc(self->contents, new_capacity * element_size); + } else { + self->contents = ts_malloc(new_capacity * element_size); + } + self->capacity = new_capacity; + } +} + +/// This is not what you're looking for, see `array_assign`. +static inline void _array__assign(Array *self, const Array *other, size_t element_size) { + _array__reserve(self, element_size, other->size); + self->size = other->size; + memcpy(self->contents, other->contents, self->size * element_size); +} + +/// This is not what you're looking for, see `array_swap`. +static inline void _array__swap(Array *self, Array *other) { + Array swap = *other; + *other = *self; + *self = swap; +} + +/// This is not what you're looking for, see `array_push` or `array_grow_by`. +static inline void _array__grow(Array *self, uint32_t count, size_t element_size) { + uint32_t new_size = self->size + count; + if (new_size > self->capacity) { + uint32_t new_capacity = self->capacity * 2; + if (new_capacity < 8) new_capacity = 8; + if (new_capacity < new_size) new_capacity = new_size; + _array__reserve(self, element_size, new_capacity); + } +} + +/// This is not what you're looking for, see `array_splice`. +static inline void _array__splice(Array *self, size_t element_size, + uint32_t index, uint32_t old_count, + uint32_t new_count, const void *elements) { + uint32_t new_size = self->size + new_count - old_count; + uint32_t old_end = index + old_count; + uint32_t new_end = index + new_count; + assert(old_end <= self->size); + + _array__reserve(self, element_size, new_size); + + char *contents = (char *)self->contents; + if (self->size > old_end) { + memmove( + contents + new_end * element_size, + contents + old_end * element_size, + (self->size - old_end) * element_size + ); + } + if (new_count > 0) { + if (elements) { + memcpy( + (contents + index * element_size), + elements, + new_count * element_size + ); + } else { + memset( + (contents + index * element_size), + 0, + new_count * element_size + ); + } + } + self->size += new_count - old_count; +} + +/// A binary search routine, based on Rust's `std::slice::binary_search_by`. +/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`. +#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \ + do { \ + *(_index) = start; \ + *(_exists) = false; \ + uint32_t size = (self)->size - *(_index); \ + if (size == 0) break; \ + int comparison; \ + while (size > 1) { \ + uint32_t half_size = size / 2; \ + uint32_t mid_index = *(_index) + half_size; \ + comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \ + if (comparison <= 0) *(_index) = mid_index; \ + size -= half_size; \ + } \ + comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \ + if (comparison == 0) *(_exists) = true; \ + else if (comparison < 0) *(_index) += 1; \ + } while (0) + +/// Helper macro for the `_sorted_by` routines below. This takes the left (existing) +/// parameter by reference in order to work with the generic sorting function above. +#define _compare_int(a, b) ((int)*(a) - (int)(b)) + +#ifdef _MSC_VER +#pragma warning(pop) +#elif defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic pop +#endif + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_ARRAY_H_ diff --git a/src/tree_sitter/parser.h b/src/tree_sitter/parser.h new file mode 100644 index 0000000..858107d --- /dev/null +++ b/src/tree_sitter/parser.h @@ -0,0 +1,286 @@ +#ifndef TREE_SITTER_PARSER_H_ +#define TREE_SITTER_PARSER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +#define ts_builtin_sym_error ((TSSymbol)-1) +#define ts_builtin_sym_end 0 +#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 + +#ifndef TREE_SITTER_API_H_ +typedef uint16_t TSStateId; +typedef uint16_t TSSymbol; +typedef uint16_t TSFieldId; +typedef struct TSLanguage TSLanguage; +typedef struct TSLanguageMetadata { + uint8_t major_version; + uint8_t minor_version; + uint8_t patch_version; +} TSLanguageMetadata; +#endif + +typedef struct { + TSFieldId field_id; + uint8_t child_index; + bool inherited; +} TSFieldMapEntry; + +// Used to index the field and supertype maps. +typedef struct { + uint16_t index; + uint16_t length; +} TSMapSlice; + +typedef struct { + bool visible; + bool named; + bool supertype; +} TSSymbolMetadata; + +typedef struct TSLexer TSLexer; + +struct TSLexer { + int32_t lookahead; + TSSymbol result_symbol; + void (*advance)(TSLexer *, bool); + void (*mark_end)(TSLexer *); + uint32_t (*get_column)(TSLexer *); + bool (*is_at_included_range_start)(const TSLexer *); + bool (*eof)(const TSLexer *); + void (*log)(const TSLexer *, const char *, ...); +}; + +typedef enum { + TSParseActionTypeShift, + TSParseActionTypeReduce, + TSParseActionTypeAccept, + TSParseActionTypeRecover, +} TSParseActionType; + +typedef union { + struct { + uint8_t type; + TSStateId state; + bool extra; + bool repetition; + } shift; + struct { + uint8_t type; + uint8_t child_count; + TSSymbol symbol; + int16_t dynamic_precedence; + uint16_t production_id; + } reduce; + uint8_t type; +} TSParseAction; + +typedef struct { + uint16_t lex_state; + uint16_t external_lex_state; +} TSLexMode; + +typedef struct { + uint16_t lex_state; + uint16_t external_lex_state; + uint16_t reserved_word_set_id; +} TSLexerMode; + +typedef union { + TSParseAction action; + struct { + uint8_t count; + bool reusable; + } entry; +} TSParseActionEntry; + +typedef struct { + int32_t start; + int32_t end; +} TSCharacterRange; + +struct TSLanguage { + uint32_t abi_version; + uint32_t symbol_count; + uint32_t alias_count; + uint32_t token_count; + uint32_t external_token_count; + uint32_t state_count; + uint32_t large_state_count; + uint32_t production_id_count; + uint32_t field_count; + uint16_t max_alias_sequence_length; + const uint16_t *parse_table; + const uint16_t *small_parse_table; + const uint32_t *small_parse_table_map; + const TSParseActionEntry *parse_actions; + const char * const *symbol_names; + const char * const *field_names; + const TSMapSlice *field_map_slices; + const TSFieldMapEntry *field_map_entries; + const TSSymbolMetadata *symbol_metadata; + const TSSymbol *public_symbol_map; + const uint16_t *alias_map; + const TSSymbol *alias_sequences; + const TSLexerMode *lex_modes; + bool (*lex_fn)(TSLexer *, TSStateId); + bool (*keyword_lex_fn)(TSLexer *, TSStateId); + TSSymbol keyword_capture_token; + struct { + const bool *states; + const TSSymbol *symbol_map; + void *(*create)(void); + void (*destroy)(void *); + bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); + unsigned (*serialize)(void *, char *); + void (*deserialize)(void *, const char *, unsigned); + } external_scanner; + const TSStateId *primary_state_ids; + const char *name; + const TSSymbol *reserved_words; + uint16_t max_reserved_word_set_size; + uint32_t supertype_count; + const TSSymbol *supertype_symbols; + const TSMapSlice *supertype_map_slices; + const TSSymbol *supertype_map_entries; + TSLanguageMetadata metadata; +}; + +static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) { + uint32_t index = 0; + uint32_t size = len - index; + while (size > 1) { + uint32_t half_size = size / 2; + uint32_t mid_index = index + half_size; + const TSCharacterRange *range = &ranges[mid_index]; + if (lookahead >= range->start && lookahead <= range->end) { + return true; + } else if (lookahead > range->end) { + index = mid_index; + } + size -= half_size; + } + const TSCharacterRange *range = &ranges[index]; + return (lookahead >= range->start && lookahead <= range->end); +} + +/* + * Lexer Macros + */ + +#ifdef _MSC_VER +#define UNUSED __pragma(warning(suppress : 4101)) +#else +#define UNUSED __attribute__((unused)) +#endif + +#define START_LEXER() \ + bool result = false; \ + bool skip = false; \ + UNUSED \ + bool eof = false; \ + int32_t lookahead; \ + goto start; \ + next_state: \ + lexer->advance(lexer, skip); \ + start: \ + skip = false; \ + lookahead = lexer->lookahead; + +#define ADVANCE(state_value) \ + { \ + state = state_value; \ + goto next_state; \ + } + +#define ADVANCE_MAP(...) \ + { \ + static const uint16_t map[] = { __VA_ARGS__ }; \ + for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \ + if (map[i] == lookahead) { \ + state = map[i + 1]; \ + goto next_state; \ + } \ + } \ + } + +#define SKIP(state_value) \ + { \ + skip = true; \ + state = state_value; \ + goto next_state; \ + } + +#define ACCEPT_TOKEN(symbol_value) \ + result = true; \ + lexer->result_symbol = symbol_value; \ + lexer->mark_end(lexer); + +#define END_STATE() return result; + +/* + * Parse Table Macros + */ + +#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT) + +#define STATE(id) id + +#define ACTIONS(id) id + +#define SHIFT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = (state_value) \ + } \ + }} + +#define SHIFT_REPEAT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = (state_value), \ + .repetition = true \ + } \ + }} + +#define SHIFT_EXTRA() \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .extra = true \ + } \ + }} + +#define REDUCE(symbol_name, children, precedence, prod_id) \ + {{ \ + .reduce = { \ + .type = TSParseActionTypeReduce, \ + .symbol = symbol_name, \ + .child_count = children, \ + .dynamic_precedence = precedence, \ + .production_id = prod_id \ + }, \ + }} + +#define RECOVER() \ + {{ \ + .type = TSParseActionTypeRecover \ + }} + +#define ACCEPT_INPUT() \ + {{ \ + .type = TSParseActionTypeAccept \ + }} + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_PARSER_H_ diff --git a/tree-sitter.json b/tree-sitter.json new file mode 100644 index 0000000..90c0efa --- /dev/null +++ b/tree-sitter.json @@ -0,0 +1,37 @@ +{ + "$schema": "https://tree-sitter.github.io/tree-sitter/assets/schemas/config.schema.json", + "grammars": [ + { + "name": "hack_binary", + "camelcase": "HackBinary", + "title": "Hack Binary", + "scope": "source.hack_binary", + "file-types": ["hack"], + "injection-regex": "^hack[-_]?binary$", + "class-name": "TreeSitterHackBinary" + } + ], + "metadata": { + "version": "1.0.0", + "license": "MIT", + "description": "A Tree-sitter grammar for parsing Hack Binary machine code (nand2tetris)", + "authors": [ + { + "name": "Sean O'Connor", + "email": "sean@soconnor.dev" + } + ], + "links": { + "repository": "https://github.com/soconnor0919/nand2tetris-zed" + } + }, + "bindings": { + "c": true, + "go": false, + "node": true, + "python": false, + "rust": true, + "swift": false, + "zig": false + } +}