From 1e86bc6d160b2377fbed328d9b4de21599f7b9e8 Mon Sep 17 00:00:00 2001 From: Sean O'Connor Date: Wed, 10 Sep 2025 23:50:46 -0400 Subject: [PATCH] Initial commit: Tree-sitter grammar for XML language --- grammar.js | 62 +++ parser.dylib | Bin 0 -> 33808 bytes src/grammar.json | 203 ++++++++++ src/node-types.json | 142 +++++++ src/parser.c | 856 +++++++++++++++++++++++++++++++++++++++ src/tree_sitter/alloc.h | 54 +++ src/tree_sitter/array.h | 291 +++++++++++++ src/tree_sitter/parser.h | 286 +++++++++++++ tree-sitter.json | 37 ++ 9 files changed, 1931 insertions(+) create mode 100644 grammar.js create mode 100755 parser.dylib create mode 100644 src/grammar.json create mode 100644 src/node-types.json create mode 100644 src/parser.c create mode 100644 src/tree_sitter/alloc.h create mode 100644 src/tree_sitter/array.h create mode 100644 src/tree_sitter/parser.h create mode 100644 tree-sitter.json diff --git a/grammar.js b/grammar.js new file mode 100644 index 0000000..930de41 --- /dev/null +++ b/grammar.js @@ -0,0 +1,62 @@ +module.exports = grammar({ + name: 'xml', + + rules: { + source_file: $ => repeat($._item), + + _item: $ => choice( + $.element, + $.comment, + $.text, + $._whitespace + ), + + // XML element + element: $ => seq( + '<', + $.tag_name, + repeat($.attribute), + choice( + seq('>', repeat($._item), ''), + '/>' + ) + ), + + // Tag name + tag_name: $ => /[A-Za-z][A-Za-z0-9_-]*/, + + // Attribute + attribute: $ => seq( + $.attribute_name, + '=', + $.attribute_value + ), + + // Attribute name + attribute_name: $ => /[A-Za-z][A-Za-z0-9_-]*/, + + // Attribute value + attribute_value: $ => choice( + seq('"', $.quoted_value, '"'), + seq("'", $.quoted_value, "'") + ), + + // Quoted value content + quoted_value: $ => /[^"']*/, + + // XML comment + comment: $ => token(seq('')), + + // Text content + text: $ => /[^<]+/, + + // Whitespace + _whitespace: $ => /\s+/ + }, + + extras: $ => [ + /\s/, + $.comment + ] +}); + diff --git a/parser.dylib b/parser.dylib new file 
mode 100755 index 0000000000000000000000000000000000000000..c405e81338f8fd1dab61118809f18cd66c9b26bc GIT binary patch literal 33808 zcmeHQeQ;FO6+dq`n=iiBut@;5Sx}MqwW1+1l;VP*_)$JrP1X9?BoDGRyPN%pd}>!` zXhElyP-nDuYNa?WLF=@XmeG+mR>yIiPDN{BY8{KyaYU;Xk)c2f+uynOzTM4|h(r5F z|2Q`}@7#0lx#ynyyZ3$Vy!Y;F$3H$flE@;YO3)aPzk+Bz`B_EdK^stN+mU%~5v3Zu z2@e*@$Udl<;7TRi&N{2IMgS}Kx5yZ-;z~#?dvsQBD=1eg+IB}WktsSX_xDPTw(hRh zoaI6ON}VAOf0=ALmRMo*<^Edgbis3`+?9=Zif`NQO{|P1I_+3uc@jqD{tg;{%Zvk- zKQBHWhews4xw9j)?Q0e;SgJ=OY#a>3y(nhl0p<}s6}O@N?NKuV6~ekcaziGLYd*-f zZ@PZLjq~PS@6M5&F%KJVWS(1`*uC2X%cDR&pmCs5edy+llg@-PNep))nGVfOcJ#)b zL^cyzlumxb=|INma8|TjIW?4sCOWUkIvB;wrE8qd(28_49*?5(s(5#(2N@@QS?B8R zSUZ}a7NVS*;W^AT$X6P95#%6b9t)0TRgP#hc-wuz4}pz9p6xkt%uF0F+nh-{85txdfWhCswG#$jg^gOx_ zdy++=-b^}#J~J&{oyj`!%jOhOAesz&_S2Ii;$5ZL$lPyE=YmxE{X|cwV3c~;O^|34 z3i}4U_THoR^ZiF|wfYY)^z@%t>+L`BBJ~~iTH8PK()I?*Evbj?uO?mChjs(`UsLMc zDHO4we;CwN89uh(J9ax+SMOcv2|wG_P?R^!qPnIyrKZ4s!>sV;=978<_BCCrnrmtw z3|dWlr&y?8ORKwTi*?vthF{Mg=Isb(W2#$PV;> z8|swDeBgJqm~GtHccNc3{)RQ@{WBlQ-)p7zAIV1!teH?(?Xwm)#VFMQ`3QUs+wNnO z`py`%i~bKBh7b6TLT3Mm(T}ae#GF4wsSW6}i&xFSx8GV-ya^DuNPjc%gRwV+-aI*#S;vhn9}%hjUqzoFEYdhH_%AIo9i1zwK5i#f;Q z#@F$8V0J&Hg5bLgY~5q8tY{rXsj*B>^BRX=A+$V$BE~MSI>{A&tQT&Umhk_ z?r-B5YT_R_u+Dt~?9i`e7DZlV9r8G=Me;`wJL8Kv5jFK97{@~=b&M^@ ze}cR_XG{0-Ee2C=m*ag8cHGv$(ZTyuyBjBby>0v+M*hzxA3&UYP5w>j|A_omhx5~L zTnlV~Z=4eX^P$JFIWQk`EqE6=AMXx;<9aJl4USV{pc-~KZU!#p>`UJzgFu<^eV2=MgUGek6HNhgS=D+s)c?e&Lm!(*~U5h1mLVZnV(1T4>Lr zdR*;lJ6+bStS?s9+OFEzaog-Kgsto2Qj~e%*Y)M0D^SW;6+ZAHdaX)3y-YhpX=j#c zXDMyCOgl$u=MK_Vn24@LSrx9nES$OPupNt-bgVakb9}yn=B86L3kn*qskI0`d{l$2 z9~$jR>w-h*cm&Grnv~9sVhNIO$AK{T@AVx^i>cJV01gE9TWp02y6@gO5utqQvYnDEVNkI!dNBlqgRb) zCHh_sy!75VO6{9|%;q|beE@TM=u`#ua8z1jD4Wvq)8!jrz%+FF;=O2~M(nHOX#$QS z=imzTOVmU`n#eGf{SAL_;!`~y97zq3tAW=F{JE)^Q=_ER>KYq49uzlm)|D-F)nByN zGptnBUJLdfo`LRlIqZzA>ZyTO#Ra{-elN9yro@hb52s>yJaUkY!m{|Tq_kXnEZXat zQk3;gcTv`(==yU}F3NnjQEX5DN>D2EFA1eG-!&Cw%cI-peM;$#to%Udwqtqu*9gdJ 
z3+~Epy^6AOS9Z%Q%gSBZEw3socV)M{x~$xlO*xboI$c>@tsp?f6gkC#}1MS zdom}t|Lz^0D_a(RBq%8_%oC;M&6iZJ5Dp~H7iGSiEA?OAzP^if+tYWqrS=*RC(%hW zX%@{4QD_!rqbux0H162h zvc^J>65-gQ6LZFre{%|hGr7MXalSP1v4O9P0;-0(?`C*C*aJ%sC8bp)F9-+%f`A|( z2nYg#fFK|U2m*qDARq_`0)l`bAP5Kof`A|(2nYg#fFK|U2m*qDARq_`0)l`bAP5Ko zf`A|(2nYg#fFK|U2m*q@{{#Vk;@f>^L%%gZcz#^Xzzujh{>3 zuU-N0lYZphPr=!&5A$Zov`jho(~k}RgW-D&-*5OG>M80chV*eJ$}Sam?o6txI{H0- zKTTJU-ur2W;WG`t+VDAsFEKn~_$`LF8SWS!H#}|ly@o$v_$I?2HGGTVPZ|E4;jbG0 zj^bzBO{pjd2m*qDARq_`0)l`bAP5Kof`A|(2nYg#fFK|U2m*qDARq_`0)l`bAP5Ko zf`A|(2nYg#fFK|U2m*qDARq_`0)l`bAP5Kof`A|(2nYg#fFK|U2m*qDARq_`0)l`b zAP5Kof`B0Ke}F&`G0Zo_;kfVpbKYGGQLkjD*B7TS*7pl@sZJJ6akHpwXVZ>jXJXl` zleV$+x>}MPe)jNZ1hCa>2lC2uqtTFl-(Rn!H`)WG`n~|x*5q=?`a1(eHyT-gM*!>1 z8(Du(fGA;P{apdVD!977{=NXw4~?w9GeGo|k@fchh+Z(V{>}l>>qa(93ew+L=4BGY zAfyu@mi2cJ^!F0Rpg+8Z9H$_if`gsS*xk-56lCm7JlfrDt3~PUY_y%1zZ*(x$D=*E zWOckf*=@(2Y_v0)jSkWolSDFM1M+w*5$(34-LYtAyM1o?~EM4 zB>6rBpruj(I*hcno;(1Lmvbd@K)g&Vry8RBdx9P;yZ42XZ*T*48@F(vW9_-WJU;U;xBvEmNxxsP&HLoy^S9snet6W+r+(y}eBf)FMOEC-+X#UTk_=(mJd|yd;RshJPX@=`E@(jty%xp?g!^@yyT~+ zEB|=;t|!-Ca8>Ka6Po93vVC3mEMB_3(aODca9(HMm~}_Le!BYqZFywczRmr8{kH|C g?)&Va$sfM>@Xb3{znt5>" + }, + { + "type": "REPEAT", + "content": { + "type": "SYMBOL", + "name": "_item" + } + }, + { + "type": "STRING", + "value": "" + } + ] + }, + { + "type": "STRING", + "value": "/>" + } + ] + } + ] + }, + "tag_name": { + "type": "PATTERN", + "value": "[A-Za-z][A-Za-z0-9_-]*" + }, + "attribute": { + "type": "SEQ", + "members": [ + { + "type": "SYMBOL", + "name": "attribute_name" + }, + { + "type": "STRING", + "value": "=" + }, + { + "type": "SYMBOL", + "name": "attribute_value" + } + ] + }, + "attribute_name": { + "type": "PATTERN", + "value": "[A-Za-z][A-Za-z0-9_-]*" + }, + "attribute_value": { + "type": "CHOICE", + "members": [ + { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "\"" + }, + { + "type": "SYMBOL", + "name": "quoted_value" + }, + { + "type": "STRING", + "value": "\"" + } + ] + }, + { + 
"type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "'" + }, + { + "type": "SYMBOL", + "name": "quoted_value" + }, + { + "type": "STRING", + "value": "'" + } + ] + } + ] + }, + "quoted_value": { + "type": "PATTERN", + "value": "[^\"']*" + }, + "comment": { + "type": "TOKEN", + "content": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "" + } + ] + } + }, + "text": { + "type": "PATTERN", + "value": "[^<]+" + }, + "_whitespace": { + "type": "PATTERN", + "value": "\\s+" + } + }, + "extras": [ + { + "type": "PATTERN", + "value": "\\s" + }, + { + "type": "SYMBOL", + "name": "comment" + } + ], + "conflicts": [], + "precedences": [], + "externals": [], + "inline": [], + "supertypes": [], + "reserved": {} +} \ No newline at end of file diff --git a/src/node-types.json b/src/node-types.json new file mode 100644 index 0000000..d1ec257 --- /dev/null +++ b/src/node-types.json @@ -0,0 +1,142 @@ +[ + { + "type": "attribute", + "named": true, + "fields": {}, + "children": { + "multiple": true, + "required": true, + "types": [ + { + "type": "attribute_name", + "named": true + }, + { + "type": "attribute_value", + "named": true + } + ] + } + }, + { + "type": "attribute_name", + "named": true, + "fields": {} + }, + { + "type": "attribute_value", + "named": true, + "fields": {}, + "children": { + "multiple": false, + "required": true, + "types": [ + { + "type": "quoted_value", + "named": true + } + ] + } + }, + { + "type": "element", + "named": true, + "fields": {}, + "children": { + "multiple": true, + "required": true, + "types": [ + { + "type": "attribute", + "named": true + }, + { + "type": "comment", + "named": true + }, + { + "type": "element", + "named": true + }, + { + "type": "tag_name", + "named": true + }, + { + "type": "text", + "named": true + } + ] + } + }, + { + "type": "source_file", + "named": true, + "root": true, + "fields": {}, + "children": { + "multiple": true, + "required": false, + "types": [ + { + "type": "comment", + 
"named": true + }, + { + "type": "element", + "named": true + }, + { + "type": "text", + "named": true + } + ] + } + }, + { + "type": "tag_name", + "named": true, + "fields": {} + }, + { + "type": "\"", + "named": false + }, + { + "type": "'", + "named": false + }, + { + "type": "/>", + "named": false + }, + { + "type": "<", + "named": false + }, + { + "type": "", + "named": false + }, + { + "type": "comment", + "named": true, + "extra": true + }, + { + "type": "quoted_value", + "named": true + }, + { + "type": "text", + "named": true + } +] \ No newline at end of file diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000..e365c22 --- /dev/null +++ b/src/parser.c @@ -0,0 +1,856 @@ +/* Automatically @generated by tree-sitter v0.25.8 */ + +#include "tree_sitter/parser.h" + +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#endif + +#define LANGUAGE_VERSION 14 +#define STATE_COUNT 34 +#define LARGE_STATE_COUNT 2 +#define SYMBOL_COUNT 22 +#define ALIAS_COUNT 0 +#define TOKEN_COUNT 13 +#define EXTERNAL_TOKEN_COUNT 0 +#define FIELD_COUNT 0 +#define MAX_ALIAS_SEQUENCE_LENGTH 8 +#define MAX_RESERVED_WORD_SET_SIZE 0 +#define PRODUCTION_ID_COUNT 1 +#define SUPERTYPE_COUNT 0 + +enum ts_symbol_identifiers { + anon_sym_LT = 1, + anon_sym_GT = 2, + anon_sym_LT_SLASH = 3, + anon_sym_SLASH_GT = 4, + aux_sym_tag_name_token1 = 5, + anon_sym_EQ = 6, + anon_sym_DQUOTE = 7, + anon_sym_SQUOTE = 8, + sym_quoted_value = 9, + sym_comment = 10, + sym_text = 11, + sym__whitespace = 12, + sym_source_file = 13, + sym__item = 14, + sym_element = 15, + sym_tag_name = 16, + sym_attribute = 17, + sym_attribute_name = 18, + sym_attribute_value = 19, + aux_sym_source_file_repeat1 = 20, + aux_sym_element_repeat1 = 21, +}; + +static const char * const ts_symbol_names[] = { + [ts_builtin_sym_end] = "end", + [anon_sym_LT] = "<", + [anon_sym_GT] = ">", + [anon_sym_LT_SLASH] = "", + [aux_sym_tag_name_token1] = 
"tag_name_token1", + [anon_sym_EQ] = "=", + [anon_sym_DQUOTE] = "\"", + [anon_sym_SQUOTE] = "'", + [sym_quoted_value] = "quoted_value", + [sym_comment] = "comment", + [sym_text] = "text", + [sym__whitespace] = "_whitespace", + [sym_source_file] = "source_file", + [sym__item] = "_item", + [sym_element] = "element", + [sym_tag_name] = "tag_name", + [sym_attribute] = "attribute", + [sym_attribute_name] = "attribute_name", + [sym_attribute_value] = "attribute_value", + [aux_sym_source_file_repeat1] = "source_file_repeat1", + [aux_sym_element_repeat1] = "element_repeat1", +}; + +static const TSSymbol ts_symbol_map[] = { + [ts_builtin_sym_end] = ts_builtin_sym_end, + [anon_sym_LT] = anon_sym_LT, + [anon_sym_GT] = anon_sym_GT, + [anon_sym_LT_SLASH] = anon_sym_LT_SLASH, + [anon_sym_SLASH_GT] = anon_sym_SLASH_GT, + [aux_sym_tag_name_token1] = aux_sym_tag_name_token1, + [anon_sym_EQ] = anon_sym_EQ, + [anon_sym_DQUOTE] = anon_sym_DQUOTE, + [anon_sym_SQUOTE] = anon_sym_SQUOTE, + [sym_quoted_value] = sym_quoted_value, + [sym_comment] = sym_comment, + [sym_text] = sym_text, + [sym__whitespace] = sym__whitespace, + [sym_source_file] = sym_source_file, + [sym__item] = sym__item, + [sym_element] = sym_element, + [sym_tag_name] = sym_tag_name, + [sym_attribute] = sym_attribute, + [sym_attribute_name] = sym_attribute_name, + [sym_attribute_value] = sym_attribute_value, + [aux_sym_source_file_repeat1] = aux_sym_source_file_repeat1, + [aux_sym_element_repeat1] = aux_sym_element_repeat1, +}; + +static const TSSymbolMetadata ts_symbol_metadata[] = { + [ts_builtin_sym_end] = { + .visible = false, + .named = true, + }, + [anon_sym_LT] = { + .visible = true, + .named = false, + }, + [anon_sym_GT] = { + .visible = true, + .named = false, + }, + [anon_sym_LT_SLASH] = { + .visible = true, + .named = false, + }, + [anon_sym_SLASH_GT] = { + .visible = true, + .named = false, + }, + [aux_sym_tag_name_token1] = { + .visible = false, + .named = false, + }, + [anon_sym_EQ] = { + .visible = true, + 
.named = false, + }, + [anon_sym_DQUOTE] = { + .visible = true, + .named = false, + }, + [anon_sym_SQUOTE] = { + .visible = true, + .named = false, + }, + [sym_quoted_value] = { + .visible = true, + .named = true, + }, + [sym_comment] = { + .visible = true, + .named = true, + }, + [sym_text] = { + .visible = true, + .named = true, + }, + [sym__whitespace] = { + .visible = false, + .named = true, + }, + [sym_source_file] = { + .visible = true, + .named = true, + }, + [sym__item] = { + .visible = false, + .named = true, + }, + [sym_element] = { + .visible = true, + .named = true, + }, + [sym_tag_name] = { + .visible = true, + .named = true, + }, + [sym_attribute] = { + .visible = true, + .named = true, + }, + [sym_attribute_name] = { + .visible = true, + .named = true, + }, + [sym_attribute_value] = { + .visible = true, + .named = true, + }, + [aux_sym_source_file_repeat1] = { + .visible = false, + .named = false, + }, + [aux_sym_element_repeat1] = { + .visible = false, + .named = false, + }, +}; + +static const TSSymbol ts_alias_sequences[PRODUCTION_ID_COUNT][MAX_ALIAS_SEQUENCE_LENGTH] = { + [0] = {0}, +}; + +static const uint16_t ts_non_terminal_alias_map[] = { + 0, +}; + +static const TSStateId ts_primary_state_ids[STATE_COUNT] = { + [0] = 0, + [1] = 1, + [2] = 2, + [3] = 3, + [4] = 4, + [5] = 5, + [6] = 6, + [7] = 7, + [8] = 8, + [9] = 9, + [10] = 10, + [11] = 11, + [12] = 12, + [13] = 13, + [14] = 14, + [15] = 15, + [16] = 16, + [17] = 17, + [18] = 18, + [19] = 19, + [20] = 20, + [21] = 21, + [22] = 22, + [23] = 23, + [24] = 24, + [25] = 25, + [26] = 26, + [27] = 27, + [28] = 28, + [29] = 29, + [30] = 30, + [31] = 31, + [32] = 32, + [33] = 33, +}; + +static bool ts_lex(TSLexer *lexer, TSStateId state) { + START_LEXER(); + eof = lexer->eof(lexer); + switch (state) { + case 0: + if (eof) ADVANCE(8); + if (lookahead == '"') ADVANCE(15); + if (lookahead == '\'') ADVANCE(16); + if (lookahead == '/') ADVANCE(5); + if (lookahead == '<') ADVANCE(9); + if (lookahead == 
'=') ADVANCE(14); + if (lookahead == '>') ADVANCE(10); + if (('\t' <= lookahead && lookahead <= '\r') || + lookahead == ' ') SKIP(0); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(13); + END_STATE(); + case 1: + if (lookahead == '-') ADVANCE(4); + END_STATE(); + case 2: + if (lookahead == '-') ADVANCE(6); + if (lookahead != 0) ADVANCE(4); + END_STATE(); + case 3: + if (lookahead == '-') ADVANCE(1); + END_STATE(); + case 4: + if (lookahead == '-') ADVANCE(2); + if (lookahead != 0) ADVANCE(4); + END_STATE(); + case 5: + if (lookahead == '>') ADVANCE(12); + END_STATE(); + case 6: + if (lookahead == '>') ADVANCE(25); + END_STATE(); + case 7: + if (eof) ADVANCE(8); + if (lookahead == '<') ADVANCE(9); + if (('\t' <= lookahead && lookahead <= '\r') || + lookahead == ' ') ADVANCE(26); + if (lookahead != 0) ADVANCE(27); + END_STATE(); + case 8: + ACCEPT_TOKEN(ts_builtin_sym_end); + END_STATE(); + case 9: + ACCEPT_TOKEN(anon_sym_LT); + if (lookahead == '!') ADVANCE(3); + if (lookahead == '/') ADVANCE(11); + END_STATE(); + case 10: + ACCEPT_TOKEN(anon_sym_GT); + END_STATE(); + case 11: + ACCEPT_TOKEN(anon_sym_LT_SLASH); + END_STATE(); + case 12: + ACCEPT_TOKEN(anon_sym_SLASH_GT); + END_STATE(); + case 13: + ACCEPT_TOKEN(aux_sym_tag_name_token1); + if (lookahead == '-' || + ('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(13); + END_STATE(); + case 14: + ACCEPT_TOKEN(anon_sym_EQ); + END_STATE(); + case 15: + ACCEPT_TOKEN(anon_sym_DQUOTE); + END_STATE(); + case 16: + ACCEPT_TOKEN(anon_sym_SQUOTE); + END_STATE(); + case 17: + ACCEPT_TOKEN(sym_quoted_value); + if (lookahead == '!') ADVANCE(20); + if (lookahead != 0 && + lookahead != '!' 
&& + lookahead != '"' && + lookahead != '\'') ADVANCE(24); + END_STATE(); + case 18: + ACCEPT_TOKEN(sym_quoted_value); + if (lookahead == '-') ADVANCE(21); + if (lookahead != 0 && + lookahead != '"' && + lookahead != '\'') ADVANCE(24); + END_STATE(); + case 19: + ACCEPT_TOKEN(sym_quoted_value); + if (lookahead == '-') ADVANCE(23); + if (lookahead == '"' || + lookahead == '\'') ADVANCE(4); + if (lookahead != 0) ADVANCE(21); + END_STATE(); + case 20: + ACCEPT_TOKEN(sym_quoted_value); + if (lookahead == '-') ADVANCE(18); + if (lookahead != 0 && + lookahead != '"' && + lookahead != '\'') ADVANCE(24); + END_STATE(); + case 21: + ACCEPT_TOKEN(sym_quoted_value); + if (lookahead == '-') ADVANCE(19); + if (lookahead == '"' || + lookahead == '\'') ADVANCE(4); + if (lookahead != 0) ADVANCE(21); + END_STATE(); + case 22: + ACCEPT_TOKEN(sym_quoted_value); + if (lookahead == '<') ADVANCE(17); + if (('\t' <= lookahead && lookahead <= '\r') || + lookahead == ' ') ADVANCE(22); + if (lookahead != 0 && + lookahead != '"' && + lookahead != '\'') ADVANCE(24); + END_STATE(); + case 23: + ACCEPT_TOKEN(sym_quoted_value); + if (lookahead == '>') ADVANCE(24); + if (lookahead != 0 && + lookahead != '"' && + lookahead != '\'') ADVANCE(24); + END_STATE(); + case 24: + ACCEPT_TOKEN(sym_quoted_value); + if (lookahead != 0 && + lookahead != '"' && + lookahead != '\'') ADVANCE(24); + END_STATE(); + case 25: + ACCEPT_TOKEN(sym_comment); + END_STATE(); + case 26: + ACCEPT_TOKEN(sym_text); + if (('\t' <= lookahead && lookahead <= '\r') || + lookahead == ' ') ADVANCE(26); + if (lookahead != 0 && + lookahead != '<') ADVANCE(27); + END_STATE(); + case 27: + ACCEPT_TOKEN(sym_text); + if (lookahead != 0 && + lookahead != '<') ADVANCE(27); + END_STATE(); + default: + return false; + } +} + +static const TSLexMode ts_lex_modes[STATE_COUNT] = { + [0] = {.lex_state = 0}, + [1] = {.lex_state = 7}, + [2] = {.lex_state = 7}, + [3] = {.lex_state = 7}, + [4] = {.lex_state = 7}, + [5] = {.lex_state = 7}, + [6] = 
{.lex_state = 7}, + [7] = {.lex_state = 7}, + [8] = {.lex_state = 0}, + [9] = {.lex_state = 0}, + [10] = {.lex_state = 0}, + [11] = {.lex_state = 7}, + [12] = {.lex_state = 7}, + [13] = {.lex_state = 7}, + [14] = {.lex_state = 7}, + [15] = {.lex_state = 7}, + [16] = {.lex_state = 0}, + [17] = {.lex_state = 0}, + [18] = {.lex_state = 0}, + [19] = {.lex_state = 0}, + [20] = {.lex_state = 0}, + [21] = {.lex_state = 0}, + [22] = {.lex_state = 0}, + [23] = {.lex_state = 0}, + [24] = {.lex_state = 0}, + [25] = {.lex_state = 0}, + [26] = {.lex_state = 0}, + [27] = {.lex_state = 22}, + [28] = {.lex_state = 22}, + [29] = {.lex_state = 0}, + [30] = {.lex_state = 0}, + [31] = {.lex_state = 0}, + [32] = {.lex_state = 0}, + [33] = {.lex_state = 0}, +}; + +static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { + [STATE(0)] = { + [ts_builtin_sym_end] = ACTIONS(1), + [anon_sym_LT] = ACTIONS(1), + [anon_sym_GT] = ACTIONS(1), + [anon_sym_LT_SLASH] = ACTIONS(1), + [anon_sym_SLASH_GT] = ACTIONS(1), + [aux_sym_tag_name_token1] = ACTIONS(1), + [anon_sym_EQ] = ACTIONS(1), + [anon_sym_DQUOTE] = ACTIONS(1), + [anon_sym_SQUOTE] = ACTIONS(1), + [sym_comment] = ACTIONS(3), + }, + [STATE(1)] = { + [sym_source_file] = STATE(26), + [sym__item] = STATE(4), + [sym_element] = STATE(4), + [aux_sym_source_file_repeat1] = STATE(4), + [ts_builtin_sym_end] = ACTIONS(5), + [anon_sym_LT] = ACTIONS(7), + [sym_comment] = ACTIONS(9), + [sym_text] = ACTIONS(9), + [sym__whitespace] = ACTIONS(9), + }, +}; + +static const uint16_t ts_small_parse_table[] = { + [0] = 5, + ACTIONS(11), 1, + ts_builtin_sym_end, + ACTIONS(13), 1, + anon_sym_LT, + ACTIONS(16), 1, + anon_sym_LT_SLASH, + ACTIONS(18), 3, + sym_comment, + sym_text, + sym__whitespace, + STATE(2), 3, + sym__item, + sym_element, + aux_sym_source_file_repeat1, + [20] = 4, + ACTIONS(7), 1, + anon_sym_LT, + ACTIONS(21), 1, + anon_sym_LT_SLASH, + ACTIONS(23), 3, + sym_comment, + sym_text, + sym__whitespace, + STATE(7), 3, + sym__item, + 
sym_element, + aux_sym_source_file_repeat1, + [37] = 4, + ACTIONS(7), 1, + anon_sym_LT, + ACTIONS(25), 1, + ts_builtin_sym_end, + ACTIONS(27), 3, + sym_comment, + sym_text, + sym__whitespace, + STATE(2), 3, + sym__item, + sym_element, + aux_sym_source_file_repeat1, + [54] = 4, + ACTIONS(7), 1, + anon_sym_LT, + ACTIONS(21), 1, + anon_sym_LT_SLASH, + ACTIONS(27), 3, + sym_comment, + sym_text, + sym__whitespace, + STATE(2), 3, + sym__item, + sym_element, + aux_sym_source_file_repeat1, + [71] = 4, + ACTIONS(7), 1, + anon_sym_LT, + ACTIONS(29), 1, + anon_sym_LT_SLASH, + ACTIONS(31), 3, + sym_comment, + sym_text, + sym__whitespace, + STATE(5), 3, + sym__item, + sym_element, + aux_sym_source_file_repeat1, + [88] = 4, + ACTIONS(7), 1, + anon_sym_LT, + ACTIONS(33), 1, + anon_sym_LT_SLASH, + ACTIONS(27), 3, + sym_comment, + sym_text, + sym__whitespace, + STATE(2), 3, + sym__item, + sym_element, + aux_sym_source_file_repeat1, + [105] = 6, + ACTIONS(3), 1, + sym_comment, + ACTIONS(35), 1, + anon_sym_GT, + ACTIONS(37), 1, + anon_sym_SLASH_GT, + ACTIONS(39), 1, + aux_sym_tag_name_token1, + STATE(24), 1, + sym_attribute_name, + STATE(10), 2, + sym_attribute, + aux_sym_element_repeat1, + [125] = 6, + ACTIONS(3), 1, + sym_comment, + ACTIONS(39), 1, + aux_sym_tag_name_token1, + ACTIONS(41), 1, + anon_sym_GT, + ACTIONS(43), 1, + anon_sym_SLASH_GT, + STATE(24), 1, + sym_attribute_name, + STATE(8), 2, + sym_attribute, + aux_sym_element_repeat1, + [145] = 5, + ACTIONS(3), 1, + sym_comment, + ACTIONS(47), 1, + aux_sym_tag_name_token1, + STATE(24), 1, + sym_attribute_name, + ACTIONS(45), 2, + anon_sym_GT, + anon_sym_SLASH_GT, + STATE(10), 2, + sym_attribute, + aux_sym_element_repeat1, + [163] = 2, + ACTIONS(50), 1, + ts_builtin_sym_end, + ACTIONS(52), 5, + anon_sym_LT, + anon_sym_LT_SLASH, + sym_comment, + sym_text, + sym__whitespace, + [174] = 2, + ACTIONS(54), 1, + ts_builtin_sym_end, + ACTIONS(56), 5, + anon_sym_LT, + anon_sym_LT_SLASH, + sym_comment, + sym_text, + sym__whitespace, + 
[185] = 2, + ACTIONS(58), 1, + ts_builtin_sym_end, + ACTIONS(60), 5, + anon_sym_LT, + anon_sym_LT_SLASH, + sym_comment, + sym_text, + sym__whitespace, + [196] = 2, + ACTIONS(62), 1, + ts_builtin_sym_end, + ACTIONS(64), 5, + anon_sym_LT, + anon_sym_LT_SLASH, + sym_comment, + sym_text, + sym__whitespace, + [207] = 2, + ACTIONS(66), 1, + ts_builtin_sym_end, + ACTIONS(68), 5, + anon_sym_LT, + anon_sym_LT_SLASH, + sym_comment, + sym_text, + sym__whitespace, + [218] = 4, + ACTIONS(3), 1, + sym_comment, + ACTIONS(70), 1, + anon_sym_DQUOTE, + ACTIONS(72), 1, + anon_sym_SQUOTE, + STATE(18), 1, + sym_attribute_value, + [231] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(74), 3, + anon_sym_GT, + anon_sym_SLASH_GT, + aux_sym_tag_name_token1, + [240] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(76), 3, + anon_sym_GT, + anon_sym_SLASH_GT, + aux_sym_tag_name_token1, + [249] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(78), 3, + anon_sym_GT, + anon_sym_SLASH_GT, + aux_sym_tag_name_token1, + [258] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(80), 1, + aux_sym_tag_name_token1, + STATE(9), 1, + sym_tag_name, + [268] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(80), 1, + aux_sym_tag_name_token1, + STATE(32), 1, + sym_tag_name, + [278] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(80), 1, + aux_sym_tag_name_token1, + STATE(33), 1, + sym_tag_name, + [288] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(80), 1, + aux_sym_tag_name_token1, + STATE(31), 1, + sym_tag_name, + [298] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(82), 1, + anon_sym_EQ, + [305] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(84), 1, + anon_sym_EQ, + [312] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(86), 1, + ts_builtin_sym_end, + [319] = 2, + ACTIONS(88), 1, + sym_quoted_value, + ACTIONS(90), 1, + sym_comment, + [326] = 2, + ACTIONS(90), 1, + sym_comment, + ACTIONS(92), 1, + sym_quoted_value, + [333] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(94), 1, + anon_sym_SQUOTE, + [340] = 2, + ACTIONS(3), 
1, + sym_comment, + ACTIONS(94), 1, + anon_sym_DQUOTE, + [347] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(96), 1, + anon_sym_GT, + [354] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(98), 1, + anon_sym_GT, + [361] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(100), 1, + anon_sym_GT, +}; + +static const uint32_t ts_small_parse_table_map[] = { + [SMALL_STATE(2)] = 0, + [SMALL_STATE(3)] = 20, + [SMALL_STATE(4)] = 37, + [SMALL_STATE(5)] = 54, + [SMALL_STATE(6)] = 71, + [SMALL_STATE(7)] = 88, + [SMALL_STATE(8)] = 105, + [SMALL_STATE(9)] = 125, + [SMALL_STATE(10)] = 145, + [SMALL_STATE(11)] = 163, + [SMALL_STATE(12)] = 174, + [SMALL_STATE(13)] = 185, + [SMALL_STATE(14)] = 196, + [SMALL_STATE(15)] = 207, + [SMALL_STATE(16)] = 218, + [SMALL_STATE(17)] = 231, + [SMALL_STATE(18)] = 240, + [SMALL_STATE(19)] = 249, + [SMALL_STATE(20)] = 258, + [SMALL_STATE(21)] = 268, + [SMALL_STATE(22)] = 278, + [SMALL_STATE(23)] = 288, + [SMALL_STATE(24)] = 298, + [SMALL_STATE(25)] = 305, + [SMALL_STATE(26)] = 312, + [SMALL_STATE(27)] = 319, + [SMALL_STATE(28)] = 326, + [SMALL_STATE(29)] = 333, + [SMALL_STATE(30)] = 340, + [SMALL_STATE(31)] = 347, + [SMALL_STATE(32)] = 354, + [SMALL_STATE(33)] = 361, +}; + +static const TSParseActionEntry ts_parse_actions[] = { + [0] = {.entry = {.count = 0, .reusable = false}}, + [1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(), + [3] = {.entry = {.count = 1, .reusable = true}}, SHIFT_EXTRA(), + [5] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 0, 0, 0), + [7] = {.entry = {.count = 1, .reusable = false}}, SHIFT(20), + [9] = {.entry = {.count = 1, .reusable = false}}, SHIFT(4), + [11] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2, 0, 0), + [13] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_source_file_repeat1, 2, 0, 0), SHIFT_REPEAT(20), + [16] = {.entry = {.count = 1, .reusable = false}}, REDUCE(aux_sym_source_file_repeat1, 2, 0, 0), + [18] = {.entry = {.count = 2, 
.reusable = false}}, REDUCE(aux_sym_source_file_repeat1, 2, 0, 0), SHIFT_REPEAT(2), + [21] = {.entry = {.count = 1, .reusable = false}}, SHIFT(23), + [23] = {.entry = {.count = 1, .reusable = false}}, SHIFT(7), + [25] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1, 0, 0), + [27] = {.entry = {.count = 1, .reusable = false}}, SHIFT(2), + [29] = {.entry = {.count = 1, .reusable = false}}, SHIFT(21), + [31] = {.entry = {.count = 1, .reusable = false}}, SHIFT(5), + [33] = {.entry = {.count = 1, .reusable = false}}, SHIFT(22), + [35] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3), + [37] = {.entry = {.count = 1, .reusable = true}}, SHIFT(12), + [39] = {.entry = {.count = 1, .reusable = true}}, SHIFT(25), + [41] = {.entry = {.count = 1, .reusable = true}}, SHIFT(6), + [43] = {.entry = {.count = 1, .reusable = true}}, SHIFT(11), + [45] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_element_repeat1, 2, 0, 0), + [47] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_element_repeat1, 2, 0, 0), SHIFT_REPEAT(25), + [50] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_element, 3, 0, 0), + [52] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_element, 3, 0, 0), + [54] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_element, 4, 0, 0), + [56] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_element, 4, 0, 0), + [58] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_element, 6, 0, 0), + [60] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_element, 6, 0, 0), + [62] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_element, 7, 0, 0), + [64] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_element, 7, 0, 0), + [66] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_element, 8, 0, 0), + [68] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_element, 8, 0, 0), + [70] = {.entry = {.count = 1, .reusable = true}}, SHIFT(27), + [72] = {.entry = {.count = 1, .reusable 
= true}}, SHIFT(28), + [74] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_tag_name, 1, 0, 0), + [76] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_attribute, 3, 0, 0), + [78] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_attribute_value, 3, 0, 0), + [80] = {.entry = {.count = 1, .reusable = true}}, SHIFT(17), + [82] = {.entry = {.count = 1, .reusable = true}}, SHIFT(16), + [84] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_attribute_name, 1, 0, 0), + [86] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), + [88] = {.entry = {.count = 1, .reusable = false}}, SHIFT(30), + [90] = {.entry = {.count = 1, .reusable = false}}, SHIFT_EXTRA(), + [92] = {.entry = {.count = 1, .reusable = false}}, SHIFT(29), + [94] = {.entry = {.count = 1, .reusable = true}}, SHIFT(19), + [96] = {.entry = {.count = 1, .reusable = true}}, SHIFT(14), + [98] = {.entry = {.count = 1, .reusable = true}}, SHIFT(13), + [100] = {.entry = {.count = 1, .reusable = true}}, SHIFT(15), +}; + +#ifdef __cplusplus +extern "C" { +#endif +#ifdef TREE_SITTER_HIDE_SYMBOLS +#define TS_PUBLIC +#elif defined(_WIN32) +#define TS_PUBLIC __declspec(dllexport) +#else +#define TS_PUBLIC __attribute__((visibility("default"))) +#endif + +TS_PUBLIC const TSLanguage *tree_sitter_xml(void) { + static const TSLanguage language = { + .abi_version = LANGUAGE_VERSION, + .symbol_count = SYMBOL_COUNT, + .alias_count = ALIAS_COUNT, + .token_count = TOKEN_COUNT, + .external_token_count = EXTERNAL_TOKEN_COUNT, + .state_count = STATE_COUNT, + .large_state_count = LARGE_STATE_COUNT, + .production_id_count = PRODUCTION_ID_COUNT, + .field_count = FIELD_COUNT, + .max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH, + .parse_table = &ts_parse_table[0][0], + .small_parse_table = ts_small_parse_table, + .small_parse_table_map = ts_small_parse_table_map, + .parse_actions = ts_parse_actions, + .symbol_names = ts_symbol_names, + .symbol_metadata = ts_symbol_metadata, + .public_symbol_map = 
ts_symbol_map, + .alias_map = ts_non_terminal_alias_map, + .alias_sequences = &ts_alias_sequences[0][0], + .lex_modes = (const void*)ts_lex_modes, + .lex_fn = ts_lex, + .primary_state_ids = ts_primary_state_ids, + }; + return &language; +} +#ifdef __cplusplus +} +#endif diff --git a/src/tree_sitter/alloc.h b/src/tree_sitter/alloc.h new file mode 100644 index 0000000..1abdd12 --- /dev/null +++ b/src/tree_sitter/alloc.h @@ -0,0 +1,54 @@ +#ifndef TREE_SITTER_ALLOC_H_ +#define TREE_SITTER_ALLOC_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +// Allow clients to override allocation functions +#ifdef TREE_SITTER_REUSE_ALLOCATOR + +extern void *(*ts_current_malloc)(size_t size); +extern void *(*ts_current_calloc)(size_t count, size_t size); +extern void *(*ts_current_realloc)(void *ptr, size_t size); +extern void (*ts_current_free)(void *ptr); + +#ifndef ts_malloc +#define ts_malloc ts_current_malloc +#endif +#ifndef ts_calloc +#define ts_calloc ts_current_calloc +#endif +#ifndef ts_realloc +#define ts_realloc ts_current_realloc +#endif +#ifndef ts_free +#define ts_free ts_current_free +#endif + +#else + +#ifndef ts_malloc +#define ts_malloc malloc +#endif +#ifndef ts_calloc +#define ts_calloc calloc +#endif +#ifndef ts_realloc +#define ts_realloc realloc +#endif +#ifndef ts_free +#define ts_free free +#endif + +#endif + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_ALLOC_H_ diff --git a/src/tree_sitter/array.h b/src/tree_sitter/array.h new file mode 100644 index 0000000..a17a574 --- /dev/null +++ b/src/tree_sitter/array.h @@ -0,0 +1,291 @@ +#ifndef TREE_SITTER_ARRAY_H_ +#define TREE_SITTER_ARRAY_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "./alloc.h" + +#include +#include +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4101) +#elif defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif + 
+#define Array(T) \ + struct { \ + T *contents; \ + uint32_t size; \ + uint32_t capacity; \ + } + +/// Initialize an array. +#define array_init(self) \ + ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL) + +/// Create an empty array. +#define array_new() \ + { NULL, 0, 0 } + +/// Get a pointer to the element at a given `index` in the array. +#define array_get(self, _index) \ + (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index]) + +/// Get a pointer to the first element in the array. +#define array_front(self) array_get(self, 0) + +/// Get a pointer to the last element in the array. +#define array_back(self) array_get(self, (self)->size - 1) + +/// Clear the array, setting its size to zero. Note that this does not free any +/// memory allocated for the array's contents. +#define array_clear(self) ((self)->size = 0) + +/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is +/// less than the array's current capacity, this function has no effect. +#define array_reserve(self, new_capacity) \ + _array__reserve((Array *)(self), array_elem_size(self), new_capacity) + +/// Free any memory allocated for this array. Note that this does not free any +/// memory allocated for the array's contents. +#define array_delete(self) _array__delete((Array *)(self)) + +/// Push a new `element` onto the end of the array. +#define array_push(self, element) \ + (_array__grow((Array *)(self), 1, array_elem_size(self)), \ + (self)->contents[(self)->size++] = (element)) + +/// Increase the array's size by `count` elements. +/// New elements are zero-initialized. +#define array_grow_by(self, count) \ + do { \ + if ((count) == 0) break; \ + _array__grow((Array *)(self), count, array_elem_size(self)); \ + memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \ + (self)->size += (count); \ + } while (0) + +/// Append all elements from one array to the end of another. 
#define array_push_all(self, other) \
  array_extend((self), (other)->size, (other)->contents)

/// Append `count` elements to the end of the array, reading their values from the
/// `contents` pointer. If `contents` is NULL, the appended elements are
/// zero-filled instead (see `_array__splice`).
#define array_extend(self, count, contents) \
  _array__splice( \
    (Array *)(self), array_elem_size(self), (self)->size, \
    0, count, contents \
  )

/// Remove `old_count` elements from the array starting at the given `index`. At
/// the same index, insert `new_count` new elements, reading their values from the
/// `new_contents` pointer. If `new_contents` is NULL, the inserted elements are
/// zero-filled.
#define array_splice(self, _index, old_count, new_count, new_contents) \
  _array__splice( \
    (Array *)(self), array_elem_size(self), _index, \
    old_count, new_count, new_contents \
  )

/// Insert one `element` into the array at the given `index`.
/// The macro takes the address of `element`, so it must be an lvalue.
#define array_insert(self, _index, element) \
  _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))

/// Remove one element from the array at the given `index`.
#define array_erase(self, _index) \
  _array__erase((Array *)(self), array_elem_size(self), _index)

/// Pop the last element off the array, returning the element by value.
/// No emptiness check is performed: the array must contain at least one element.
#define array_pop(self) ((self)->contents[--(self)->size])

/// Assign the contents of one array to another, reallocating if necessary.
#define array_assign(self, other) \
  _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))

/// Swap one array with another. Only the three header fields are exchanged;
/// no element data is copied.
#define array_swap(self, other) \
  _array__swap((Array *)(self), (Array *)(other))

/// Get the size in bytes of a single element of the array.
#define array_elem_size(self) (sizeof *(self)->contents)

/// Search a sorted array for a given `needle` value, using the given `compare`
/// callback to determine the order.
///
/// If an existing element is found to be equal to `needle`, then the `index`
/// out-parameter is set to the existing value's index, and the `exists`
/// out-parameter is set to true. Otherwise, `index` is set to an index where
/// `needle` should be inserted in order to preserve the sorting, and `exists`
/// is set to false.
///
/// `compare` is invoked as `compare(&element, needle)` and must return a
/// negative, zero, or positive `int`. `_index` and `_exists` are pointers.
#define array_search_sorted_with(self, compare, needle, _index, _exists) \
  _array__search_sorted(self, 0, compare, , needle, _index, _exists)

/// Search a sorted array for a given `needle` value, using integer comparisons
/// of a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_with`.
#define array_search_sorted_by(self, field, needle, _index, _exists) \
  _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)

/// Insert a given `value` into a sorted array, using the given `compare`
/// callback to determine the order. Nothing is inserted if an equal element
/// already exists. `value` must be an lvalue, because its address is taken.
#define array_insert_sorted_with(self, compare, value) \
  do { \
    unsigned _index, _exists; \
    array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
    if (!_exists) array_insert(self, _index, value); \
  } while (0)

/// Insert a given `value` into a sorted array, using integer comparisons of
/// a given struct field (specified with a leading dot) to determine the order.
/// Nothing is inserted if an element with an equal field value already exists.
///
/// See also `array_search_sorted_by`.
#define array_insert_sorted_by(self, field, value) \
  do { \
    unsigned _index, _exists; \
    array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
    if (!_exists) array_insert(self, _index, value); \
  } while (0)

// Private

/// Type-erased view of any `Array(T)`. The public macros cast their `self`
/// argument to `Array *` and pass the element size explicitly, so that the
/// helper functions below can operate on raw bytes.
typedef Array(void) Array;

/// This is not what you're looking for, see `array_delete`.
///
/// Frees the element storage (if any) and resets the array to the empty state.
/// An array whose `contents` is already NULL is left untouched.
static inline void _array__delete(Array *self) {
  if (self->contents) {
    ts_free(self->contents);
    self->contents = NULL;
    self->size = 0;
    self->capacity = 0;
  }
}

/// This is not what you're looking for, see `array_erase`.
+static inline void _array__erase(Array *self, size_t element_size, + uint32_t index) { + assert(index < self->size); + char *contents = (char *)self->contents; + memmove(contents + index * element_size, contents + (index + 1) * element_size, + (self->size - index - 1) * element_size); + self->size--; +} + +/// This is not what you're looking for, see `array_reserve`. +static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) { + if (new_capacity > self->capacity) { + if (self->contents) { + self->contents = ts_realloc(self->contents, new_capacity * element_size); + } else { + self->contents = ts_malloc(new_capacity * element_size); + } + self->capacity = new_capacity; + } +} + +/// This is not what you're looking for, see `array_assign`. +static inline void _array__assign(Array *self, const Array *other, size_t element_size) { + _array__reserve(self, element_size, other->size); + self->size = other->size; + memcpy(self->contents, other->contents, self->size * element_size); +} + +/// This is not what you're looking for, see `array_swap`. +static inline void _array__swap(Array *self, Array *other) { + Array swap = *other; + *other = *self; + *self = swap; +} + +/// This is not what you're looking for, see `array_push` or `array_grow_by`. +static inline void _array__grow(Array *self, uint32_t count, size_t element_size) { + uint32_t new_size = self->size + count; + if (new_size > self->capacity) { + uint32_t new_capacity = self->capacity * 2; + if (new_capacity < 8) new_capacity = 8; + if (new_capacity < new_size) new_capacity = new_size; + _array__reserve(self, element_size, new_capacity); + } +} + +/// This is not what you're looking for, see `array_splice`. 
///
/// Replaces the `old_count` elements starting at `index` with `new_count`
/// elements read from `elements`. If `elements` is NULL, the inserted slots
/// are zero-filled instead. `index + old_count` must not exceed the current
/// size (checked with `assert`).
static inline void _array__splice(Array *self, size_t element_size,
                                 uint32_t index, uint32_t old_count,
                                 uint32_t new_count, const void *elements) {
  uint32_t new_size = self->size + new_count - old_count;
  uint32_t old_end = index + old_count;
  uint32_t new_end = index + new_count;
  assert(old_end <= self->size);

  // Reserves exactly `new_size` elements (no geometric growth here).
  _array__reserve(self, element_size, new_size);

  // Read `contents` only after reserving: the storage may have been reallocated.
  char *contents = (char *)self->contents;
  if (self->size > old_end) {
    // Move the tail that follows the removed range to its new position. The
    // ranges can overlap, hence memmove.
    memmove(
      contents + new_end * element_size,
      contents + old_end * element_size,
      (self->size - old_end) * element_size
    );
  }
  if (new_count > 0) {
    if (elements) {
      memcpy(
        (contents + index * element_size),
        elements,
        new_count * element_size
      );
    } else {
      memset(
        (contents + index * element_size),
        0,
        new_count * element_size
      );
    }
  }
  self->size += new_count - old_count;
}

/// A binary search routine, based on Rust's `std::slice::binary_search_by`.
/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`.
///
/// Parameters:
///   - `self`:    pointer to a sorted array
///   - `start`:   index at which the search begins
///   - `compare`: function or macro called as `compare(&element suffix, needle)`,
///                returning a negative, zero, or positive `int`
///   - `suffix`:  tokens appended to the element expression before its address
///                is taken; either empty or a `.field` accessor
///   - `_index`:  out-pointer; receives the index of the match, or the index at
///                which `needle` would have to be inserted
///   - `_exists`: out-pointer; set to true only when an equal element is found
///
/// The loop halves the search window until a single candidate remains, then
/// compares that candidate once more to decide between "found", "insert
/// before", and "insert after". An empty range leaves `*_index == start` and
/// `*_exists == false`.
#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
  do { \
    *(_index) = start; \
    *(_exists) = false; \
    uint32_t size = (self)->size - *(_index); \
    if (size == 0) break; \
    int comparison; \
    while (size > 1) { \
      uint32_t half_size = size / 2; \
      uint32_t mid_index = *(_index) + half_size; \
      comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
      if (comparison <= 0) *(_index) = mid_index; \
      size -= half_size; \
    } \
    comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
    if (comparison == 0) *(_exists) = true; \
    else if (comparison < 0) *(_index) += 1; \
  } while (0)

/// Helper macro for the `_sorted_by` routines above. This takes the left (existing)
/// parameter by reference in order to work with the generic search macro above.
+#define _compare_int(a, b) ((int)*(a) - (int)(b)) + +#ifdef _MSC_VER +#pragma warning(pop) +#elif defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic pop +#endif + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_ARRAY_H_ diff --git a/src/tree_sitter/parser.h b/src/tree_sitter/parser.h new file mode 100644 index 0000000..858107d --- /dev/null +++ b/src/tree_sitter/parser.h @@ -0,0 +1,286 @@ +#ifndef TREE_SITTER_PARSER_H_ +#define TREE_SITTER_PARSER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +#define ts_builtin_sym_error ((TSSymbol)-1) +#define ts_builtin_sym_end 0 +#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 + +#ifndef TREE_SITTER_API_H_ +typedef uint16_t TSStateId; +typedef uint16_t TSSymbol; +typedef uint16_t TSFieldId; +typedef struct TSLanguage TSLanguage; +typedef struct TSLanguageMetadata { + uint8_t major_version; + uint8_t minor_version; + uint8_t patch_version; +} TSLanguageMetadata; +#endif + +typedef struct { + TSFieldId field_id; + uint8_t child_index; + bool inherited; +} TSFieldMapEntry; + +// Used to index the field and supertype maps. 
+typedef struct { + uint16_t index; + uint16_t length; +} TSMapSlice; + +typedef struct { + bool visible; + bool named; + bool supertype; +} TSSymbolMetadata; + +typedef struct TSLexer TSLexer; + +struct TSLexer { + int32_t lookahead; + TSSymbol result_symbol; + void (*advance)(TSLexer *, bool); + void (*mark_end)(TSLexer *); + uint32_t (*get_column)(TSLexer *); + bool (*is_at_included_range_start)(const TSLexer *); + bool (*eof)(const TSLexer *); + void (*log)(const TSLexer *, const char *, ...); +}; + +typedef enum { + TSParseActionTypeShift, + TSParseActionTypeReduce, + TSParseActionTypeAccept, + TSParseActionTypeRecover, +} TSParseActionType; + +typedef union { + struct { + uint8_t type; + TSStateId state; + bool extra; + bool repetition; + } shift; + struct { + uint8_t type; + uint8_t child_count; + TSSymbol symbol; + int16_t dynamic_precedence; + uint16_t production_id; + } reduce; + uint8_t type; +} TSParseAction; + +typedef struct { + uint16_t lex_state; + uint16_t external_lex_state; +} TSLexMode; + +typedef struct { + uint16_t lex_state; + uint16_t external_lex_state; + uint16_t reserved_word_set_id; +} TSLexerMode; + +typedef union { + TSParseAction action; + struct { + uint8_t count; + bool reusable; + } entry; +} TSParseActionEntry; + +typedef struct { + int32_t start; + int32_t end; +} TSCharacterRange; + +struct TSLanguage { + uint32_t abi_version; + uint32_t symbol_count; + uint32_t alias_count; + uint32_t token_count; + uint32_t external_token_count; + uint32_t state_count; + uint32_t large_state_count; + uint32_t production_id_count; + uint32_t field_count; + uint16_t max_alias_sequence_length; + const uint16_t *parse_table; + const uint16_t *small_parse_table; + const uint32_t *small_parse_table_map; + const TSParseActionEntry *parse_actions; + const char * const *symbol_names; + const char * const *field_names; + const TSMapSlice *field_map_slices; + const TSFieldMapEntry *field_map_entries; + const TSSymbolMetadata *symbol_metadata; + const 
TSSymbol *public_symbol_map; + const uint16_t *alias_map; + const TSSymbol *alias_sequences; + const TSLexerMode *lex_modes; + bool (*lex_fn)(TSLexer *, TSStateId); + bool (*keyword_lex_fn)(TSLexer *, TSStateId); + TSSymbol keyword_capture_token; + struct { + const bool *states; + const TSSymbol *symbol_map; + void *(*create)(void); + void (*destroy)(void *); + bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); + unsigned (*serialize)(void *, char *); + void (*deserialize)(void *, const char *, unsigned); + } external_scanner; + const TSStateId *primary_state_ids; + const char *name; + const TSSymbol *reserved_words; + uint16_t max_reserved_word_set_size; + uint32_t supertype_count; + const TSSymbol *supertype_symbols; + const TSMapSlice *supertype_map_slices; + const TSSymbol *supertype_map_entries; + TSLanguageMetadata metadata; +}; + +static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) { + uint32_t index = 0; + uint32_t size = len - index; + while (size > 1) { + uint32_t half_size = size / 2; + uint32_t mid_index = index + half_size; + const TSCharacterRange *range = &ranges[mid_index]; + if (lookahead >= range->start && lookahead <= range->end) { + return true; + } else if (lookahead > range->end) { + index = mid_index; + } + size -= half_size; + } + const TSCharacterRange *range = &ranges[index]; + return (lookahead >= range->start && lookahead <= range->end); +} + +/* + * Lexer Macros + */ + +#ifdef _MSC_VER +#define UNUSED __pragma(warning(suppress : 4101)) +#else +#define UNUSED __attribute__((unused)) +#endif + +#define START_LEXER() \ + bool result = false; \ + bool skip = false; \ + UNUSED \ + bool eof = false; \ + int32_t lookahead; \ + goto start; \ + next_state: \ + lexer->advance(lexer, skip); \ + start: \ + skip = false; \ + lookahead = lexer->lookahead; + +#define ADVANCE(state_value) \ + { \ + state = state_value; \ + goto next_state; \ + } + +#define ADVANCE_MAP(...) 
\ + { \ + static const uint16_t map[] = { __VA_ARGS__ }; \ + for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \ + if (map[i] == lookahead) { \ + state = map[i + 1]; \ + goto next_state; \ + } \ + } \ + } + +#define SKIP(state_value) \ + { \ + skip = true; \ + state = state_value; \ + goto next_state; \ + } + +#define ACCEPT_TOKEN(symbol_value) \ + result = true; \ + lexer->result_symbol = symbol_value; \ + lexer->mark_end(lexer); + +#define END_STATE() return result; + +/* + * Parse Table Macros + */ + +#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT) + +#define STATE(id) id + +#define ACTIONS(id) id + +#define SHIFT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = (state_value) \ + } \ + }} + +#define SHIFT_REPEAT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = (state_value), \ + .repetition = true \ + } \ + }} + +#define SHIFT_EXTRA() \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .extra = true \ + } \ + }} + +#define REDUCE(symbol_name, children, precedence, prod_id) \ + {{ \ + .reduce = { \ + .type = TSParseActionTypeReduce, \ + .symbol = symbol_name, \ + .child_count = children, \ + .dynamic_precedence = precedence, \ + .production_id = prod_id \ + }, \ + }} + +#define RECOVER() \ + {{ \ + .type = TSParseActionTypeRecover \ + }} + +#define ACCEPT_INPUT() \ + {{ \ + .type = TSParseActionTypeAccept \ + }} + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_PARSER_H_ diff --git a/tree-sitter.json b/tree-sitter.json new file mode 100644 index 0000000..9eeb573 --- /dev/null +++ b/tree-sitter.json @@ -0,0 +1,37 @@ +{ + "$schema": "https://tree-sitter.github.io/tree-sitter/assets/schemas/config.schema.json", + "grammars": [ + { + "name": "xml", + "camelcase": "Xml", + "title": "XML", + "scope": "source.xml", + "file-types": ["xml"], + "injection-regex": "^xml$", + "class-name": "TreeSitterXml" + } + ], + "metadata": { + "version": "1.0.0", + "license": "MIT", 
+ "description": "A Tree-sitter grammar for parsing XML files (nand2tetris compiler output)", + "authors": [ + { + "name": "Sean O'Connor", + "email": "sean@soconnor.dev" + } + ], + "links": { + "repository": "https://github.com/soconnor0919/nand2tetris-zed" + } + }, + "bindings": { + "c": true, + "go": false, + "node": true, + "python": false, + "rust": true, + "swift": false, + "zig": false + } +}