Initial commit: Tree-sitter grammar for XML language

This commit is contained in:
2025-09-10 23:50:46 -04:00
commit 1e86bc6d16
9 changed files with 1931 additions and 0 deletions

62
grammar.js Normal file
View File

@@ -0,0 +1,62 @@
module.exports = grammar({
name: 'xml',
rules: {
source_file: $ => repeat($._item),
_item: $ => choice(
$.element,
$.comment,
$.text,
$._whitespace
),
// XML element
element: $ => seq(
'<',
$.tag_name,
repeat($.attribute),
choice(
seq('>', repeat($._item), '</', $.tag_name, '>'),
'/>'
)
),
// Tag name
tag_name: $ => /[A-Za-z][A-Za-z0-9_-]*/,
// Attribute
attribute: $ => seq(
$.attribute_name,
'=',
$.attribute_value
),
// Attribute name
attribute_name: $ => /[A-Za-z][A-Za-z0-9_-]*/,
// Attribute value
attribute_value: $ => choice(
seq('"', $.quoted_value, '"'),
seq("'", $.quoted_value, "'")
),
// Quoted value content
quoted_value: $ => /[^"']*/,
// XML comment
comment: $ => token(seq('<!--', /[^-]*(?:-[^-]+)*/, '-->')),
// Text content
text: $ => /[^<]+/,
// Whitespace
_whitespace: $ => /\s+/
},
extras: $ => [
/\s/,
$.comment
]
});