Add tests and modify grammar

This commit is contained in:
Jeff 2023-08-22 17:28:19 -04:00
parent 00a15c1704
commit f2d0c4af96
5 changed files with 352 additions and 86 deletions

View File

@ -14,3 +14,63 @@ x # xyz
(expression (expression
(identifier)) (identifier))
(comment)) (comment))
==================
Identifiers
==================
variable_name
_unused_variable
__strange_format__
a
blahblah
x.x
---
(source_file
(expression
(identifier))
(expression
(identifier))
(expression
(identifier))
(expression
(identifier))
(expression
(identifier))
(expression
(identifier)))
==================
Operators
==================
+ - = / & |
---
(source_file
(expression
(operator)
(operator)
(operator)
(operator)
(operator)
(operator)))
==================
Expressions
==================
x_x = 1;
---
(source_file
(expression
(identifier))
(operator)
(integer)))

View File

@ -4,18 +4,24 @@ module.exports = grammar({
rules: { rules: {
source_file: $ => repeat(choice($.comment, $.expression)), source_file: $ => repeat(choice($.comment, $.expression)),
identifier: $ => /[a-zA-Z|_|.]+(_[a-zA-Z]+)*/,
comment: $ => /(#)(.+?)([\n\r])/, comment: $ => /(#)(.+?)([\n\r])/,
expression: $ => choice( expression: $ => choice(
$.identifier $.identifier,
// TODO: other kinds of definitions seq($.identifier, $.operator, $.identifier)
), ),
identifier: $ => /[a-zA-Z]+(_[a-zA-Z]+)*/,
operator: $ => choice( operator: $ => choice(
'=', '=',
"+", '-',
'+',
';',
'/',
'|',
'&'
), ),
integer: $ => /\d/, integer: $ => /\d/,

View File

@ -17,6 +17,10 @@
] ]
} }
}, },
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z|_|.]+(_[a-zA-Z]+)*"
},
"comment": { "comment": {
"type": "PATTERN", "type": "PATTERN",
"value": "(#)(.+?)([\\n\\r])" "value": "(#)(.+?)([\\n\\r])"
@ -27,13 +31,26 @@
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "identifier" "name": "identifier"
},
{
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "identifier"
},
{
"type": "SYMBOL",
"name": "operator"
},
{
"type": "SYMBOL",
"name": "identifier"
}
]
} }
] ]
}, },
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+(_[a-zA-Z]+)*"
},
"operator": { "operator": {
"type": "CHOICE", "type": "CHOICE",
"members": [ "members": [
@ -41,9 +58,29 @@
"type": "STRING", "type": "STRING",
"value": "=" "value": "="
}, },
{
"type": "STRING",
"value": "-"
},
{ {
"type": "STRING", "type": "STRING",
"value": "+" "value": "+"
},
{
"type": "STRING",
"value": ";"
},
{
"type": "STRING",
"value": "/"
},
{
"type": "STRING",
"value": "|"
},
{
"type": "STRING",
"value": "&"
} }
] ]
}, },

View File

@ -4,16 +4,25 @@
"named": true, "named": true,
"fields": {}, "fields": {},
"children": { "children": {
"multiple": false, "multiple": true,
"required": true, "required": true,
"types": [ "types": [
{ {
"type": "identifier", "type": "identifier",
"named": true "named": true
},
{
"type": "operator",
"named": true
} }
] ]
} }
}, },
{
"type": "operator",
"named": true,
"fields": {}
},
{ {
"type": "source_file", "type": "source_file",
"named": true, "named": true,
@ -33,10 +42,26 @@
] ]
} }
}, },
{
"type": "&",
"named": false
},
{ {
"type": "+", "type": "+",
"named": false "named": false
}, },
{
"type": "-",
"named": false
},
{
"type": "/",
"named": false
},
{
"type": ";",
"named": false
},
{ {
"type": "=", "type": "=",
"named": false "named": false
@ -48,5 +73,9 @@
{ {
"type": "identifier", "type": "identifier",
"named": true "named": true
},
{
"type": "|",
"named": false
} }
] ]

View File

@ -6,48 +6,66 @@
#endif #endif
#define LANGUAGE_VERSION 14 #define LANGUAGE_VERSION 14
#define STATE_COUNT 6 #define STATE_COUNT 9
#define LARGE_STATE_COUNT 4 #define LARGE_STATE_COUNT 3
#define SYMBOL_COUNT 9 #define SYMBOL_COUNT 15
#define ALIAS_COUNT 0 #define ALIAS_COUNT 0
#define TOKEN_COUNT 6 #define TOKEN_COUNT 11
#define EXTERNAL_TOKEN_COUNT 0 #define EXTERNAL_TOKEN_COUNT 0
#define FIELD_COUNT 0 #define FIELD_COUNT 0
#define MAX_ALIAS_SEQUENCE_LENGTH 2 #define MAX_ALIAS_SEQUENCE_LENGTH 3
#define PRODUCTION_ID_COUNT 1 #define PRODUCTION_ID_COUNT 1
enum { enum {
sym_comment = 1, sym_identifier = 1,
sym_identifier = 2, sym_comment = 2,
anon_sym_EQ = 3, anon_sym_EQ = 3,
anon_sym_PLUS = 4, anon_sym_DASH = 4,
sym_integer = 5, anon_sym_PLUS = 5,
sym_source_file = 6, anon_sym_SEMI = 6,
sym_expression = 7, anon_sym_SLASH = 7,
aux_sym_source_file_repeat1 = 8, anon_sym_PIPE = 8,
anon_sym_AMP = 9,
sym_integer = 10,
sym_source_file = 11,
sym_expression = 12,
sym_operator = 13,
aux_sym_source_file_repeat1 = 14,
}; };
static const char * const ts_symbol_names[] = { static const char * const ts_symbol_names[] = {
[ts_builtin_sym_end] = "end", [ts_builtin_sym_end] = "end",
[sym_comment] = "comment",
[sym_identifier] = "identifier", [sym_identifier] = "identifier",
[sym_comment] = "comment",
[anon_sym_EQ] = "=", [anon_sym_EQ] = "=",
[anon_sym_DASH] = "-",
[anon_sym_PLUS] = "+", [anon_sym_PLUS] = "+",
[anon_sym_SEMI] = ";",
[anon_sym_SLASH] = "/",
[anon_sym_PIPE] = "|",
[anon_sym_AMP] = "&",
[sym_integer] = "integer", [sym_integer] = "integer",
[sym_source_file] = "source_file", [sym_source_file] = "source_file",
[sym_expression] = "expression", [sym_expression] = "expression",
[sym_operator] = "operator",
[aux_sym_source_file_repeat1] = "source_file_repeat1", [aux_sym_source_file_repeat1] = "source_file_repeat1",
}; };
static const TSSymbol ts_symbol_map[] = { static const TSSymbol ts_symbol_map[] = {
[ts_builtin_sym_end] = ts_builtin_sym_end, [ts_builtin_sym_end] = ts_builtin_sym_end,
[sym_comment] = sym_comment,
[sym_identifier] = sym_identifier, [sym_identifier] = sym_identifier,
[sym_comment] = sym_comment,
[anon_sym_EQ] = anon_sym_EQ, [anon_sym_EQ] = anon_sym_EQ,
[anon_sym_DASH] = anon_sym_DASH,
[anon_sym_PLUS] = anon_sym_PLUS, [anon_sym_PLUS] = anon_sym_PLUS,
[anon_sym_SEMI] = anon_sym_SEMI,
[anon_sym_SLASH] = anon_sym_SLASH,
[anon_sym_PIPE] = anon_sym_PIPE,
[anon_sym_AMP] = anon_sym_AMP,
[sym_integer] = sym_integer, [sym_integer] = sym_integer,
[sym_source_file] = sym_source_file, [sym_source_file] = sym_source_file,
[sym_expression] = sym_expression, [sym_expression] = sym_expression,
[sym_operator] = sym_operator,
[aux_sym_source_file_repeat1] = aux_sym_source_file_repeat1, [aux_sym_source_file_repeat1] = aux_sym_source_file_repeat1,
}; };
@ -56,11 +74,11 @@ static const TSSymbolMetadata ts_symbol_metadata[] = {
.visible = false, .visible = false,
.named = true, .named = true,
}, },
[sym_comment] = { [sym_identifier] = {
.visible = true, .visible = true,
.named = true, .named = true,
}, },
[sym_identifier] = { [sym_comment] = {
.visible = true, .visible = true,
.named = true, .named = true,
}, },
@ -68,10 +86,30 @@ static const TSSymbolMetadata ts_symbol_metadata[] = {
.visible = true, .visible = true,
.named = false, .named = false,
}, },
[anon_sym_DASH] = {
.visible = true,
.named = false,
},
[anon_sym_PLUS] = { [anon_sym_PLUS] = {
.visible = true, .visible = true,
.named = false, .named = false,
}, },
[anon_sym_SEMI] = {
.visible = true,
.named = false,
},
[anon_sym_SLASH] = {
.visible = true,
.named = false,
},
[anon_sym_PIPE] = {
.visible = true,
.named = false,
},
[anon_sym_AMP] = {
.visible = true,
.named = false,
},
[sym_integer] = { [sym_integer] = {
.visible = true, .visible = true,
.named = true, .named = true,
@ -84,6 +122,10 @@ static const TSSymbolMetadata ts_symbol_metadata[] = {
.visible = true, .visible = true,
.named = true, .named = true,
}, },
[sym_operator] = {
.visible = true,
.named = true,
},
[aux_sym_source_file_repeat1] = { [aux_sym_source_file_repeat1] = {
.visible = false, .visible = false,
.named = false, .named = false,
@ -105,6 +147,9 @@ static const TSStateId ts_primary_state_ids[STATE_COUNT] = {
[3] = 3, [3] = 3,
[4] = 4, [4] = 4,
[5] = 5, [5] = 5,
[6] = 6,
[7] = 7,
[8] = 8,
}; };
static bool ts_lex(TSLexer *lexer, TSStateId state) { static bool ts_lex(TSLexer *lexer, TSStateId state) {
@ -113,55 +158,101 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
switch (state) { switch (state) {
case 0: case 0:
if (eof) ADVANCE(4); if (eof) ADVANCE(4);
if (lookahead == '#') ADVANCE(3); if (lookahead == '#') ADVANCE(2);
if (lookahead == '+') ADVANCE(9); if (lookahead == '&') ADVANCE(15);
if (lookahead == '=') ADVANCE(8); if (lookahead == '+') ADVANCE(11);
if (lookahead == '-') ADVANCE(10);
if (lookahead == '/') ADVANCE(13);
if (lookahead == ';') ADVANCE(12);
if (lookahead == '=') ADVANCE(9);
if (lookahead == '|') ADVANCE(14);
if (lookahead == '\t' || if (lookahead == '\t' ||
lookahead == '\n' || lookahead == '\n' ||
lookahead == '\r' || lookahead == '\r' ||
lookahead == ' ') SKIP(0) lookahead == ' ') SKIP(0)
if (('0' <= lookahead && lookahead <= '9')) ADVANCE(10); if (('0' <= lookahead && lookahead <= '9')) ADVANCE(16);
if (('A' <= lookahead && lookahead <= 'Z') || if (lookahead == '.' ||
('a' <= lookahead && lookahead <= 'z')) ADVANCE(7); ('A' <= lookahead && lookahead <= 'Z') ||
lookahead == '_' ||
('a' <= lookahead && lookahead <= 'z')) ADVANCE(6);
END_STATE(); END_STATE();
case 1: case 1:
if (lookahead == '\n') ADVANCE(5); if (lookahead == '\n') ADVANCE(7);
if (lookahead == '\r') ADVANCE(6); if (lookahead == '\r') ADVANCE(8);
if (lookahead != 0) ADVANCE(1); if (lookahead != 0) ADVANCE(1);
END_STATE(); END_STATE();
case 2: case 2:
if (('A' <= lookahead && lookahead <= 'Z') ||
('a' <= lookahead && lookahead <= 'z')) ADVANCE(7);
END_STATE();
case 3:
if (lookahead != 0 && if (lookahead != 0 &&
lookahead != '\n') ADVANCE(1); lookahead != '\n') ADVANCE(1);
END_STATE(); END_STATE();
case 3:
if (eof) ADVANCE(4);
if (lookahead == '#') ADVANCE(2);
if (lookahead == '\t' ||
lookahead == '\n' ||
lookahead == '\r' ||
lookahead == ' ') SKIP(3)
if (lookahead == '.' ||
('A' <= lookahead && lookahead <= 'Z') ||
lookahead == '_' ||
('a' <= lookahead && lookahead <= 'z') ||
lookahead == '|') ADVANCE(6);
END_STATE();
case 4: case 4:
ACCEPT_TOKEN(ts_builtin_sym_end); ACCEPT_TOKEN(ts_builtin_sym_end);
END_STATE(); END_STATE();
case 5: case 5:
ACCEPT_TOKEN(sym_comment); ACCEPT_TOKEN(sym_identifier);
if (lookahead == '_') ADVANCE(5);
if (lookahead == '.' ||
lookahead == '|') ADVANCE(6);
if (('A' <= lookahead && lookahead <= 'Z') ||
('a' <= lookahead && lookahead <= 'z')) ADVANCE(5);
END_STATE(); END_STATE();
case 6: case 6:
ACCEPT_TOKEN(sym_comment); ACCEPT_TOKEN(sym_identifier);
if (lookahead == '\n') ADVANCE(5); if (lookahead == '_') ADVANCE(5);
if (lookahead == '\r') ADVANCE(6); if (lookahead == '.' ||
if (lookahead != 0) ADVANCE(1); ('A' <= lookahead && lookahead <= 'Z') ||
('a' <= lookahead && lookahead <= 'z') ||
lookahead == '|') ADVANCE(6);
END_STATE(); END_STATE();
case 7: case 7:
ACCEPT_TOKEN(sym_identifier); ACCEPT_TOKEN(sym_comment);
if (lookahead == '_') ADVANCE(2);
if (('A' <= lookahead && lookahead <= 'Z') ||
('a' <= lookahead && lookahead <= 'z')) ADVANCE(7);
END_STATE(); END_STATE();
case 8: case 8:
ACCEPT_TOKEN(anon_sym_EQ); ACCEPT_TOKEN(sym_comment);
if (lookahead == '\n') ADVANCE(7);
if (lookahead == '\r') ADVANCE(8);
if (lookahead != 0) ADVANCE(1);
END_STATE(); END_STATE();
case 9: case 9:
ACCEPT_TOKEN(anon_sym_PLUS); ACCEPT_TOKEN(anon_sym_EQ);
END_STATE(); END_STATE();
case 10: case 10:
ACCEPT_TOKEN(anon_sym_DASH);
END_STATE();
case 11:
ACCEPT_TOKEN(anon_sym_PLUS);
END_STATE();
case 12:
ACCEPT_TOKEN(anon_sym_SEMI);
END_STATE();
case 13:
ACCEPT_TOKEN(anon_sym_SLASH);
END_STATE();
case 14:
ACCEPT_TOKEN(anon_sym_PIPE);
if (lookahead == '_') ADVANCE(5);
if (lookahead == '.' ||
('A' <= lookahead && lookahead <= 'Z') ||
('a' <= lookahead && lookahead <= 'z') ||
lookahead == '|') ADVANCE(6);
END_STATE();
case 15:
ACCEPT_TOKEN(anon_sym_AMP);
END_STATE();
case 16:
ACCEPT_TOKEN(sym_integer); ACCEPT_TOKEN(sym_integer);
END_STATE(); END_STATE();
default: default:
@ -171,60 +262,97 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
static const TSLexMode ts_lex_modes[STATE_COUNT] = { static const TSLexMode ts_lex_modes[STATE_COUNT] = {
[0] = {.lex_state = 0}, [0] = {.lex_state = 0},
[1] = {.lex_state = 0}, [1] = {.lex_state = 3},
[2] = {.lex_state = 0}, [2] = {.lex_state = 0},
[3] = {.lex_state = 0}, [3] = {.lex_state = 3},
[4] = {.lex_state = 0}, [4] = {.lex_state = 3},
[5] = {.lex_state = 0}, [5] = {.lex_state = 3},
[6] = {.lex_state = 0},
[7] = {.lex_state = 3},
[8] = {.lex_state = 3},
}; };
static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = {
[0] = { [0] = {
[ts_builtin_sym_end] = ACTIONS(1), [ts_builtin_sym_end] = ACTIONS(1),
[sym_comment] = ACTIONS(1),
[sym_identifier] = ACTIONS(1), [sym_identifier] = ACTIONS(1),
[sym_comment] = ACTIONS(1),
[anon_sym_EQ] = ACTIONS(1), [anon_sym_EQ] = ACTIONS(1),
[anon_sym_DASH] = ACTIONS(1),
[anon_sym_PLUS] = ACTIONS(1), [anon_sym_PLUS] = ACTIONS(1),
[anon_sym_SEMI] = ACTIONS(1),
[anon_sym_SLASH] = ACTIONS(1),
[anon_sym_PIPE] = ACTIONS(1),
[anon_sym_AMP] = ACTIONS(1),
[sym_integer] = ACTIONS(1), [sym_integer] = ACTIONS(1),
}, },
[1] = { [1] = {
[sym_source_file] = STATE(5), [sym_source_file] = STATE(6),
[sym_expression] = STATE(2), [sym_expression] = STATE(3),
[aux_sym_source_file_repeat1] = STATE(2), [aux_sym_source_file_repeat1] = STATE(3),
[ts_builtin_sym_end] = ACTIONS(3), [ts_builtin_sym_end] = ACTIONS(3),
[sym_comment] = ACTIONS(5), [sym_identifier] = ACTIONS(5),
[sym_identifier] = ACTIONS(7), [sym_comment] = ACTIONS(7),
}, },
[2] = { [2] = {
[sym_expression] = STATE(3), [sym_operator] = STATE(8),
[aux_sym_source_file_repeat1] = STATE(3),
[ts_builtin_sym_end] = ACTIONS(9), [ts_builtin_sym_end] = ACTIONS(9),
[sym_comment] = ACTIONS(11), [sym_identifier] = ACTIONS(11),
[sym_identifier] = ACTIONS(7), [sym_comment] = ACTIONS(9),
}, [anon_sym_EQ] = ACTIONS(13),
[3] = { [anon_sym_DASH] = ACTIONS(13),
[sym_expression] = STATE(3), [anon_sym_PLUS] = ACTIONS(13),
[aux_sym_source_file_repeat1] = STATE(3), [anon_sym_SEMI] = ACTIONS(13),
[ts_builtin_sym_end] = ACTIONS(13), [anon_sym_SLASH] = ACTIONS(13),
[sym_comment] = ACTIONS(15), [anon_sym_PIPE] = ACTIONS(15),
[sym_identifier] = ACTIONS(18), [anon_sym_AMP] = ACTIONS(13),
}, },
}; };
static const uint16_t ts_small_parse_table[] = { static const uint16_t ts_small_parse_table[] = {
[0] = 1, [0] = 4,
ACTIONS(21), 3, ACTIONS(5), 1,
ts_builtin_sym_end,
sym_comment,
sym_identifier, sym_identifier,
[6] = 1, ACTIONS(17), 1,
ACTIONS(23), 1,
ts_builtin_sym_end, ts_builtin_sym_end,
ACTIONS(19), 1,
sym_comment,
STATE(4), 2,
sym_expression,
aux_sym_source_file_repeat1,
[14] = 4,
ACTIONS(21), 1,
ts_builtin_sym_end,
ACTIONS(23), 1,
sym_identifier,
ACTIONS(26), 1,
sym_comment,
STATE(4), 2,
sym_expression,
aux_sym_source_file_repeat1,
[28] = 1,
ACTIONS(29), 3,
ts_builtin_sym_end,
sym_identifier,
sym_comment,
[34] = 1,
ACTIONS(31), 1,
ts_builtin_sym_end,
[38] = 1,
ACTIONS(33), 1,
sym_identifier,
[42] = 1,
ACTIONS(35), 1,
sym_identifier,
}; };
static const uint32_t ts_small_parse_table_map[] = { static const uint32_t ts_small_parse_table_map[] = {
[SMALL_STATE(4)] = 0, [SMALL_STATE(3)] = 0,
[SMALL_STATE(5)] = 6, [SMALL_STATE(4)] = 14,
[SMALL_STATE(5)] = 28,
[SMALL_STATE(6)] = 34,
[SMALL_STATE(7)] = 38,
[SMALL_STATE(8)] = 42,
}; };
static const TSParseActionEntry ts_parse_actions[] = { static const TSParseActionEntry ts_parse_actions[] = {
@ -232,14 +360,20 @@ static const TSParseActionEntry ts_parse_actions[] = {
[1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(), [1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(),
[3] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 0), [3] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 0),
[5] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2), [5] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2),
[7] = {.entry = {.count = 1, .reusable = true}}, SHIFT(4), [7] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3),
[9] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1), [9] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_expression, 1),
[11] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3), [11] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_expression, 1),
[13] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), [13] = {.entry = {.count = 1, .reusable = true}}, SHIFT(7),
[15] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(3), [15] = {.entry = {.count = 1, .reusable = false}}, SHIFT(7),
[18] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(4), [17] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1),
[21] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_expression, 1), [19] = {.entry = {.count = 1, .reusable = true}}, SHIFT(4),
[23] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), [21] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2),
[23] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(2),
[26] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(4),
[29] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_expression, 3),
[31] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(),
[33] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_operator, 1),
[35] = {.entry = {.count = 1, .reusable = true}}, SHIFT(5),
}; };
#ifdef __cplusplus #ifdef __cplusplus