Add tests and modify grammar

This commit is contained in:
Jeff 2023-08-22 17:28:19 -04:00
parent 00a15c1704
commit f2d0c4af96
5 changed files with 352 additions and 86 deletions

View File

@ -14,3 +14,63 @@ x # xyz
(expression
(identifier))
(comment))
==================
Identifiers
==================
variable_name
_unused_variable
__strange_format__
a
blahblah
x.x
---
(source_file
(expression
(identifier))
(expression
(identifier))
(expression
(identifier))
(expression
(identifier))
(expression
(identifier))
(expression
(identifier)))
==================
Operators
==================
+ - = / & |
---
(source_file
(expression
(operator)
(operator)
(operator)
(operator)
(operator)
(operator)))
==================
Expressions
==================
x_x = 1;
---
(source_file
(expression
(identifier)
(operator)
(integer)))

View File

@ -4,18 +4,24 @@ module.exports = grammar({
rules: {
source_file: $ => repeat(choice($.comment, $.expression)),
identifier: $ => /[a-zA-Z|_|.]+(_[a-zA-Z]+)*/,
comment: $ => /(#)(.+?)([\n\r])/,
expression: $ => choice(
$.identifier
// TODO: other kinds of definitions
$.identifier,
seq($.identifier, $.operator, $.identifier)
),
identifier: $ => /[a-zA-Z]+(_[a-zA-Z]+)*/,
operator: $ => choice(
'=',
"+",
'-',
'+',
';',
'/',
'|',
'&'
),
integer: $ => /\d/,

View File

@ -17,6 +17,10 @@
]
}
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z|_|.]+(_[a-zA-Z]+)*"
},
"comment": {
"type": "PATTERN",
"value": "(#)(.+?)([\\n\\r])"
@ -27,13 +31,26 @@
{
"type": "SYMBOL",
"name": "identifier"
},
{
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "identifier"
},
{
"type": "SYMBOL",
"name": "operator"
},
{
"type": "SYMBOL",
"name": "identifier"
}
]
}
]
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+(_[a-zA-Z]+)*"
},
"operator": {
"type": "CHOICE",
"members": [
@ -41,9 +58,29 @@
"type": "STRING",
"value": "="
},
{
"type": "STRING",
"value": "-"
},
{
"type": "STRING",
"value": "+"
},
{
"type": "STRING",
"value": ";"
},
{
"type": "STRING",
"value": "/"
},
{
"type": "STRING",
"value": "|"
},
{
"type": "STRING",
"value": "&"
}
]
},

View File

@ -4,16 +4,25 @@
"named": true,
"fields": {},
"children": {
"multiple": false,
"multiple": true,
"required": true,
"types": [
{
"type": "identifier",
"named": true
},
{
"type": "operator",
"named": true
}
]
}
},
{
"type": "operator",
"named": true,
"fields": {}
},
{
"type": "source_file",
"named": true,
@ -33,10 +42,26 @@
]
}
},
{
"type": "&",
"named": false
},
{
"type": "+",
"named": false
},
{
"type": "-",
"named": false
},
{
"type": "/",
"named": false
},
{
"type": ";",
"named": false
},
{
"type": "=",
"named": false
@ -48,5 +73,9 @@
{
"type": "identifier",
"named": true
},
{
"type": "|",
"named": false
}
]

View File

@ -6,48 +6,66 @@
#endif
#define LANGUAGE_VERSION 14
#define STATE_COUNT 6
#define LARGE_STATE_COUNT 4
#define SYMBOL_COUNT 9
#define STATE_COUNT 9
#define LARGE_STATE_COUNT 3
#define SYMBOL_COUNT 15
#define ALIAS_COUNT 0
#define TOKEN_COUNT 6
#define TOKEN_COUNT 11
#define EXTERNAL_TOKEN_COUNT 0
#define FIELD_COUNT 0
#define MAX_ALIAS_SEQUENCE_LENGTH 2
#define MAX_ALIAS_SEQUENCE_LENGTH 3
#define PRODUCTION_ID_COUNT 1
enum {
sym_comment = 1,
sym_identifier = 2,
sym_identifier = 1,
sym_comment = 2,
anon_sym_EQ = 3,
anon_sym_PLUS = 4,
sym_integer = 5,
sym_source_file = 6,
sym_expression = 7,
aux_sym_source_file_repeat1 = 8,
anon_sym_DASH = 4,
anon_sym_PLUS = 5,
anon_sym_SEMI = 6,
anon_sym_SLASH = 7,
anon_sym_PIPE = 8,
anon_sym_AMP = 9,
sym_integer = 10,
sym_source_file = 11,
sym_expression = 12,
sym_operator = 13,
aux_sym_source_file_repeat1 = 14,
};
static const char * const ts_symbol_names[] = {
[ts_builtin_sym_end] = "end",
[sym_comment] = "comment",
[sym_identifier] = "identifier",
[sym_comment] = "comment",
[anon_sym_EQ] = "=",
[anon_sym_DASH] = "-",
[anon_sym_PLUS] = "+",
[anon_sym_SEMI] = ";",
[anon_sym_SLASH] = "/",
[anon_sym_PIPE] = "|",
[anon_sym_AMP] = "&",
[sym_integer] = "integer",
[sym_source_file] = "source_file",
[sym_expression] = "expression",
[sym_operator] = "operator",
[aux_sym_source_file_repeat1] = "source_file_repeat1",
};
static const TSSymbol ts_symbol_map[] = {
[ts_builtin_sym_end] = ts_builtin_sym_end,
[sym_comment] = sym_comment,
[sym_identifier] = sym_identifier,
[sym_comment] = sym_comment,
[anon_sym_EQ] = anon_sym_EQ,
[anon_sym_DASH] = anon_sym_DASH,
[anon_sym_PLUS] = anon_sym_PLUS,
[anon_sym_SEMI] = anon_sym_SEMI,
[anon_sym_SLASH] = anon_sym_SLASH,
[anon_sym_PIPE] = anon_sym_PIPE,
[anon_sym_AMP] = anon_sym_AMP,
[sym_integer] = sym_integer,
[sym_source_file] = sym_source_file,
[sym_expression] = sym_expression,
[sym_operator] = sym_operator,
[aux_sym_source_file_repeat1] = aux_sym_source_file_repeat1,
};
@ -56,11 +74,11 @@ static const TSSymbolMetadata ts_symbol_metadata[] = {
.visible = false,
.named = true,
},
[sym_comment] = {
[sym_identifier] = {
.visible = true,
.named = true,
},
[sym_identifier] = {
[sym_comment] = {
.visible = true,
.named = true,
},
@ -68,10 +86,30 @@ static const TSSymbolMetadata ts_symbol_metadata[] = {
.visible = true,
.named = false,
},
[anon_sym_DASH] = {
.visible = true,
.named = false,
},
[anon_sym_PLUS] = {
.visible = true,
.named = false,
},
[anon_sym_SEMI] = {
.visible = true,
.named = false,
},
[anon_sym_SLASH] = {
.visible = true,
.named = false,
},
[anon_sym_PIPE] = {
.visible = true,
.named = false,
},
[anon_sym_AMP] = {
.visible = true,
.named = false,
},
[sym_integer] = {
.visible = true,
.named = true,
@ -84,6 +122,10 @@ static const TSSymbolMetadata ts_symbol_metadata[] = {
.visible = true,
.named = true,
},
[sym_operator] = {
.visible = true,
.named = true,
},
[aux_sym_source_file_repeat1] = {
.visible = false,
.named = false,
@ -105,6 +147,9 @@ static const TSStateId ts_primary_state_ids[STATE_COUNT] = {
[3] = 3,
[4] = 4,
[5] = 5,
[6] = 6,
[7] = 7,
[8] = 8,
};
static bool ts_lex(TSLexer *lexer, TSStateId state) {
@ -113,55 +158,101 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
switch (state) {
case 0:
if (eof) ADVANCE(4);
if (lookahead == '#') ADVANCE(3);
if (lookahead == '+') ADVANCE(9);
if (lookahead == '=') ADVANCE(8);
if (lookahead == '#') ADVANCE(2);
if (lookahead == '&') ADVANCE(15);
if (lookahead == '+') ADVANCE(11);
if (lookahead == '-') ADVANCE(10);
if (lookahead == '/') ADVANCE(13);
if (lookahead == ';') ADVANCE(12);
if (lookahead == '=') ADVANCE(9);
if (lookahead == '|') ADVANCE(14);
if (lookahead == '\t' ||
lookahead == '\n' ||
lookahead == '\r' ||
lookahead == ' ') SKIP(0)
if (('0' <= lookahead && lookahead <= '9')) ADVANCE(10);
if (('A' <= lookahead && lookahead <= 'Z') ||
('a' <= lookahead && lookahead <= 'z')) ADVANCE(7);
if (('0' <= lookahead && lookahead <= '9')) ADVANCE(16);
if (lookahead == '.' ||
('A' <= lookahead && lookahead <= 'Z') ||
lookahead == '_' ||
('a' <= lookahead && lookahead <= 'z')) ADVANCE(6);
END_STATE();
case 1:
if (lookahead == '\n') ADVANCE(5);
if (lookahead == '\r') ADVANCE(6);
if (lookahead == '\n') ADVANCE(7);
if (lookahead == '\r') ADVANCE(8);
if (lookahead != 0) ADVANCE(1);
END_STATE();
case 2:
if (('A' <= lookahead && lookahead <= 'Z') ||
('a' <= lookahead && lookahead <= 'z')) ADVANCE(7);
END_STATE();
case 3:
if (lookahead != 0 &&
lookahead != '\n') ADVANCE(1);
END_STATE();
case 3:
if (eof) ADVANCE(4);
if (lookahead == '#') ADVANCE(2);
if (lookahead == '\t' ||
lookahead == '\n' ||
lookahead == '\r' ||
lookahead == ' ') SKIP(3)
if (lookahead == '.' ||
('A' <= lookahead && lookahead <= 'Z') ||
lookahead == '_' ||
('a' <= lookahead && lookahead <= 'z') ||
lookahead == '|') ADVANCE(6);
END_STATE();
case 4:
ACCEPT_TOKEN(ts_builtin_sym_end);
END_STATE();
case 5:
ACCEPT_TOKEN(sym_comment);
ACCEPT_TOKEN(sym_identifier);
if (lookahead == '_') ADVANCE(5);
if (lookahead == '.' ||
lookahead == '|') ADVANCE(6);
if (('A' <= lookahead && lookahead <= 'Z') ||
('a' <= lookahead && lookahead <= 'z')) ADVANCE(5);
END_STATE();
case 6:
ACCEPT_TOKEN(sym_comment);
if (lookahead == '\n') ADVANCE(5);
if (lookahead == '\r') ADVANCE(6);
if (lookahead != 0) ADVANCE(1);
ACCEPT_TOKEN(sym_identifier);
if (lookahead == '_') ADVANCE(5);
if (lookahead == '.' ||
('A' <= lookahead && lookahead <= 'Z') ||
('a' <= lookahead && lookahead <= 'z') ||
lookahead == '|') ADVANCE(6);
END_STATE();
case 7:
ACCEPT_TOKEN(sym_identifier);
if (lookahead == '_') ADVANCE(2);
if (('A' <= lookahead && lookahead <= 'Z') ||
('a' <= lookahead && lookahead <= 'z')) ADVANCE(7);
ACCEPT_TOKEN(sym_comment);
END_STATE();
case 8:
ACCEPT_TOKEN(anon_sym_EQ);
ACCEPT_TOKEN(sym_comment);
if (lookahead == '\n') ADVANCE(7);
if (lookahead == '\r') ADVANCE(8);
if (lookahead != 0) ADVANCE(1);
END_STATE();
case 9:
ACCEPT_TOKEN(anon_sym_PLUS);
ACCEPT_TOKEN(anon_sym_EQ);
END_STATE();
case 10:
ACCEPT_TOKEN(anon_sym_DASH);
END_STATE();
case 11:
ACCEPT_TOKEN(anon_sym_PLUS);
END_STATE();
case 12:
ACCEPT_TOKEN(anon_sym_SEMI);
END_STATE();
case 13:
ACCEPT_TOKEN(anon_sym_SLASH);
END_STATE();
case 14:
ACCEPT_TOKEN(anon_sym_PIPE);
if (lookahead == '_') ADVANCE(5);
if (lookahead == '.' ||
('A' <= lookahead && lookahead <= 'Z') ||
('a' <= lookahead && lookahead <= 'z') ||
lookahead == '|') ADVANCE(6);
END_STATE();
case 15:
ACCEPT_TOKEN(anon_sym_AMP);
END_STATE();
case 16:
ACCEPT_TOKEN(sym_integer);
END_STATE();
default:
@ -171,60 +262,97 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
static const TSLexMode ts_lex_modes[STATE_COUNT] = {
[0] = {.lex_state = 0},
[1] = {.lex_state = 0},
[1] = {.lex_state = 3},
[2] = {.lex_state = 0},
[3] = {.lex_state = 0},
[4] = {.lex_state = 0},
[5] = {.lex_state = 0},
[3] = {.lex_state = 3},
[4] = {.lex_state = 3},
[5] = {.lex_state = 3},
[6] = {.lex_state = 0},
[7] = {.lex_state = 3},
[8] = {.lex_state = 3},
};
static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = {
[0] = {
[ts_builtin_sym_end] = ACTIONS(1),
[sym_comment] = ACTIONS(1),
[sym_identifier] = ACTIONS(1),
[sym_comment] = ACTIONS(1),
[anon_sym_EQ] = ACTIONS(1),
[anon_sym_DASH] = ACTIONS(1),
[anon_sym_PLUS] = ACTIONS(1),
[anon_sym_SEMI] = ACTIONS(1),
[anon_sym_SLASH] = ACTIONS(1),
[anon_sym_PIPE] = ACTIONS(1),
[anon_sym_AMP] = ACTIONS(1),
[sym_integer] = ACTIONS(1),
},
[1] = {
[sym_source_file] = STATE(5),
[sym_expression] = STATE(2),
[aux_sym_source_file_repeat1] = STATE(2),
[sym_source_file] = STATE(6),
[sym_expression] = STATE(3),
[aux_sym_source_file_repeat1] = STATE(3),
[ts_builtin_sym_end] = ACTIONS(3),
[sym_comment] = ACTIONS(5),
[sym_identifier] = ACTIONS(7),
[sym_identifier] = ACTIONS(5),
[sym_comment] = ACTIONS(7),
},
[2] = {
[sym_expression] = STATE(3),
[aux_sym_source_file_repeat1] = STATE(3),
[sym_operator] = STATE(8),
[ts_builtin_sym_end] = ACTIONS(9),
[sym_comment] = ACTIONS(11),
[sym_identifier] = ACTIONS(7),
},
[3] = {
[sym_expression] = STATE(3),
[aux_sym_source_file_repeat1] = STATE(3),
[ts_builtin_sym_end] = ACTIONS(13),
[sym_comment] = ACTIONS(15),
[sym_identifier] = ACTIONS(18),
[sym_identifier] = ACTIONS(11),
[sym_comment] = ACTIONS(9),
[anon_sym_EQ] = ACTIONS(13),
[anon_sym_DASH] = ACTIONS(13),
[anon_sym_PLUS] = ACTIONS(13),
[anon_sym_SEMI] = ACTIONS(13),
[anon_sym_SLASH] = ACTIONS(13),
[anon_sym_PIPE] = ACTIONS(15),
[anon_sym_AMP] = ACTIONS(13),
},
};
static const uint16_t ts_small_parse_table[] = {
[0] = 1,
ACTIONS(21), 3,
ts_builtin_sym_end,
sym_comment,
[0] = 4,
ACTIONS(5), 1,
sym_identifier,
[6] = 1,
ACTIONS(23), 1,
ACTIONS(17), 1,
ts_builtin_sym_end,
ACTIONS(19), 1,
sym_comment,
STATE(4), 2,
sym_expression,
aux_sym_source_file_repeat1,
[14] = 4,
ACTIONS(21), 1,
ts_builtin_sym_end,
ACTIONS(23), 1,
sym_identifier,
ACTIONS(26), 1,
sym_comment,
STATE(4), 2,
sym_expression,
aux_sym_source_file_repeat1,
[28] = 1,
ACTIONS(29), 3,
ts_builtin_sym_end,
sym_identifier,
sym_comment,
[34] = 1,
ACTIONS(31), 1,
ts_builtin_sym_end,
[38] = 1,
ACTIONS(33), 1,
sym_identifier,
[42] = 1,
ACTIONS(35), 1,
sym_identifier,
};
static const uint32_t ts_small_parse_table_map[] = {
[SMALL_STATE(4)] = 0,
[SMALL_STATE(5)] = 6,
[SMALL_STATE(3)] = 0,
[SMALL_STATE(4)] = 14,
[SMALL_STATE(5)] = 28,
[SMALL_STATE(6)] = 34,
[SMALL_STATE(7)] = 38,
[SMALL_STATE(8)] = 42,
};
static const TSParseActionEntry ts_parse_actions[] = {
@ -232,14 +360,20 @@ static const TSParseActionEntry ts_parse_actions[] = {
[1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(),
[3] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 0),
[5] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2),
[7] = {.entry = {.count = 1, .reusable = true}}, SHIFT(4),
[9] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1),
[11] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3),
[13] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2),
[15] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(3),
[18] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(4),
[21] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_expression, 1),
[23] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(),
[7] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3),
[9] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_expression, 1),
[11] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_expression, 1),
[13] = {.entry = {.count = 1, .reusable = true}}, SHIFT(7),
[15] = {.entry = {.count = 1, .reusable = false}}, SHIFT(7),
[17] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1),
[19] = {.entry = {.count = 1, .reusable = true}}, SHIFT(4),
[21] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2),
[23] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(2),
[26] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(4),
[29] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_expression, 3),
[31] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(),
[33] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_operator, 1),
[35] = {.entry = {.count = 1, .reusable = true}}, SHIFT(5),
};
#ifdef __cplusplus