diff --git a/corpus/tests.txt b/corpus/tests.txt index 0c39d2c..95f15e7 100644 --- a/corpus/tests.txt +++ b/corpus/tests.txt @@ -14,3 +14,63 @@ x # xyz (expression (identifier)) (comment)) + +================== +Identifiers +================== + +variable_name +_unused_variable +__strange_format__ +a +blahblah +x.x + +--- + +(source_file + (expression + (identifier)) + (expression + (identifier)) + (expression + (identifier)) + (expression + (identifier)) + (expression + (identifier)) + (expression + (identifier))) + +================== +Operators +================== + ++ - = / & | + +--- + +(source_file + (expression + (operator) + (operator) + (operator) + (operator) + (operator) + (operator))) + +================== +Expressions +================== + +x_x = 1; + +--- + +(source_file + (expression + (identifier)) + (operator) + (integer))) + + diff --git a/grammar.js b/grammar.js index 166a92e..f192083 100644 --- a/grammar.js +++ b/grammar.js @@ -4,18 +4,24 @@ module.exports = grammar({ rules: { source_file: $ => repeat(choice($.comment, $.expression)), + identifier: $ => /[a-zA-Z|_|.]+(_[a-zA-Z]+)*/, + comment: $ => /(#)(.+?)([\n\r])/, expression: $ => choice( - $.identifier - // TODO: other kinds of definitions + $.identifier, + seq($.identifier, $.operator, $.identifier) + ), - identifier: $ => /[a-zA-Z]+(_[a-zA-Z]+)*/, - operator: $ => choice( '=', - "+", + '-', + '+', + ';', + '/', + '|', + '&' ), integer: $ => /\d/, diff --git a/src/grammar.json b/src/grammar.json index 7a40701..ab727d7 100644 --- a/src/grammar.json +++ b/src/grammar.json @@ -17,6 +17,10 @@ ] } }, + "identifier": { + "type": "PATTERN", + "value": "[a-zA-Z|_|.]+(_[a-zA-Z]+)*" + }, "comment": { "type": "PATTERN", "value": "(#)(.+?)([\\n\\r])" @@ -27,13 +31,26 @@ { "type": "SYMBOL", "name": "identifier" + }, + { + "type": "SEQ", + "members": [ + { + "type": "SYMBOL", + "name": "identifier" + }, + { + "type": "SYMBOL", + "name": "operator" + }, + { + "type": "SYMBOL", + "name": "identifier" + } + ] } ] }, - "identifier": { - "type": "PATTERN", - "value": "[a-zA-Z]+(_[a-zA-Z]+)*" - }, "operator": { "type": "CHOICE", "members": [ @@ -41,9 +58,29 @@ "type": "STRING", "value": "=" }, + { + "type": "STRING", + "value": "-" + }, { "type": "STRING", "value": "+" + }, + { + "type": "STRING", + "value": ";" + }, + { + "type": "STRING", + "value": "/" + }, + { + "type": "STRING", + "value": "|" + }, + { + "type": "STRING", + "value": "&" } ] }, diff --git a/src/node-types.json b/src/node-types.json index 2201003..720ec73 100644 --- a/src/node-types.json +++ b/src/node-types.json @@ -4,16 +4,25 @@ "named": true, "fields": {}, "children": { - "multiple": false, + "multiple": true, "required": true, "types": [ { "type": "identifier", "named": true + }, + { + "type": "operator", + "named": true } ] } }, + { + "type": "operator", + "named": true, + "fields": {} + }, { "type": "source_file", "named": true, @@ -33,10 +42,26 @@ ] } }, + { + "type": "&", + "named": false + }, { "type": "+", "named": false }, + { + "type": "-", + "named": false + }, + { + "type": "/", + "named": false + }, + { + "type": ";", + "named": false + }, { "type": "=", "named": false @@ -48,5 +73,9 @@ { "type": "identifier", "named": true + }, + { + "type": "|", + "named": false } ] \ No newline at end of file diff --git a/src/parser.c b/src/parser.c index 678081b..c6891a6 100644 --- a/src/parser.c +++ b/src/parser.c @@ -6,48 +6,66 @@ #endif #define LANGUAGE_VERSION 14 -#define STATE_COUNT 6 -#define LARGE_STATE_COUNT 4 -#define SYMBOL_COUNT 9 +#define STATE_COUNT 9 +#define LARGE_STATE_COUNT 3 +#define SYMBOL_COUNT 15 #define ALIAS_COUNT 0 -#define TOKEN_COUNT 6 +#define TOKEN_COUNT 11 #define EXTERNAL_TOKEN_COUNT 0 #define FIELD_COUNT 0 -#define MAX_ALIAS_SEQUENCE_LENGTH 2 +#define MAX_ALIAS_SEQUENCE_LENGTH 3 #define PRODUCTION_ID_COUNT 1 enum { - sym_comment = 1, - sym_identifier = 2, + sym_identifier = 1, + sym_comment = 2, anon_sym_EQ = 3, - anon_sym_PLUS = 4, - sym_integer = 5, - sym_source_file = 6, - sym_expression = 7, - aux_sym_source_file_repeat1 = 8, + anon_sym_DASH = 4, + anon_sym_PLUS = 5, + anon_sym_SEMI = 6, + anon_sym_SLASH = 7, + anon_sym_PIPE = 8, + anon_sym_AMP = 9, + sym_integer = 10, + sym_source_file = 11, + sym_expression = 12, + sym_operator = 13, + aux_sym_source_file_repeat1 = 14, }; static const char * const ts_symbol_names[] = { [ts_builtin_sym_end] = "end", - [sym_comment] = "comment", [sym_identifier] = "identifier", + [sym_comment] = "comment", [anon_sym_EQ] = "=", + [anon_sym_DASH] = "-", [anon_sym_PLUS] = "+", + [anon_sym_SEMI] = ";", + [anon_sym_SLASH] = "/", + [anon_sym_PIPE] = "|", + [anon_sym_AMP] = "&", [sym_integer] = "integer", [sym_source_file] = "source_file", [sym_expression] = "expression", + [sym_operator] = "operator", [aux_sym_source_file_repeat1] = "source_file_repeat1", }; static const TSSymbol ts_symbol_map[] = { [ts_builtin_sym_end] = ts_builtin_sym_end, - [sym_comment] = sym_comment, [sym_identifier] = sym_identifier, + [sym_comment] = sym_comment, [anon_sym_EQ] = anon_sym_EQ, + [anon_sym_DASH] = anon_sym_DASH, [anon_sym_PLUS] = anon_sym_PLUS, + [anon_sym_SEMI] = anon_sym_SEMI, + [anon_sym_SLASH] = anon_sym_SLASH, + [anon_sym_PIPE] = anon_sym_PIPE, + [anon_sym_AMP] = anon_sym_AMP, [sym_integer] = sym_integer, [sym_source_file] = sym_source_file, [sym_expression] = sym_expression, + [sym_operator] = sym_operator, [aux_sym_source_file_repeat1] = aux_sym_source_file_repeat1, }; @@ -56,11 +74,11 @@ static const TSSymbolMetadata ts_symbol_metadata[] = { .visible = false, .named = true, }, - [sym_comment] = { + [sym_identifier] = { .visible = true, .named = true, }, - [sym_identifier] = { + [sym_comment] = { .visible = true, .named = true, }, @@ -68,10 +86,30 @@ static const TSSymbolMetadata ts_symbol_metadata[] = { .visible = true, .named = false, }, + [anon_sym_DASH] = { + .visible = true, + .named = false, + }, [anon_sym_PLUS] = { .visible = true, .named = false, }, + [anon_sym_SEMI] = { + .visible = true, + .named = false, + }, + [anon_sym_SLASH] = { + .visible = true, + .named = false, + }, + [anon_sym_PIPE] = { + .visible = true, + .named = false, + }, + [anon_sym_AMP] = { + .visible = true, + .named = false, + }, [sym_integer] = { .visible = true, .named = true, @@ -84,6 +122,10 @@ static const TSSymbolMetadata ts_symbol_metadata[] = { .visible = true, .named = true, }, + [sym_operator] = { + .visible = true, + .named = true, + }, [aux_sym_source_file_repeat1] = { .visible = false, .named = false, @@ -105,6 +147,9 @@ static const TSStateId ts_primary_state_ids[STATE_COUNT] = { [3] = 3, [4] = 4, [5] = 5, + [6] = 6, + [7] = 7, + [8] = 8, }; static bool ts_lex(TSLexer *lexer, TSStateId state) { @@ -113,55 +158,101 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { switch (state) { case 0: if (eof) ADVANCE(4); - if (lookahead == '#') ADVANCE(3); - if (lookahead == '+') ADVANCE(9); - if (lookahead == '=') ADVANCE(8); + if (lookahead == '#') ADVANCE(2); + if (lookahead == '&') ADVANCE(15); + if (lookahead == '+') ADVANCE(11); + if (lookahead == '-') ADVANCE(10); + if (lookahead == '/') ADVANCE(13); + if (lookahead == ';') ADVANCE(12); + if (lookahead == '=') ADVANCE(9); + if (lookahead == '|') ADVANCE(14); if (lookahead == '\t' || lookahead == '\n' || lookahead == '\r' || lookahead == ' ') SKIP(0) - if (('0' <= lookahead && lookahead <= '9')) ADVANCE(10); - if (('A' <= lookahead && lookahead <= 'Z') || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(7); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(16); + if (lookahead == '.' || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(6); END_STATE(); case 1: - if (lookahead == '\n') ADVANCE(5); - if (lookahead == '\r') ADVANCE(6); + if (lookahead == '\n') ADVANCE(7); + if (lookahead == '\r') ADVANCE(8); if (lookahead != 0) ADVANCE(1); END_STATE(); case 2: - if (('A' <= lookahead && lookahead <= 'Z') || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(7); - END_STATE(); - case 3: if (lookahead != 0 && lookahead != '\n') ADVANCE(1); END_STATE(); + case 3: + if (eof) ADVANCE(4); + if (lookahead == '#') ADVANCE(2); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') SKIP(3) + if (lookahead == '.' || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('a' <= lookahead && lookahead <= 'z') || + lookahead == '|') ADVANCE(6); + END_STATE(); case 4: ACCEPT_TOKEN(ts_builtin_sym_end); END_STATE(); case 5: - ACCEPT_TOKEN(sym_comment); + ACCEPT_TOKEN(sym_identifier); + if (lookahead == '_') ADVANCE(5); + if (lookahead == '.' || + lookahead == '|') ADVANCE(6); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(5); END_STATE(); case 6: - ACCEPT_TOKEN(sym_comment); - if (lookahead == '\n') ADVANCE(5); - if (lookahead == '\r') ADVANCE(6); - if (lookahead != 0) ADVANCE(1); + ACCEPT_TOKEN(sym_identifier); + if (lookahead == '_') ADVANCE(5); + if (lookahead == '.' || + ('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z') || + lookahead == '|') ADVANCE(6); END_STATE(); case 7: - ACCEPT_TOKEN(sym_identifier); - if (lookahead == '_') ADVANCE(2); - if (('A' <= lookahead && lookahead <= 'Z') || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(7); + ACCEPT_TOKEN(sym_comment); END_STATE(); case 8: - ACCEPT_TOKEN(anon_sym_EQ); + ACCEPT_TOKEN(sym_comment); + if (lookahead == '\n') ADVANCE(7); + if (lookahead == '\r') ADVANCE(8); + if (lookahead != 0) ADVANCE(1); END_STATE(); case 9: - ACCEPT_TOKEN(anon_sym_PLUS); + ACCEPT_TOKEN(anon_sym_EQ); END_STATE(); case 10: + ACCEPT_TOKEN(anon_sym_DASH); + END_STATE(); + case 11: + ACCEPT_TOKEN(anon_sym_PLUS); + END_STATE(); + case 12: + ACCEPT_TOKEN(anon_sym_SEMI); + END_STATE(); + case 13: + ACCEPT_TOKEN(anon_sym_SLASH); + END_STATE(); + case 14: + ACCEPT_TOKEN(anon_sym_PIPE); + if (lookahead == '_') ADVANCE(5); + if (lookahead == '.' || + ('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z') || + lookahead == '|') ADVANCE(6); + END_STATE(); + case 15: + ACCEPT_TOKEN(anon_sym_AMP); + END_STATE(); + case 16: ACCEPT_TOKEN(sym_integer); END_STATE(); default: @@ -171,60 +262,97 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { static const TSLexMode ts_lex_modes[STATE_COUNT] = { [0] = {.lex_state = 0}, - [1] = {.lex_state = 0}, + [1] = {.lex_state = 3}, [2] = {.lex_state = 0}, - [3] = {.lex_state = 0}, - [4] = {.lex_state = 0}, - [5] = {.lex_state = 0}, + [3] = {.lex_state = 3}, + [4] = {.lex_state = 3}, + [5] = {.lex_state = 3}, + [6] = {.lex_state = 0}, + [7] = {.lex_state = 3}, + [8] = {.lex_state = 3}, }; static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { [0] = { [ts_builtin_sym_end] = ACTIONS(1), - [sym_comment] = ACTIONS(1), [sym_identifier] = ACTIONS(1), + [sym_comment] = ACTIONS(1), [anon_sym_EQ] = ACTIONS(1), + [anon_sym_DASH] = ACTIONS(1), [anon_sym_PLUS] = ACTIONS(1), + [anon_sym_SEMI] = ACTIONS(1), + [anon_sym_SLASH] = ACTIONS(1), + [anon_sym_PIPE] = ACTIONS(1), + [anon_sym_AMP] = ACTIONS(1), [sym_integer] = ACTIONS(1), }, [1] = { - [sym_source_file] = STATE(5), - [sym_expression] = STATE(2), - [aux_sym_source_file_repeat1] = STATE(2), + [sym_source_file] = STATE(6), + [sym_expression] = STATE(3), + [aux_sym_source_file_repeat1] = STATE(3), [ts_builtin_sym_end] = ACTIONS(3), - [sym_comment] = ACTIONS(5), - [sym_identifier] = ACTIONS(7), + [sym_identifier] = ACTIONS(5), + [sym_comment] = ACTIONS(7), }, [2] = { - [sym_expression] = STATE(3), - [aux_sym_source_file_repeat1] = STATE(3), + [sym_operator] = STATE(8), [ts_builtin_sym_end] = ACTIONS(9), - [sym_comment] = ACTIONS(11), - [sym_identifier] = ACTIONS(7), - }, - [3] = { - [sym_expression] = STATE(3), - [aux_sym_source_file_repeat1] = STATE(3), - [ts_builtin_sym_end] = ACTIONS(13), - [sym_comment] = ACTIONS(15), - [sym_identifier] = ACTIONS(18), + [sym_identifier] = ACTIONS(11), + [sym_comment] = ACTIONS(9), + [anon_sym_EQ] = ACTIONS(13), + [anon_sym_DASH] = ACTIONS(13), + [anon_sym_PLUS] = ACTIONS(13), + [anon_sym_SEMI] = ACTIONS(13), + [anon_sym_SLASH] = ACTIONS(13), + [anon_sym_PIPE] = ACTIONS(15), + [anon_sym_AMP] = ACTIONS(13), }, }; static const uint16_t ts_small_parse_table[] = { - [0] = 1, - ACTIONS(21), 3, - ts_builtin_sym_end, - sym_comment, + [0] = 4, + ACTIONS(5), 1, sym_identifier, - [6] = 1, - ACTIONS(23), 1, + ACTIONS(17), 1, ts_builtin_sym_end, + ACTIONS(19), 1, + sym_comment, + STATE(4), 2, + sym_expression, + aux_sym_source_file_repeat1, + [14] = 4, + ACTIONS(21), 1, + ts_builtin_sym_end, + ACTIONS(23), 1, + sym_identifier, + ACTIONS(26), 1, + sym_comment, + STATE(4), 2, + sym_expression, + aux_sym_source_file_repeat1, + [28] = 1, + ACTIONS(29), 3, + ts_builtin_sym_end, + sym_identifier, + sym_comment, + [34] = 1, + ACTIONS(31), 1, + ts_builtin_sym_end, + [38] = 1, + ACTIONS(33), 1, + sym_identifier, + [42] = 1, + ACTIONS(35), 1, + sym_identifier, }; static const uint32_t ts_small_parse_table_map[] = { - [SMALL_STATE(4)] = 0, - [SMALL_STATE(5)] = 6, + [SMALL_STATE(3)] = 0, + [SMALL_STATE(4)] = 14, + [SMALL_STATE(5)] = 28, + [SMALL_STATE(6)] = 34, + [SMALL_STATE(7)] = 38, + [SMALL_STATE(8)] = 42, }; static const TSParseActionEntry ts_parse_actions[] = { @@ -232,14 +360,20 @@ static const TSParseActionEntry ts_parse_actions[] = { [1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(), [3] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 0), [5] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2), - [7] = {.entry = {.count = 1, .reusable = true}}, SHIFT(4), - [9] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1), - [11] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3), - [13] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), - [15] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(3), - [18] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(4), - [21] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_expression, 1), - [23] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), + [7] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3), + [9] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_expression, 1), + [11] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_expression, 1), + [13] = {.entry = {.count = 1, .reusable = true}}, SHIFT(7), + [15] = {.entry = {.count = 1, .reusable = false}}, SHIFT(7), + [17] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1), + [19] = {.entry = {.count = 1, .reusable = true}}, SHIFT(4), + [21] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), + [23] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(2), + [26] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(4), + [29] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_expression, 3), + [31] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), + [33] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_operator, 1), + [35] = {.entry = {.count = 1, .reusable = true}}, SHIFT(5), }; #ifdef __cplusplus