Author: Michael Davis [mcarsondavis@gmail.com]
Hash: 4e27b28df2dcaadc00de1272ed80b787209adbc2
Timestamp: Thu, 23 Dec 2021 23:05:59 +0000 (2 years ago)

+615 -0 +/-11 browse
generate 'hello world' of grammars
1diff --git a/Cargo.toml b/Cargo.toml
2new file mode 100644
3index 0000000..4e07c5c
4--- /dev/null
5+++ b/Cargo.toml
6 @@ -0,0 +1,26 @@
7+ [package]
8+ name = "tree-sitter-gitdiff"
9+ description = "gitdiff grammar for the tree-sitter parsing library"
10+ version = "0.0.1"
11+ keywords = ["incremental", "parsing", "gitdiff"]
12+ categories = ["parsing", "text-editors"]
13+ repository = "https://github.com/tree-sitter/tree-sitter-gitdiff"
14+ edition = "2018"
15+ license = "MIT"
16+
17+ build = "bindings/rust/build.rs"
18+ include = [
19+ "bindings/rust/*",
20+ "grammar.js",
21+ "queries/*",
22+ "src/*",
23+ ]
24+
25+ [lib]
26+ path = "bindings/rust/lib.rs"
27+
28+ [dependencies]
29+ tree-sitter = "~0.20"
30+
31+ [build-dependencies]
32+ cc = "1.0"
33 diff --git a/binding.gyp b/binding.gyp
34new file mode 100644
35index 0000000..4bd78e7
36--- /dev/null
37+++ b/binding.gyp
38 @@ -0,0 +1,19 @@
39+ {
40+ "targets": [
41+ {
42+ "target_name": "tree_sitter_gitdiff_binding",
43+ "include_dirs": [
44+ "<!(node -e \"require('nan')\")",
45+ "src"
46+ ],
47+ "sources": [
48+ "bindings/node/binding.cc",
49+ "src/parser.c",
50+ # If your language uses an external scanner, add it here.
51+ ],
52+ "cflags_c": [
53+ "-std=c99",
54+ ]
55+ }
56+ ]
57+ }
58 diff --git a/bindings/node/binding.cc b/bindings/node/binding.cc
59new file mode 100644
60index 0000000..63701f1
61--- /dev/null
62+++ b/bindings/node/binding.cc
63 @@ -0,0 +1,28 @@
64+ #include "tree_sitter/parser.h"
65+ #include <node.h>
66+ #include "nan.h"
67+
68+ using namespace v8;
69+
70+ extern "C" TSLanguage * tree_sitter_gitdiff();
71+
72+ namespace {
73+
74+ NAN_METHOD(New) {}  // Constructor callback handed to the "Language" template; does nothing.
75+ // Module initializer: builds one "Language" object wrapping the grammar and exports it.
76+ void Init(Local<Object> exports, Local<Object> module) {
77+   Local<FunctionTemplate> language_tpl = Nan::New<FunctionTemplate>(New);
78+   language_tpl->SetClassName(Nan::New("Language").ToLocalChecked());
79+   language_tpl->InstanceTemplate()->SetInternalFieldCount(1);
80+   // Instantiate the template and store the TSLanguage pointer in internal field 0.
81+   Local<Function> ctor = Nan::GetFunction(language_tpl).ToLocalChecked();
82+   Local<Object> language = ctor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked();
83+   Nan::SetInternalFieldPointer(language, 0, tree_sitter_gitdiff());
84+   // Tag the object with the grammar name and make it the module's `exports`.
85+   Nan::Set(language, Nan::New("name").ToLocalChecked(), Nan::New("gitdiff").ToLocalChecked());
86+   Nan::Set(module, Nan::New("exports").ToLocalChecked(), language);
87+ }
88+
89+ NODE_MODULE(tree_sitter_gitdiff_binding, Init)
90+
91+ } // namespace
92 diff --git a/bindings/node/index.js b/bindings/node/index.js
93new file mode 100644
94index 0000000..7ea3730
95--- /dev/null
96+++ b/bindings/node/index.js
97 @@ -0,0 +1,19 @@
98+ try {
99+ module.exports = require("../../build/Release/tree_sitter_gitdiff_binding");
100+ } catch (error1) {
101+ if (error1.code !== 'MODULE_NOT_FOUND') {
102+ throw error1;
103+ }
104+ try {
105+ module.exports = require("../../build/Debug/tree_sitter_gitdiff_binding");
106+ } catch (error2) {
107+ if (error2.code !== 'MODULE_NOT_FOUND') {
108+ throw error2;
109+ }
110+ throw error1
111+ }
112+ }
113+
114+ try {
115+ module.exports.nodeTypeInfo = require("../../src/node-types.json");
116+ } catch (_) {}
117 diff --git a/bindings/rust/build.rs b/bindings/rust/build.rs
118new file mode 100644
119index 0000000..c6061f0
120--- /dev/null
121+++ b/bindings/rust/build.rs
122 @@ -0,0 +1,40 @@
123+ fn main() {
124+     // Build script: compiles the C parser in `src/parser.c` through the `cc` crate.
125+     let src_dir = std::path::Path::new("src");
126+     let mut c_config = cc::Build::new();
127+     c_config.include(src_dir);
128+     c_config
129+         .flag_if_supported("-Wno-unused-parameter")
130+         .flag_if_supported("-Wno-unused-but-set-variable")
131+         .flag_if_supported("-Wno-trigraphs");
132+     let parser_path = src_dir.join("parser.c");
133+     c_config.file(&parser_path);
134+
135+     // If your language uses an external scanner written in C,
136+     // then include this block of code:
137+
138+     /*
139+     let scanner_path = src_dir.join("scanner.c");
140+     c_config.file(&scanner_path);
141+     println!("cargo:rerun-if-changed={}", scanner_path.display());
142+     */
143+
144+     c_config.compile("parser");
145+     println!("cargo:rerun-if-changed={}", parser_path.display());
146+
147+     // If your language uses an external scanner written in C++,
148+     // then include this block of code:
149+
150+     /*
151+     let mut cpp_config = cc::Build::new();
152+     cpp_config.cpp(true);
153+     cpp_config.include(src_dir);
154+     cpp_config
155+         .flag_if_supported("-Wno-unused-parameter")
156+         .flag_if_supported("-Wno-unused-but-set-variable");
157+     let scanner_path = src_dir.join("scanner.cc");
158+     cpp_config.file(&scanner_path);
159+     cpp_config.compile("scanner");
160+     println!("cargo:rerun-if-changed={}", scanner_path.display());
161+     */
162+ }
163 diff --git a/bindings/rust/lib.rs b/bindings/rust/lib.rs
164new file mode 100644
165index 0000000..202fdb6
166--- /dev/null
167+++ b/bindings/rust/lib.rs
168 @@ -0,0 +1,52 @@
169+ //! This crate provides gitdiff language support for the [tree-sitter][] parsing library.
170+ //!
171+ //! Typically, you will use the [language][language func] function to add this language to a
172+ //! tree-sitter [Parser][], and then use the parser to parse some code:
173+ //!
174+ //! ```
175+ //! let code = "";
176+ //! let mut parser = tree_sitter::Parser::new();
177+ //! parser.set_language(tree_sitter_gitdiff::language()).expect("Error loading gitdiff grammar");
178+ //! let tree = parser.parse(code, None).unwrap();
179+ //! ```
180+ //!
181+ //! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
182+ //! [language func]: fn.language.html
183+ //! [Parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html
184+ //! [tree-sitter]: https://tree-sitter.github.io/
185+
186+ use tree_sitter::Language;
187+
188+ extern "C" {
189+ fn tree_sitter_gitdiff() -> Language;
190+ }
191+ /// Get the tree-sitter [Language][] for this grammar.
192+ ///
193+ /// [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
194+ pub fn language() -> Language {
195+ // SAFETY: `tree_sitter_gitdiff` is the argument-free C entry point defined in src/parser.c.
196+ unsafe { tree_sitter_gitdiff() }
197+ }
198+
199+ /// The content of the [`node-types.json`][] file for this grammar, embedded at compile time.
200+ ///
201+ /// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
202+ pub const NODE_TYPES: &str = include_str!("../../src/node-types.json");
203+
204+ // Uncomment these to include any queries that this grammar contains
205+
206+ // pub const HIGHLIGHTS_QUERY: &str = include_str!("../../queries/highlights.scm");
207+ // pub const INJECTIONS_QUERY: &str = include_str!("../../queries/injections.scm");
208+ // pub const LOCALS_QUERY: &str = include_str!("../../queries/locals.scm");
209+ // pub const TAGS_QUERY: &str = include_str!("../../queries/tags.scm");
210+
211+ #[cfg(test)]
212+ mod tests {
213+     /// Smoke test: a fresh parser must accept the language returned by `language()`.
214+     #[test]
215+     fn test_can_load_grammar() {
216+         let mut gitdiff_parser = tree_sitter::Parser::new();
217+         let grammar = super::language();
218+         gitdiff_parser.set_language(grammar).expect("Error loading gitdiff language");
219+     }
220+ }
221 diff --git a/grammar.js b/grammar.js
222new file mode 100644
223index 0000000..fa6963f
224--- /dev/null
225+++ b/grammar.js
226 @@ -0,0 +1,7 @@
227+ module.exports = grammar({
228+ name: "gitdiff",
229+ // Placeholder rule set: the only rule, `source`, matches the literal string "hello".
230+ rules: {
231+ source: ($) => "hello",
232+ },
233+ });
234 diff --git a/src/grammar.json b/src/grammar.json
235new file mode 100644
236index 0000000..4662e9d
237--- /dev/null
238+++ b/src/grammar.json
239 @@ -0,0 +1,21 @@
240+ {
241+ "name": "gitdiff",
242+ "rules": {
243+ "source": {
244+ "type": "STRING",
245+ "value": "hello"
246+ }
247+ },
248+ "extras": [
249+ {
250+ "type": "PATTERN",
251+ "value": "\\s"
252+ }
253+ ],
254+ "conflicts": [],
255+ "precedences": [],
256+ "externals": [],
257+ "inline": [],
258+ "supertypes": []
259+ }
260+
261 diff --git a/src/node-types.json b/src/node-types.json
262new file mode 100644
263index 0000000..c3b80ca
264--- /dev/null
265+++ b/src/node-types.json
266 @@ -0,0 +1,11 @@
267+ [
268+ {
269+ "type": "source",
270+ "named": true,
271+ "fields": {}
272+ },
273+ {
274+ "type": "hello",
275+ "named": false
276+ }
277+ ]
278\ No newline at end of file
279 diff --git a/src/parser.c b/src/parser.c
280new file mode 100644
281index 0000000..642ddce
282--- /dev/null
283+++ b/src/parser.c
284 @@ -0,0 +1,169 @@
285+ #include <tree_sitter/parser.h>
286+
287+ #if defined(__GNUC__) || defined(__clang__)
288+ #pragma GCC diagnostic push
289+ #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
290+ #endif
291+
292+ #define LANGUAGE_VERSION 13
293+ #define STATE_COUNT 4
294+ #define LARGE_STATE_COUNT 2
295+ #define SYMBOL_COUNT 3
296+ #define ALIAS_COUNT 0
297+ #define TOKEN_COUNT 2
298+ #define EXTERNAL_TOKEN_COUNT 0
299+ #define FIELD_COUNT 0
300+ #define MAX_ALIAS_SEQUENCE_LENGTH 1
301+ #define PRODUCTION_ID_COUNT 1
302+
303+ enum {
304+ anon_sym_hello = 1,
305+ sym_source = 2,
306+ };
307+
308+ static const char * const ts_symbol_names[] = {
309+ [ts_builtin_sym_end] = "end",
310+ [anon_sym_hello] = "hello",
311+ [sym_source] = "source",
312+ };
313+
314+ static const TSSymbol ts_symbol_map[] = {
315+ [ts_builtin_sym_end] = ts_builtin_sym_end,
316+ [anon_sym_hello] = anon_sym_hello,
317+ [sym_source] = sym_source,
318+ };
319+
320+ static const TSSymbolMetadata ts_symbol_metadata[] = {
321+ [ts_builtin_sym_end] = {
322+ .visible = false,
323+ .named = true,
324+ },
325+ [anon_sym_hello] = {
326+ .visible = true,
327+ .named = false,
328+ },
329+ [sym_source] = {
330+ .visible = true,
331+ .named = true,
332+ },
333+ };
334+
335+ static const TSSymbol ts_alias_sequences[PRODUCTION_ID_COUNT][MAX_ALIAS_SEQUENCE_LENGTH] = {
336+ [0] = {0},
337+ };
338+
339+ static const uint16_t ts_non_terminal_alias_map[] = {
340+ 0,
341+ };
342+ /* Lexer state machine: accepts the token "hello" or end of input; whitespace is skipped. */
343+ static bool ts_lex(TSLexer *lexer, TSStateId state) {
344+ START_LEXER();
345+ eof = lexer->eof(lexer);
346+ switch (state) {
347+ case 0: /* start: EOF -> 5, 'h' -> 1, tab/newline/CR/space are skipped */
348+ if (eof) ADVANCE(5);
349+ if (lookahead == 'h') ADVANCE(1);
350+ if (lookahead == '\t' ||
351+ lookahead == '\n' ||
352+ lookahead == '\r' ||
353+ lookahead == ' ') SKIP(0)
354+ END_STATE();
355+ case 1: /* consumed "h" */
356+ if (lookahead == 'e') ADVANCE(3);
357+ END_STATE();
358+ case 2: /* consumed "hel" */
359+ if (lookahead == 'l') ADVANCE(4);
360+ END_STATE();
361+ case 3: /* consumed "he" */
362+ if (lookahead == 'l') ADVANCE(2);
363+ END_STATE();
364+ case 4: /* consumed "hell" */
365+ if (lookahead == 'o') ADVANCE(6);
366+ END_STATE();
367+ case 5: /* accepting state for end of input */
368+ ACCEPT_TOKEN(ts_builtin_sym_end);
369+ END_STATE();
370+ case 6: /* accepting state for the complete "hello" token */
371+ ACCEPT_TOKEN(anon_sym_hello);
372+ END_STATE();
373+ default:
374+ return false;
375+ }
376+ }
377+
378+ static const TSLexMode ts_lex_modes[STATE_COUNT] = {
379+ [0] = {.lex_state = 0},
380+ [1] = {.lex_state = 0},
381+ [2] = {.lex_state = 0},
382+ [3] = {.lex_state = 0},
383+ };
384+
385+ static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = {
386+ [0] = {
387+ [ts_builtin_sym_end] = ACTIONS(1),
388+ [anon_sym_hello] = ACTIONS(1),
389+ },
390+ [1] = {
391+ [sym_source] = STATE(3),
392+ [anon_sym_hello] = ACTIONS(3),
393+ },
394+ };
395+
396+ static const uint16_t ts_small_parse_table[] = {
397+ [0] = 1,
398+ ACTIONS(5), 1,
399+ ts_builtin_sym_end,
400+ [4] = 1,
401+ ACTIONS(7), 1,
402+ ts_builtin_sym_end,
403+ };
404+
405+ static const uint32_t ts_small_parse_table_map[] = {
406+ [SMALL_STATE(2)] = 0,
407+ [SMALL_STATE(3)] = 4,
408+ };
409+
410+ static const TSParseActionEntry ts_parse_actions[] = {
411+ [0] = {.entry = {.count = 0, .reusable = false}},
412+ [1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(),
413+ [3] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2),
414+ [5] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source, 1),
415+ [7] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(),
416+ };
417+
418+ #ifdef __cplusplus
419+ extern "C" {
420+ #endif
421+ #ifdef _WIN32
422+ #define extern __declspec(dllexport)
423+ #endif
424+ /* Returns a pointer to the function-local static TSLanguage built from the tables above. */
425+ extern const TSLanguage *tree_sitter_gitdiff(void) {
426+ static const TSLanguage language = {
427+ .version = LANGUAGE_VERSION,
428+ .symbol_count = SYMBOL_COUNT,
429+ .alias_count = ALIAS_COUNT,
430+ .token_count = TOKEN_COUNT,
431+ .external_token_count = EXTERNAL_TOKEN_COUNT,
432+ .state_count = STATE_COUNT,
433+ .large_state_count = LARGE_STATE_COUNT,
434+ .production_id_count = PRODUCTION_ID_COUNT,
435+ .field_count = FIELD_COUNT,
436+ .max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH,
437+ .parse_table = &ts_parse_table[0][0],
438+ .small_parse_table = ts_small_parse_table,
439+ .small_parse_table_map = ts_small_parse_table_map,
440+ .parse_actions = ts_parse_actions,
441+ .symbol_names = ts_symbol_names,
442+ .symbol_metadata = ts_symbol_metadata,
443+ .public_symbol_map = ts_symbol_map,
444+ .alias_map = ts_non_terminal_alias_map,
445+ .alias_sequences = &ts_alias_sequences[0][0],
446+ .lex_modes = ts_lex_modes,
447+ .lex_fn = ts_lex,
448+ };
449+ return &language;
450+ }
451+ #ifdef __cplusplus
452+ }
453+ #endif
454 diff --git a/src/tree_sitter/parser.h b/src/tree_sitter/parser.h
455new file mode 100644
456index 0000000..cbbc7b4
457--- /dev/null
458+++ b/src/tree_sitter/parser.h
459 @@ -0,0 +1,223 @@
460+ #ifndef TREE_SITTER_PARSER_H_
461+ #define TREE_SITTER_PARSER_H_
462+
463+ #ifdef __cplusplus
464+ extern "C" {
465+ #endif
466+
467+ #include <stdbool.h>
468+ #include <stdint.h>
469+ #include <stdlib.h>
470+
471+ #define ts_builtin_sym_error ((TSSymbol)-1)
472+ #define ts_builtin_sym_end 0
473+ #define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
474+
475+ typedef uint16_t TSStateId;
476+
477+ #ifndef TREE_SITTER_API_H_
478+ typedef uint16_t TSSymbol;
479+ typedef uint16_t TSFieldId;
480+ typedef struct TSLanguage TSLanguage;
481+ #endif
482+
483+ typedef struct {
484+ TSFieldId field_id;
485+ uint8_t child_index;
486+ bool inherited;
487+ } TSFieldMapEntry;
488+
489+ typedef struct {
490+ uint16_t index;
491+ uint16_t length;
492+ } TSFieldMapSlice;
493+
494+ typedef struct {
495+ bool visible;
496+ bool named;
497+ bool supertype;
498+ } TSSymbolMetadata;
499+
500+ typedef struct TSLexer TSLexer;
501+
502+ struct TSLexer {
503+ int32_t lookahead;
504+ TSSymbol result_symbol;
505+ void (*advance)(TSLexer *, bool);
506+ void (*mark_end)(TSLexer *);
507+ uint32_t (*get_column)(TSLexer *);
508+ bool (*is_at_included_range_start)(const TSLexer *);
509+ bool (*eof)(const TSLexer *);
510+ };
511+
512+ typedef enum {
513+ TSParseActionTypeShift,
514+ TSParseActionTypeReduce,
515+ TSParseActionTypeAccept,
516+ TSParseActionTypeRecover,
517+ } TSParseActionType;
518+
519+ typedef union {
520+ struct {
521+ uint8_t type;
522+ TSStateId state;
523+ bool extra;
524+ bool repetition;
525+ } shift;
526+ struct {
527+ uint8_t type;
528+ uint8_t child_count;
529+ TSSymbol symbol;
530+ int16_t dynamic_precedence;
531+ uint16_t production_id;
532+ } reduce;
533+ uint8_t type;
534+ } TSParseAction;
535+
536+ typedef struct {
537+ uint16_t lex_state;
538+ uint16_t external_lex_state;
539+ } TSLexMode;
540+
541+ typedef union {
542+ TSParseAction action;
543+ struct {
544+ uint8_t count;
545+ bool reusable;
546+ } entry;
547+ } TSParseActionEntry;
548+
549+ struct TSLanguage {
550+ uint32_t version;
551+ uint32_t symbol_count;
552+ uint32_t alias_count;
553+ uint32_t token_count;
554+ uint32_t external_token_count;
555+ uint32_t state_count;
556+ uint32_t large_state_count;
557+ uint32_t production_id_count;
558+ uint32_t field_count;
559+ uint16_t max_alias_sequence_length;
560+ const uint16_t *parse_table;
561+ const uint16_t *small_parse_table;
562+ const uint32_t *small_parse_table_map;
563+ const TSParseActionEntry *parse_actions;
564+ const char * const *symbol_names;
565+ const char * const *field_names;
566+ const TSFieldMapSlice *field_map_slices;
567+ const TSFieldMapEntry *field_map_entries;
568+ const TSSymbolMetadata *symbol_metadata;
569+ const TSSymbol *public_symbol_map;
570+ const uint16_t *alias_map;
571+ const TSSymbol *alias_sequences;
572+ const TSLexMode *lex_modes;
573+ bool (*lex_fn)(TSLexer *, TSStateId);
574+ bool (*keyword_lex_fn)(TSLexer *, TSStateId);
575+ TSSymbol keyword_capture_token;
576+ struct {
577+ const bool *states;
578+ const TSSymbol *symbol_map;
579+ void *(*create)(void);
580+ void (*destroy)(void *);
581+ bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
582+ unsigned (*serialize)(void *, char *);
583+ void (*deserialize)(void *, const char *, unsigned);
584+ } external_scanner;
585+ };
586+
587+ /*
588+ * Lexer Macros
589+ */
590+
591+ #define START_LEXER() \
592+ bool result = false; \
593+ bool skip = false; \
594+ bool eof = false; \
595+ int32_t lookahead; \
596+ goto start; \
597+ next_state: \
598+ lexer->advance(lexer, skip); \
599+ start: \
600+ skip = false; \
601+ lookahead = lexer->lookahead;
602+
603+ #define ADVANCE(state_value) \
604+ { \
605+ state = state_value; \
606+ goto next_state; \
607+ }
608+
609+ #define SKIP(state_value) \
610+ { \
611+ skip = true; \
612+ state = state_value; \
613+ goto next_state; \
614+ }
615+
616+ #define ACCEPT_TOKEN(symbol_value) \
617+ result = true; \
618+ lexer->result_symbol = symbol_value; \
619+ lexer->mark_end(lexer);
620+
621+ #define END_STATE() return result;
622+
623+ /*
624+ * Parse Table Macros
625+ */
626+ /* Fully parenthesized so the expansion stays one operand inside larger expressions. */
627+ #define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
628+
629+ #define STATE(id) id
630+
631+ #define ACTIONS(id) id
632+
633+ #define SHIFT(state_value) \
634+ {{ \
635+ .shift = { \
636+ .type = TSParseActionTypeShift, \
637+ .state = state_value \
638+ } \
639+ }}
640+
641+ #define SHIFT_REPEAT(state_value) \
642+ {{ \
643+ .shift = { \
644+ .type = TSParseActionTypeShift, \
645+ .state = state_value, \
646+ .repetition = true \
647+ } \
648+ }}
649+
650+ #define SHIFT_EXTRA() \
651+ {{ \
652+ .shift = { \
653+ .type = TSParseActionTypeShift, \
654+ .extra = true \
655+ } \
656+ }}
657+
658+ #define REDUCE(symbol_val, child_count_val, ...) \
659+ {{ \
660+ .reduce = { \
661+ .type = TSParseActionTypeReduce, \
662+ .symbol = symbol_val, \
663+ .child_count = child_count_val, \
664+ __VA_ARGS__ \
665+ }, \
666+ }}
667+
668+ #define RECOVER() \
669+ {{ \
670+ .type = TSParseActionTypeRecover \
671+ }}
672+
673+ #define ACCEPT_INPUT() \
674+ {{ \
675+ .type = TSParseActionTypeAccept \
676+ }}
677+
678+ #ifdef __cplusplus
679+ }
680+ #endif
681+
682+ #endif // TREE_SITTER_PARSER_H_