Commit
Author: Kevin Schoon [me@kevinschoon.com]
Hash: 1c0f5eeb4cbd30bff9b75d614d66223de400c341
Timestamp: Sun, 23 Jul 2023 13:17:45 +0000 (1 year ago)

+322 -104 +/-5 browse
rewrite with ninja
1diff --git a/.gitignore b/.gitignore
2index a173085..3189293 100644
3--- a/.gitignore
4+++ b/.gitignore
5 @@ -1 +1,3 @@
6 .ccls-cache
7+ target
8+ .ninja_log
9 diff --git a/README.md b/README.md
10index 1dc3c73..9e87b84 100644
11--- a/README.md
12+++ b/README.md
13 @@ -1,4 +1,41 @@
14 # tree-sitter-amalgamation
15
16- All tree-sitter parsers imported as submodules and compiled for easy use
17- in downstream packages.
18+ All of the tree-sitter parsers imported as submodules and compiled for easy use
19+ by downstream projects. Each parser is exposed as a shared module that can be
20+ dynamically loaded in another application. Generated source code is also shipped
21+ for use in build scripts.
22+
23+ ## Compiling
24+
25+ You need to install [ninja](https://ninja-build.org) to build the project
26+ as well as an ANSI C compiler.
27+
28+ ```sh
29+ git submodule update --init --recursive
30+ ninja
31+ ```
32+
33+ ## Adding New Languages
34+
35+ ```sh
36+ # add a new submodule to the repository
37+ cd grammars && git submodule add $your-tree-sitter-project
38+ # edit the build.py script: add a Source entry for your grammar and adjust as needed
39+ vim build.py
40+ # regenerate the build.ninja script
41+ python build.py
42+ # rebuild the project
43+ ninja
44+ ```
45+
46+ ## Packagers
47+
48+ Suggested filesystem layout for package maintainers.
49+
50+ ```text
51+ /usr/lib/tree-sitter-amalgamation/$language.so
52+ /usr/include/tree-sitter-amalgamation/$language/queries/highlights.scm
53+ /usr/include/tree-sitter-amalgamation/$language/parser.c
54+ /usr/include/tree-sitter-amalgamation/$language/scanner.c
55+ /usr/include/tree-sitter-amalgamation/$language/common/scanner.h
56+ ```
57 diff --git a/build.ninja b/build.ninja
58new file mode 100644
59index 0000000..4ba944b
60--- /dev/null
61+++ b/build.ninja
62 @@ -0,0 +1,173 @@
63+ # automatically generated, do not edit.
64+ cc = cc -shared -fno-exceptions -g
65+ rule copy
66+ command = cp $in $out
67+ rule mkdir
68+ command = mkdir -p $out
69+ rule ts-compile
70+ command = cd $in && tree-sitter generate
71+ rule fix-scanner-include
72+ command = cat $in | sed "s/..\/..\/common\/scanner.h/common\/scanner.h/g" $
73+ > $out
74+ rule compile
75+ command = $cc $in -o $out
76+ build target: mkdir
77+ build target/bash: mkdir
78+ build target/bash/parser.c: copy grammars/tree-sitter-bash/src/parser.c
79+ build target/bash/scanner.c: copy grammars/tree-sitter-bash/src/scanner.c
80+ build target/bash/highlights.scm: copy $
81+ grammars/tree-sitter-bash/queries/highlights.scm
82+ build target/bash/bash.so: compile target/bash/parser.c
83+ build target/c: mkdir
84+ build target/c/parser.c: copy grammars/tree-sitter-c/src/parser.c
85+ build target/c/highlights.scm: copy $
86+ grammars/tree-sitter-c/queries/highlights.scm
87+ build target/c/c.so: compile target/c/parser.c
88+ build target/c-sharp: mkdir
89+ build target/c-sharp/parser.c: copy grammars/tree-sitter-c-sharp/src/parser.c
90+ build target/c-sharp/scanner.c: copy $
91+ grammars/tree-sitter-c-sharp/src/scanner.c
92+ build target/c-sharp/highlights.scm: copy $
93+ grammars/tree-sitter-c-sharp/queries/highlights.scm
94+ build target/c-sharp/c-sharp.so: compile target/c-sharp/parser.c
95+ build target/diff: mkdir
96+ build target/diff/parser.c: copy grammars/tree-sitter-diff/src/parser.c
97+ build target/diff/highlights.scm: copy $
98+ grammars/tree-sitter-diff/queries/highlights.scm
99+ build target/diff/diff.so: compile target/diff/parser.c
100+ build target/go: mkdir
101+ build target/go/parser.c: copy grammars/tree-sitter-go/src/parser.c
102+ build target/go/highlights.scm: copy $
103+ grammars/tree-sitter-go/queries/highlights.scm
104+ build target/go/go.so: compile target/go/parser.c
105+ build target/haskell: mkdir
106+ build target/haskell/parser.c: copy grammars/tree-sitter-haskell/src/parser.c
107+ build target/haskell/scanner.c: copy $
108+ grammars/tree-sitter-haskell/src/scanner.c
109+ build target/haskell/highlights.scm: copy $
110+ grammars/tree-sitter-haskell/queries/highlights.scm
111+ build target/haskell/haskell.so: compile target/haskell/parser.c
112+ build target/html: mkdir
113+ build target/html/parser.c: copy grammars/tree-sitter-html/src/parser.c
114+ build target/html/scanner.c: copy grammars/tree-sitter-html/src/scanner.c
115+ build target/html/highlights.scm: copy $
116+ grammars/tree-sitter-html/queries/highlights.scm
117+ build target/html/html.so: compile target/html/parser.c
118+ build target/java: mkdir
119+ build target/java/parser.c: copy grammars/tree-sitter-java/src/parser.c
120+ build target/java/highlights.scm: copy $
121+ grammars/tree-sitter-java/queries/highlights.scm
122+ build target/java/java.so: compile target/java/parser.c
123+ build target/javascript: mkdir
124+ build target/javascript/parser.c: copy $
125+ grammars/tree-sitter-javascript/src/parser.c
126+ build target/javascript/scanner.c: copy $
127+ grammars/tree-sitter-javascript/src/scanner.c
128+ build target/javascript/highlights.scm: copy $
129+ grammars/tree-sitter-javascript/queries/highlights.scm
130+ build target/javascript/javascript.so: compile target/javascript/parser.c
131+ build target/json: mkdir
132+ build target/json/parser.c: copy grammars/tree-sitter-json/src/parser.c
133+ build target/json/highlights.scm: copy $
134+ grammars/tree-sitter-json/queries/highlights.scm
135+ build target/json/json.so: compile target/json/parser.c
136+ build target/julia: mkdir
137+ build target/julia/parser.c: copy grammars/tree-sitter-julia/src/parser.c
138+ build target/julia/scanner.c: copy grammars/tree-sitter-julia/src/scanner.c
139+ build target/julia/julia.so: compile target/julia/parser.c
140+ build target/markdown: mkdir
141+ build target/markdown/parser.c: copy $
142+ grammars/tree-sitter-markdown/tree-sitter-markdown/src/parser.c
143+ build target/markdown/scanner.c: copy $
144+ grammars/tree-sitter-markdown/tree-sitter-markdown/src/scanner.c
145+ build target/markdown/markdown.so: compile target/markdown/parser.c
146+ build target/markdown-inline: mkdir
147+ build target/markdown-inline/parser.c: copy $
148+ grammars/tree-sitter-markdown/tree-sitter-markdown-inline/src/parser.c
149+ build target/markdown-inline/scanner.c: copy $
150+ grammars/tree-sitter-markdown/tree-sitter-markdown-inline/src/scanner.c
151+ build target/markdown-inline/highlights.scm: copy $
152+ grammars/tree-sitter-markdown/tree-sitter-markdown-inline/queries/highlights.scm
153+ build target/markdown-inline/markdown-inline.so: compile $
154+ target/markdown-inline/parser.c
155+ build target/ocaml: mkdir
156+ build target/ocaml/parser.c: copy $
157+ grammars/tree-sitter-ocaml/ocaml/src/parser.c
158+ build target/ocaml/scanner.c: fix-scanner-include $
159+ grammars/tree-sitter-ocaml/ocaml/src/scanner.c
160+ build target/ocaml/common: mkdir
161+ build target/ocaml/common/scanner.h: copy $
162+ grammars/tree-sitter-ocaml/ocaml/../common/scanner.h
163+ build target/ocaml/highlights.scm: copy $
164+ grammars/tree-sitter-ocaml/ocaml/../queries/highlights.scm
165+ build target/ocaml/ocaml.so: compile target/ocaml/parser.c
166+ build target/php: mkdir
167+ build target/php/parser.c: copy grammars/tree-sitter-php/src/parser.c
168+ build target/php/scanner.c: copy grammars/tree-sitter-php/src/scanner.c
169+ build target/php/highlights.scm: copy $
170+ grammars/tree-sitter-php/queries/highlights.scm
171+ build target/php/php.so: compile target/php/parser.c
172+ build target/python: mkdir
173+ build target/python/parser.c: copy grammars/tree-sitter-python/src/parser.c
174+ build target/python/scanner.c: copy grammars/tree-sitter-python/src/scanner.c
175+ build target/python/highlights.scm: copy $
176+ grammars/tree-sitter-python/queries/highlights.scm
177+ build target/python/python.so: compile target/python/parser.c
178+ build target/regex: mkdir
179+ build target/regex/parser.c: copy grammars/tree-sitter-regex/src/parser.c
180+ build target/regex/highlights.scm: copy $
181+ grammars/tree-sitter-regex/queries/highlights.scm
182+ build target/regex/regex.so: compile target/regex/parser.c
183+ build target/ruby: mkdir
184+ build target/ruby/parser.c: copy grammars/tree-sitter-ruby/src/parser.c
185+ build target/ruby/highlights.scm: copy $
186+ grammars/tree-sitter-ruby/queries/highlights.scm
187+ build target/ruby/ruby.so: compile target/ruby/parser.c
188+ build target/rust: mkdir
189+ build target/rust/parser.c: copy grammars/tree-sitter-rust/src/parser.c
190+ build target/rust/scanner.c: copy grammars/tree-sitter-rust/src/scanner.c
191+ build target/rust/highlights.scm: copy $
192+ grammars/tree-sitter-rust/queries/highlights.scm
193+ build target/rust/rust.so: compile target/rust/parser.c
194+ build target/scala: mkdir
195+ build target/scala/parser.c: copy grammars/tree-sitter-scala/src/parser.c
196+ build target/scala/scanner.c: copy grammars/tree-sitter-scala/src/scanner.c
197+ build target/scala/highlights.scm: copy $
198+ grammars/tree-sitter-scala/queries/scala/highlights.scm
199+ build target/scala/scala.so: compile target/scala/parser.c
200+ build target/sql: mkdir
201+ build target/sql/parser.c: copy grammars/tree-sitter-sql/src/parser.c
202+ build target/sql/highlights.scm: copy $
203+ grammars/tree-sitter-sql/queries/highlights.scm
204+ build target/sql/sql.so: compile target/sql/parser.c
205+ build target/toml: mkdir
206+ build target/toml/parser.c: copy grammars/tree-sitter-toml/src/parser.c
207+ build target/toml/scanner.c: copy grammars/tree-sitter-toml/src/scanner.c
208+ build target/toml/highlights.scm: copy $
209+ grammars/tree-sitter-toml/queries/highlights.scm
210+ build target/toml/toml.so: compile target/toml/parser.c
211+ build target/typescript: mkdir
212+ build target/typescript/parser.c: copy $
213+ grammars/tree-sitter-typescript/typescript/src/parser.c
214+ build target/typescript/scanner.c: fix-scanner-include $
215+ grammars/tree-sitter-typescript/typescript/src/scanner.c
216+ build target/typescript/common: mkdir
217+ build target/typescript/common/scanner.h: copy $
218+ grammars/tree-sitter-typescript/typescript/../common/scanner.h
219+ build target/typescript/highlights.scm: copy $
220+ grammars/tree-sitter-typescript/typescript/../queries/highlights.scm
221+ build target/typescript/typescript.so: compile target/typescript/parser.c
222+ build target/tsx: mkdir
223+ build target/tsx/parser.c: copy $
224+ grammars/tree-sitter-typescript/tsx/src/parser.c
225+ build target/tsx/scanner.c: fix-scanner-include $
226+ grammars/tree-sitter-typescript/tsx/src/scanner.c
227+ build target/tsx/common: mkdir
228+ build target/tsx/common/scanner.h: copy $
229+ grammars/tree-sitter-typescript/tsx/../common/scanner.h
230+ build target/tsx/highlights.scm: copy $
231+ grammars/tree-sitter-typescript/tsx/../queries/highlights.scm
232+ build target/tsx/tsx.so: compile target/tsx/parser.c
233+ build target/verilog: mkdir
234+ build target/verilog/parser.c: copy grammars/tree-sitter-verilog/src/parser.c
235+ build target/verilog/verilog.so: compile target/verilog/parser.c
236 diff --git a/build.py b/build.py
237new file mode 100755
238index 0000000..d6262b8
239--- /dev/null
240+++ b/build.py
241 @@ -0,0 +1,108 @@
242+ #!/usr/bin/env python
243+
244+ from collections import namedtuple
245+ from os import path
246+
247+ import ninja_syntax
248+
# One grammar to build:
#   name       - used for the output directory (target/<name>) and library name
#   path       - directory of the grammar; src/parser.c is looked up below it
#   includes   - (source, destination) pairs of extra files to copy; the source
#                is relative to `path`, the destination to target/<name>
#   highlights - highlight query file relative to `path`, or None to skip it
Source = namedtuple("Source", "name path includes highlights")

# Location of the highlight queries in most grammar repositories.
_HIGHLIGHTS = "queries/highlights.scm"

# Header shared between sibling grammars that live in one repository.
_SCANNER_HEADER = ("../common/scanner.h", "common/scanner.h")


def _simple(name, highlights=_HIGHLIGHTS):
    """Describe a grammar that lives at grammars/tree-sitter-<name>."""
    return Source(name, "grammars/tree-sitter-" + name, [], highlights)


sources = [
    _simple("bash"),
    _simple("c"),
    _simple("c-sharp"),
    _simple("diff"),
    _simple("go"),
    _simple("haskell"),
    _simple("html"),
    _simple("java"),
    _simple("javascript"),
    _simple("json"),
    _simple("julia", highlights=None),
    Source(
        name="markdown",
        path="grammars/tree-sitter-markdown/tree-sitter-markdown",
        includes=[],
        highlights=None,
    ),
    Source(
        name="markdown-inline",
        path="grammars/tree-sitter-markdown/tree-sitter-markdown-inline",
        includes=[],
        highlights=_HIGHLIGHTS,
    ),
    Source(
        name="ocaml",
        path="grammars/tree-sitter-ocaml/ocaml",
        includes=[_SCANNER_HEADER],
        highlights="../queries/highlights.scm",
    ),
    _simple("php"),
    _simple("python"),
    _simple("regex"),
    _simple("ruby"),
    _simple("rust"),
    _simple("scala", highlights="queries/scala/highlights.scm"),
    _simple("sql"),
    _simple("toml"),
    Source(
        name="typescript",
        path="grammars/tree-sitter-typescript/typescript",
        includes=[_SCANNER_HEADER],
        highlights="../queries/highlights.scm",
    ),
    Source(
        name="tsx",
        path="grammars/tree-sitter-typescript/tsx",
        includes=[_SCANNER_HEADER],
        highlights="../queries/highlights.scm",
    ),
    _simple("verilog", highlights=None),
]

# Grammars whose scanner.c is passed through the "fix-scanner-include" rule
# instead of being copied verbatim.
scanner_hacks = ["ocaml", "typescript", "tsx"]
302+
303+
def source_build_steps(source, has_scanner, fix_scanner_include=False):
    """Return the ninja build statements needed for a single grammar.

    The result is a list of ``(output, rule, inputs, implicit)`` tuples in
    the order they should be written. Keeping this free of I/O lets the
    generated build graph be inspected without touching the filesystem.

    Args:
        source: a ``Source`` record (``name``, ``path``, ``includes``,
            ``highlights``).
        has_scanner: whether ``<source.path>/src/scanner.c`` exists.
        fix_scanner_include: route the scanner through the
            ``fix-scanner-include`` rule instead of a plain ``copy``.

    Returns:
        list of ``(output, rule, inputs, implicit)`` tuples.
    """
    target_dir = path.join("target", source.name)
    steps = [(target_dir, "mkdir", [], [])]

    # NOTE: all parsers currently have committed their generated code
    # so there isn't any reason to run the "ts-compile" rule right now.
    parser_dst = path.join(target_dir, "parser.c")
    steps.append(
        (parser_dst, "copy", [path.join(source.path, "src", "parser.c")], [])
    )
    compile_inputs = [parser_dst]

    if has_scanner:
        scanner_dst = path.join(target_dir, "scanner.c")
        scanner_rule = "fix-scanner-include" if fix_scanner_include else "copy"
        steps.append(
            (
                scanner_dst,
                scanner_rule,
                [path.join(source.path, "src", "scanner.c")],
                [],
            )
        )
        # The parser references the external scanner's symbols, so the
        # scanner has to be part of the shared library as well.
        compile_inputs.append(scanner_dst)

    headers = []
    known_dirs = {target_dir}
    for include_src, include_dst in source.includes:
        header_dst = path.join(target_dir, include_dst)
        header_dir = path.dirname(header_dst)
        # ninja rejects two build statements with the same output, so every
        # directory gets at most one mkdir statement.
        if header_dir not in known_dirs:
            known_dirs.add(header_dir)
            steps.append((header_dir, "mkdir", [], []))
        steps.append(
            (header_dst, "copy", [path.join(source.path, include_src)], [])
        )
        headers.append(header_dst)

    if source.highlights:
        steps.append(
            (
                path.join(target_dir, "highlights.scm"),
                "copy",
                [path.join(source.path, source.highlights)],
                [],
            )
        )

    # Copied headers are implicit dependencies: they must be in place before
    # compiling, but must not be passed to the compiler through $in.
    lib_out = path.join(target_dir, f"{source.name}.so")
    steps.append((lib_out, "compile", compile_inputs, headers))
    return steps


def main():
    """Regenerate ``build.ninja`` in the current directory from ``sources``."""
    with open("build.ninja", "w") as fp:
        w = ninja_syntax.Writer(fp)
        w.comment("automatically generated, do not edit.")
        # -fPIC: objects linked with -shared must be position independent.
        w.variable("cc", "cc -shared -fPIC -fno-exceptions -g")
        w.rule("copy", command="cp $in $out")
        w.rule("mkdir", command="mkdir -p $out")
        w.rule("ts-compile", command="cd $in && tree-sitter generate")
        # Raw string: "\/" is not a valid escape sequence in a plain literal.
        sed_fix = r"s/..\/..\/common\/scanner.h/common\/scanner.h/g"
        w.rule("fix-scanner-include", command=f'cat $in | sed "{sed_fix}" > $out')
        w.rule("compile", command="$cc $in -o $out")
        w.build("target", "mkdir")
        for source in sources:
            # The scanner is optional; this is decided when the build file is
            # generated, so the grammar checkouts must be present by then.
            has_scanner = path.exists(path.join(source.path, "src", "scanner.c"))
            steps = source_build_steps(
                source, has_scanner, source.name in scanner_hacks
            )
            for output, rule, inputs, implicit in steps:
                w.build(output, rule, inputs=inputs, implicit=implicit)
        # The file is closed by the ``with`` block; no explicit close needed.


if __name__ == "__main__":
    main()
350 diff --git a/generate.py b/generate.py
351deleted file mode 100755
352index 590e3b9..0000000
353--- a/generate.py
354+++ /dev/null
355 @@ -1,102 +0,0 @@
356- #!/usr/bin/env python
357-
358- import glob
359- import os
360- import shutil
361- import subprocess
362- from collections import namedtuple
363-
364- recompile = False
365-
366- Source = namedtuple("Source", ["name", "path", "includes", "highlights"])
367-
368- sources = [
369- Source("bash", "grammars/tree-sitter-bash", [], "queries"),
370- Source("c", "grammars/tree-sitter-c", [], "queries"),
371- Source("c-sharp","grammars/tree-sitter-c-sharp", [], "queries"),
372- Source("diff","grammars/tree-sitter-diff", [], "queries"),
373- Source("go","grammars/tree-sitter-go", [], "queries"),
374- Source("haskell", "grammars/tree-sitter-haskell", [], "queries"),
375- Source("html","grammars/tree-sitter-html", [], "queries"),
376- Source("java","grammars/tree-sitter-java", [], "queries"),
377- Source("javascript","grammars/tree-sitter-javascript", [], "queries"),
378- Source("json","grammars/tree-sitter-json", [], "queries"),
379- Source("julia","grammars/tree-sitter-julia", [], "queries"),
380- Source("markdown","grammars/tree-sitter-markdown/tree-sitter-markdown", [], None),
381- Source("markdown-inline","grammars/tree-sitter-markdown/tree-sitter-markdown-inline", [], "queries"),
382- Source("ocaml","grammars/tree-sitter-ocaml/ocaml", [("common", "../../common")], "../queries"),
383- Source("php","grammars/tree-sitter-php", [], "queries"),
384- Source("python","grammars/tree-sitter-python", [], "queries"),
385- Source("regex","grammars/tree-sitter-regex", [], "queries"),
386- Source("ruby","grammars/tree-sitter-ruby", [], "queries"),
387- Source("rust","grammars/tree-sitter-rust", [], "queries"),
388- Source("scala","grammars/tree-sitter-scala", [], "queries/scala"),
389- Source("sql","grammars/tree-sitter-sql", [], "queries") ,
390- Source("toml","grammars/tree-sitter-toml", [], "queries"),
391- Source("typescript","grammars/tree-sitter-typescript/typescript", [("common", "../../common")], "queries"),
392- Source("tsx","grammars/tree-sitter-typescript/tsx", [("common", "../../common")], "queries"),
393- Source("verilog","grammars/tree-sitter-verilog", [], "queries"),
394- ]
395-
396- def _fix_scanner_include():
397- current_dir = os.path.abspath(".")
398- scanner_path = f"{current_dir}/scanner.c"
399- # yep this is what im doing
400- fix = """s/..\/..\/common\/scanner.h/common\/scanner.h/g"""
401- subprocess.call(["sed", "-i", fix, scanner_path])
402-
403- def _hack_bash(src):
404- pass
405-
406- def _hack_ocaml(src):
407- _fix_scanner_include()
408-
409- def _hack_typescript(src):
410- _fix_scanner_include()
411-
412- def _hack_tsx(src):
413- _fix_scanner_include()
414-
415-
416- # some hand written scanners use relative paths that are picked up by the
417- # tree-sitter binary but not by build systems
418- hacks = {
419- "bash": _hack_bash,
420- "ocaml": _hack_ocaml,
421- "typescript": _hack_typescript,
422- "tsx": _hack_tsx,
423- }
424-
425- def compile_all():
426- if not os.path.exists("target"):
427- os.mkdir("target")
428- for source in sources:
429- print(source.name, source.path)
430- target = f"target/{source.name}"
431- source_dir = f"{source.path}/src"
432- parser_path = f"{source_dir}/parser.c"
433- if not os.path.exists(parser_path) or recompile:
434- subprocess.check_call(["tree-sitter", "generate"], cwd=source.path)
435- shutil.rmtree(target, ignore_errors=True)
436- shutil.copytree(source_dir, target)
437- for extra_dir in source.includes:
438- extra = os.path.join(source_dir, extra_dir[1])
439- shutil.copytree(extra, os.path.join(target, extra_dir[0]))
440- # seemingly a convention used in parsers support "related" languages
441- # like ocaml interfaces
442- common_path = f"{source_dir}/common"
443- if os.path.exists(common_path):
444- shutil.copytree(common_path, target)
445- if source.highlights:
446- highlights_path = os.path.join(source.path, source.highlights)
447- if os.path.exists(highlights_path):
448- shutil.copytree(highlights_path, target + "/queries")
449- current_dir = os.path.abspath(".")
450- if source.name in hacks:
451- os.chdir(target)
452- hacks[source.name](source.path)
453- os.chdir(current_dir)
454-
455-
456- if __name__ == "__main__":
457- compile_all()