diff --git a/runtime/doc/treesitter.txt b/runtime/doc/treesitter.txt index 343f4a62c2..510585d0dd 100644 --- a/runtime/doc/treesitter.txt +++ b/runtime/doc/treesitter.txt @@ -365,4 +365,91 @@ identical identifiers, highlighting both as |hl-WarningMsg|: > ((binary_expression left: (identifier) @WarningMsg.left right: (identifier) @WarningMsg.right) (eq? @WarningMsg.left @WarningMsg.right)) +Treesitter language injection (WIP) *lua-treesitter-language-injection* + +NOTE: This is a partially implemented feature, and not usable as a default +solution yet. What is documented here is a temporary interface intended +for those who want to experiment with this feature and contribute to +its development. + +Languages can have nested languages within them, for example javascript inside +HTML. We can "inject" a treesitter parser for a child language by configuring +injection queries. Here is an example of Javascript and CSS injected into +HTML. > + + local query = [[ + (script_element (raw_text) @javascript) + (style_element (raw_text) @css) + ]]; + + local parser = vim.treesitter.get_parser(nil, nil, { + injections = {html = query} + }) + + parser:parse() + +Any capture will be treated as the node treesitter will use for the injected +language. The capture name will be used as the language. There are a couple +reserved captures that do not have this behavior + +`@language` +This will use a nodes text content as the language to be injected. + +`@content` +This will use the captured nodes content as the injected content. + +`@combined` +This will combine all matches of a pattern as one single block of content. +By default, each match of a pattern is treated as it's own block of content +and parsed independent of each other. + +`@` +Any other capture name will be treated as both the language and the content. + +`@_` +Any capture with a leading "_" will not be treated as a language and will have +no special processing and is useful for capturing nodes for directives. + +Injections can be configured using `directives` instead of using capture +names. Here is an example of a directive that resolves the language based on a +buffer variable instead of statically in the query. > + + local query = require("vim.treesitter.query") + + query.add_directive("inject-preprocessor!", function(_, bufnr, _, _, data) + local success, lang = pcall(vim.api.nvim_buf_get_var, bufnr, "css_preprocessor") + + data.language = success and lang or "css" + end) + +Here is the same HTML query using this directive. > + + local query = [[ + (script_element (raw_text) @javascript) + (style_element + ((raw_text) @content + (#inject-preprocessor!))) + ]]; + + local parser = vim.treesitter.get_parser(nil, nil, { + injections = {html = query} + }) + + parser:parse() + +The following properties can be attached to the metadata object provided to +the directive. + +`language` +Same as the language capture. + +`content` +A list of ranges or nodes to inject as content. These ranges and/or nodes will +be treated as combined source and will be parsed within the same context. This +differs from the `@content` capture which only captures a single node as +content. This can also be a single number that references a captured node. + +`combined` +Same as the combined capture. + vim:tw=78:ts=8:ft=help:norl: diff --git a/runtime/lua/vim/treesitter/languagetree.lua b/runtime/lua/vim/treesitter/languagetree.lua index 4168c1e365..2f5aeb0710 100644 --- a/runtime/lua/vim/treesitter/languagetree.lua +++ b/runtime/lua/vim/treesitter/languagetree.lua @@ -12,14 +12,19 @@ LanguageTree.__index = LanguageTree -- @param source Can be a bufnr or a string of text to parse -- @param lang The language this tree represents -- @param opts Options table --- @param opts.queries A table of language to injection query strings. --- This is useful for overriding the built-in runtime file --- searching for the injection language query per language. +-- @param opts.injections A table of language to injection query strings. +-- This is useful for overriding the built-in runtime file +-- searching for the injection language query per language. function LanguageTree.new(source, lang, opts) language.require_language(lang) opts = opts or {} - local custom_queries = opts.queries or {} + if opts.queries then + a.nvim_err_writeln("'queries' is no longer supported. Use 'injections' now") + opts.injections = opts.queries + end + + local injections = opts.injections or {} local self = setmetatable({ _source = source, _lang = lang, @@ -27,8 +32,8 @@ function LanguageTree.new(source, lang, opts) _regions = {}, _trees = {}, _opts = opts, - _injection_query = custom_queries[lang] - and query.parse_query(lang, custom_queries[lang]) + _injection_query = injections[lang] + and query.parse_query(lang, injections[lang]) or query.get_query(lang, "injections"), _valid = false, _parser = vim._create_ts_parser(lang), @@ -297,33 +302,50 @@ function LanguageTree:_get_injections() for pattern, match, metadata in self._injection_query:iter_matches(root_node, self._source, start_line, end_line+1) do local lang = nil - local injection_node = nil - local combined = false + local ranges = {} + local combined = metadata.combined + + -- Directives can configure how injections are captured as well as actual node captures. + -- This allows more advanced processing for determining ranges and language resolution. + if metadata.content then + local content = metadata.content + + -- Allow for captured nodes to be used + if type(content) == "number" then + content = {match[content]} + end + + if content then + vim.list_extend(ranges, content) + end + end + + if metadata.language then + lang = metadata.language + end -- You can specify the content and language together -- using a tag with the language, for example -- @javascript for id, node in pairs(match) do - local data = metadata[id] local name = self._injection_query.captures[id] - local offset_range = data and data.offset -- Lang should override any other language tag - if name == "language" then + if name == "language" and not lang then lang = query.get_node_text(node, self._source) elseif name == "combined" then combined = true - elseif name == "content" then - injection_node = offset_range or node + elseif name == "content" and #ranges == 0 then + table.insert(ranges, node) -- Ignore any tags that start with "_" -- Allows for other tags to be used in matches elseif string.sub(name, 1, 1) ~= "_" then - if lang == nil then + if not lang then lang = name end - if not injection_node then - injection_node = offset_range or node + if #ranges == 0 then + table.insert(ranges, node) end end end @@ -337,21 +359,21 @@ function LanguageTree:_get_injections() injections[tree_index][lang] = {} end - -- Key by pattern so we can either combine each node to parse in the same - -- context or treat each node independently. + -- Key this by pattern. If combined is set to true all captures of this pattern + -- will be parsed by treesitter as the same "source". + -- If combined is false, each "region" will be parsed as a single source. if not injections[tree_index][lang][pattern] then - injections[tree_index][lang][pattern] = { combined = combined, nodes = {} } + injections[tree_index][lang][pattern] = { combined = combined, regions = {} } end - table.insert(injections[tree_index][lang][pattern].nodes, injection_node) + table.insert(injections[tree_index][lang][pattern].regions, ranges) end end local result = {} -- Generate a map by lang of node lists. - -- Each list is a set of ranges that should be parsed - -- together. + -- Each list is a set of ranges that should be parsed together. for _, lang_map in ipairs(injections) do for lang, patterns in pairs(lang_map) do if not result[lang] then @@ -360,10 +382,10 @@ function LanguageTree:_get_injections() for _, entry in pairs(patterns) do if entry.combined then - table.insert(result[lang], entry.nodes) + table.insert(result[lang], vim.tbl_flatten(entry.regions)) else - for _, node in ipairs(entry.nodes) do - table.insert(result[lang], {node}) + for _, ranges in ipairs(entry.regions) do + table.insert(result[lang], ranges) end end end diff --git a/runtime/lua/vim/treesitter/query.lua b/runtime/lua/vim/treesitter/query.lua index 188ec94a6a..79a88c5dbb 100644 --- a/runtime/lua/vim/treesitter/query.lua +++ b/runtime/lua/vim/treesitter/query.lua @@ -79,17 +79,6 @@ local function read_query_files(filenames) return table.concat(contents, '') end -local match_metatable = { - __index = function(tbl, key) - rawset(tbl, key, {}) - return tbl[key] - end -} - -local function new_match_metadata() - return setmetatable({}, match_metatable) -end - --- The explicitly set queries from |vim.treesitter.query.set_query()| local explicit_queries = setmetatable({}, { __index = function(t, k) @@ -249,7 +238,7 @@ predicate_handlers["vim-match?"] = predicate_handlers["match?"] -- Directives store metadata or perform side effects against a match. -- Directives should always end with a `!`. -- Directive handler receive the following arguments --- (match, pattern, bufnr, predicate) +-- (match, pattern, bufnr, predicate, metadata) local directive_handlers = { ["set!"] = function(_, _, _, pred, metadata) if #pred == 4 then @@ -269,7 +258,6 @@ local directive_handlers = { local start_col_offset = pred[4] or 0 local end_row_offset = pred[5] or 0 local end_col_offset = pred[6] or 0 - local key = pred[7] or "offset" range[1] = range[1] + start_row_offset range[2] = range[2] + start_col_offset @@ -278,7 +266,7 @@ local directive_handlers = { -- If this produces an invalid range, we just skip it. if range[1] < range[3] or (range[1] == range[3] and range[2] <= range[4]) then - metadata[pred[2]][key] = range + metadata.content = {range} end end } @@ -410,7 +398,7 @@ function Query:iter_captures(node, source, start, stop) local raw_iter = node:_rawquery(self.query, true, start, stop) local function iter() local capture, captured_node, match = raw_iter() - local metadata = new_match_metadata() + local metadata = {} if match ~= nil then local active = self:match_preds(match, match.pattern, source) @@ -445,7 +433,7 @@ function Query:iter_matches(node, source, start, stop) local raw_iter = node:_rawquery(self.query, false, start, stop) local function iter() local pattern, match = raw_iter() - local metadata = new_match_metadata() + local metadata = {} if match ~= nil then local active = self:match_preds(match, pattern, source) diff --git a/test/functional/treesitter/highlight_spec.lua b/test/functional/treesitter/highlight_spec.lua index d80d0fdbaf..05e0c5fe2c 100644 --- a/test/functional/treesitter/highlight_spec.lua +++ b/test/functional/treesitter/highlight_spec.lua @@ -445,7 +445,7 @@ describe('treesitter highlighting', function() exec_lua [[ local parser = vim.treesitter.get_parser(0, "c", { - queries = {c = "(preproc_def (preproc_arg) @c) (preproc_function_def value: (preproc_arg) @c)"} + injections = {c = "(preproc_def (preproc_arg) @c) (preproc_function_def value: (preproc_arg) @c)"} }) local highlighter = vim.treesitter.highlighter test_hl = highlighter.new(parser, {queries = {c = hl_query}}) diff --git a/test/functional/treesitter/parser_spec.lua b/test/functional/treesitter/parser_spec.lua index f99362fbdf..72ff6f2fb6 100644 --- a/test/functional/treesitter/parser_spec.lua +++ b/test/functional/treesitter/parser_spec.lua @@ -468,7 +468,7 @@ int x = INT_MAX; it("should inject a language", function() exec_lua([[ parser = vim.treesitter.get_parser(0, "c", { - queries = { + injections = { c = "(preproc_def (preproc_arg) @c) (preproc_function_def value: (preproc_arg) @c)"}}) ]]) @@ -489,7 +489,7 @@ int x = INT_MAX; it("should inject a language", function() exec_lua([[ parser = vim.treesitter.get_parser(0, "c", { - queries = { + injections = { c = "(preproc_def (preproc_arg) @c @combined) (preproc_function_def value: (preproc_arg) @c @combined)"}}) ]]) @@ -506,11 +506,39 @@ int x = INT_MAX; end) end) + describe("when providing parsing information through a directive", function() + it("should inject a language", function() + exec_lua([=[ + vim.treesitter.add_directive("inject-clang!", function(match, _, _, pred, metadata) + metadata.language = "c" + metadata.combined = true + metadata.content = pred[2] + end) + + parser = vim.treesitter.get_parser(0, "c", { + injections = { + c = "(preproc_def ((preproc_arg) @_c (#inject-clang! @_c)))" .. + "(preproc_function_def value: ((preproc_arg) @_a (#inject-clang! @_a)))"}}) + ]=]) + + eq("table", exec_lua("return type(parser:children().c)")) + eq(2, exec_lua("return #parser:children().c:trees()")) + eq({ + {0, 0, 7, 0}, -- root tree + {3, 14, 5, 18}, -- VALUE 123 + -- VALUE1 123 + -- VALUE2 123 + {1, 26, 2, 68} -- READ_STRING(x, y) (char_u *)read_string((x), (size_t)(y)) + -- READ_STRING_OK(x, y) (char_u *)read_string((x), (size_t)(y)) + }, get_ranges()) + end) + end) + describe("when using the offset directive", function() it("should shift the range by the directive amount", function() exec_lua([[ parser = vim.treesitter.get_parser(0, "c", { - queries = { + injections = { c = "(preproc_def ((preproc_arg) @c (#offset! @c 0 2 0 -1))) (preproc_function_def value: (preproc_arg) @c)"}}) ]]) @@ -538,7 +566,7 @@ int x = INT_MAX; it("should return the correct language tree", function() local result = exec_lua([[ parser = vim.treesitter.get_parser(0, "c", { - queries = { c = "(preproc_def (preproc_arg) @c)"}}) + injections = { c = "(preproc_def (preproc_arg) @c)"}}) local sub_tree = parser:language_for_range({1, 18, 1, 19}) @@ -572,28 +600,5 @@ int x = INT_MAX; eq(result, "value") end) end) - - describe("when setting for a capture match", function() - it("should set/get the data correctly", function() - insert([[ - int x = 3; - ]]) - - local result = exec_lua([[ - local result - - query = vim.treesitter.parse_query("c", '((number_literal) @number (#set! @number "key" "value"))') - parser = vim.treesitter.get_parser(0, "c") - - for pattern, match, metadata in query:iter_matches(parser:parse()[1]:root(), 0) do - result = metadata[pattern].key - end - - return result - ]]) - - eq(result, "value") - end) - end) end) end)