From 31c4ed26bc278282898123ad21bb6fead401fd6f Mon Sep 17 00:00:00 2001 From: Christian Clason Date: Fri, 11 Aug 2023 17:05:17 +0200 Subject: [PATCH] feat(treesitter): add injection language fallback (#24659) * feat(treesitter): add injection language fallback Problem: injection languages are often specified via aliases (e.g., filetype or in upper case), requiring custom directives. Solution: include lookup logic (try as parser name, then filetype, then lowercase) in LanguageTree itself and remove `#inject-language` directive. Co-authored-by: Lewis Russell --- runtime/doc/news.txt | 18 ++++++++------ runtime/doc/treesitter.txt | 14 +---------- runtime/lua/vim/treesitter/languagetree.lua | 27 +++++++++++++++++++-- runtime/lua/vim/treesitter/query.lua | 27 --------------------- runtime/queries/markdown/injections.scm | 5 ++-- test/functional/treesitter/parser_spec.lua | 2 +- 6 files changed, 39 insertions(+), 54 deletions(-) diff --git a/runtime/doc/news.txt b/runtime/doc/news.txt index 713569e1ad..a1981d5b7f 100644 --- a/runtime/doc/news.txt +++ b/runtime/doc/news.txt @@ -77,15 +77,9 @@ The following new APIs and features were added. • Added |vim.keycode()| for translating keycodes in a string. -• Added |vim.treesitter.query.omnifunc()| for treesitter query files (set by - default). - • |'smoothscroll'| option to scroll by screen line rather than by text line when |'wrap'| is set. -• |Query:iter_matches()| now has the ability to set the maximum start depth - for matches. - • Added inline virtual text support to |nvim_buf_set_extmark()|. • The terminal buffer now supports reflow (wrapped lines adapt when the buffer @@ -120,8 +114,16 @@ The following new APIs and features were added. `client.supports_method()`. It considers both the dynamic capabilities and static `server_capabilities`. -• Bundled treesitter parser and queries (highlight, folds) for Markdown, - Python, and Bash. 
+• Treesitter + • Bundled parsers and queries (highlight, folds) for Markdown, Python, and + Bash. + • Added |vim.treesitter.query.omnifunc()| for treesitter query files (set by + default). + • |Query:iter_matches()| now has the ability to set the maximum start depth + for matches. + • `@injection.language` now has smarter resolution and will now fall back to + language aliases (e.g., filetype) registered via + `vim.treesitter.language.register` and/or attempt lower case variants of the text. • |vim.ui.open()| opens URIs using the system default handler (macOS `open`, Windows `explorer`, Linux `xdg-open`, etc.) diff --git a/runtime/doc/treesitter.txt b/runtime/doc/treesitter.txt index 7c7be5f214..13c0bd024a 100644 --- a/runtime/doc/treesitter.txt +++ b/runtime/doc/treesitter.txt @@ -311,19 +311,7 @@ The following directives are built in: {capture_id} Example: >query - (#inject-language! @_lang) -< - `inject-language!` *treesitter-directive-inject-language!* - Set the injection language from the node text, interpreted first as a - language name, then (if a parser is not found) a filetype. Custom - aliases can be added via |vim.treesitter.language.register()|. This - will set a new `metadata[capture_id]['injection.language']`. - - Parameters: ~ - {capture_id} - - Example: >query - (#inject-language! @_lang) + (#trim! @fold) < Further directives can be added via |vim.treesitter.query.add_directive()|. Use |vim.treesitter.query.list_directives()| to list all available directives. 
diff --git a/runtime/lua/vim/treesitter/languagetree.lua b/runtime/lua/vim/treesitter/languagetree.lua index 4b2628609a..7f1d6d1111 100644 --- a/runtime/lua/vim/treesitter/languagetree.lua +++ b/runtime/lua/vim/treesitter/languagetree.lua @@ -635,6 +635,29 @@ local function add_injection(t, tree_index, pattern, lang, combined, ranges) table.insert(t[tree_index][lang][pattern].regions, ranges) end +-- TODO(clason): replace by refactored `ts.has_parser` API (without registering) +---@param lang string parser name +---@return boolean # true if parser for {lang} exists on rtp +local has_parser = function(lang) + return vim._ts_has_language(lang) + or #vim.api.nvim_get_runtime_file('parser/' .. lang .. '.*', false) > 0 +end + +--- Return parser name for language (if exists) or filetype (if registered and exists) +--- +---@param alias string language or filetype name +---@return string? # resolved parser name +local function resolve_lang(alias) + if has_parser(alias) then + return alias + end + + local lang = vim.treesitter.language.get_lang(alias) + if lang and has_parser(lang) then + return lang + end +end + ---@private --- Extract injections according to: --- https://tree-sitter.github.io/tree-sitter/syntax-highlighting#language-injection @@ -649,10 +672,10 @@ function LanguageTree:_get_injection(match, metadata) for id, node in pairs(match) do local name = self._injection_query.captures[id] - -- Lang should override any other language tag if name == 'injection.language' then - lang = vim.treesitter.get_node_text(node, self._source, { metadata = metadata[id] }) + local text = vim.treesitter.get_node_text(node, self._source, { metadata = metadata[id] }) + lang = resolve_lang(text) or resolve_lang(text:lower()) elseif name == 'injection.content' then ranges = get_node_ranges(node, self._source, metadata[id], include_children) end diff --git a/runtime/lua/vim/treesitter/query.lua b/runtime/lua/vim/treesitter/query.lua index 3b7e74c0cf..c3213e0192 100644 --- 
a/runtime/lua/vim/treesitter/query.lua +++ b/runtime/lua/vim/treesitter/query.lua @@ -541,33 +541,6 @@ local directive_handlers = { metadata.range = { start_row, start_col, end_row, end_col } end end, - -- Set injection language from node text, interpreted first as language and then as filetype - -- Example: (#inject-language! @_lang) - ['inject-language!'] = function(match, _, bufnr, pred, metadata) - local id = pred[2] - local node = match[id] - if not node then - return - end - - -- TODO(clason): replace by refactored `ts.has_parser` API - local has_parser = function(lang) - return vim._ts_has_language(lang) - or #vim.api.nvim_get_runtime_file('parser/' .. lang .. '.*', false) > 0 - end - - local alias = vim.treesitter.get_node_text(node, bufnr, { metadata = metadata[id] }) - if not alias then - return - elseif has_parser(alias) then - metadata['injection.language'] = alias - else - local lang = vim.treesitter.language.get_lang(alias) - if lang and has_parser(lang) then - metadata['injection.language'] = lang - end - end - end, } --- Adds a new predicate to be used in queries diff --git a/runtime/queries/markdown/injections.scm b/runtime/queries/markdown/injections.scm index 0bead6f4ac..fda7036830 100644 --- a/runtime/queries/markdown/injections.scm +++ b/runtime/queries/markdown/injections.scm @@ -1,8 +1,7 @@ (fenced_code_block (info_string - (language) @_lang) - (code_fence_content) @injection.content - (#inject-language! @_lang)) + (language) @injection.language) + (code_fence_content) @injection.content) ((html_block) @injection.content (#set! injection.language "html") diff --git a/test/functional/treesitter/parser_spec.lua b/test/functional/treesitter/parser_spec.lua index da84f435c9..834998bae7 100644 --- a/test/functional/treesitter/parser_spec.lua +++ b/test/functional/treesitter/parser_spec.lua @@ -783,7 +783,7 @@ int x = INT_MAX; return list ]] - eq({ 'gsub!', 'inject-language!', 'offset!', 'set!', 'trim!' 
}, res_list) + eq({ 'gsub!', 'offset!', 'set!', 'trim!' }, res_list) end) end) end)