mirror of
https://github.com/neovim/neovim.git
synced 2025-01-01 17:23:36 -07:00
e353c869ce
This is incorrect in the following scenario: 1. The language tree is Lua > Vim > Lua. 2. An edit simultaneously wipes out the `_regions` of all nodes, while taking the Vim injection off-screen. 3. The Vim injection is not re-parsed, so the child Lua `_regions` is still `nil`. 4. The child Lua is assumed, incorrectly, to occupy the whole document. 5. This causes the injections to be parsed again, resulting in Lua > Vim > Lua > Vim. 6. Now, by the same process, Vim ends up with its range assumed over the whole document. Now the parse is broken and results in broken highlighting and poor performance. It should be fine to instead treat an unparsed node as occupying nothing (i.e. effectively non-existent). Since, either: - The parent was just parsed, hence defining `_regions` - The parent was not just parsed, in which case this node doesn't need to be parsed either. Also, the name `has_regions` is confusing; it seems to simply mean the opposite of "root" or "full_document". However, this PR does not touch it.
1150 lines
32 KiB
Lua
1150 lines
32 KiB
Lua
--- @defgroup lua-treesitter-languagetree
|
|
---
|
|
--- @brief A \*LanguageTree\* contains a tree of parsers: the root treesitter parser for {lang} and
|
|
--- any "injected" language parsers, which themselves may inject other languages, recursively.
|
|
--- For example a Lua buffer containing some Vimscript commands needs multiple parsers to fully
|
|
--- understand its contents.
|
|
---
|
|
--- To create a LanguageTree (parser object) for a given buffer and language, use:
|
|
---
|
|
--- ```lua
|
|
--- local parser = vim.treesitter.get_parser(bufnr, lang)
|
|
--- ```
|
|
---
|
|
--- (where `bufnr=0` means current buffer). `lang` defaults to 'filetype'.
|
|
--- Note: currently the parser is retained for the lifetime of a buffer but this may change;
|
|
--- a plugin should keep a reference to the parser object if it wants incremental updates.
|
|
---
|
|
--- Whenever you need to access the current syntax tree, parse the buffer:
|
|
---
|
|
--- ```lua
|
|
--- local tree = parser:parse({ start_row, end_row })
|
|
--- ```
|
|
---
|
|
--- This returns a table of immutable |treesitter-tree| objects representing the current state of
|
|
--- the buffer. When the plugin wants to access the state after a (possible) edit it must call
|
|
--- `parse()` again. If the buffer wasn't edited, the same tree will be returned again without extra
|
|
--- work. If the buffer was parsed before, incremental parsing will be done of the changed parts.
|
|
---
|
|
--- Note: To use the parser directly inside a |nvim_buf_attach()| Lua callback, you must call
|
|
--- |vim.treesitter.get_parser()| before you register your callback. But preferably parsing
|
|
--- shouldn't be done directly in the change callback anyway as they will be very frequent. Rather
|
|
--- a plugin that does any kind of analysis on a tree should use a timer to throttle too frequent
|
|
--- updates.
|
|
---
|
|
|
|
-- Debugging:
|
|
--
|
|
-- vim.g.__ts_debug levels:
|
|
-- - 1. Messages from languagetree.lua
|
|
-- - 2. Parse messages from treesitter
|
|
-- - 3. Lex messages from treesitter
|
|
--
|
|
-- Log file can be found in stdpath('log')/treesitter.log
|
|
|
|
local query = require('vim.treesitter.query')
|
|
local language = require('vim.treesitter.language')
|
|
local Range = require('vim.treesitter._range')
|
|
|
|
---@alias TSCallbackName
|
|
---| 'changedtree'
|
|
---| 'bytes'
|
|
---| 'detach'
|
|
---| 'child_added'
|
|
---| 'child_removed'
|
|
|
|
---@alias TSCallbackNameOn
|
|
---| 'on_changedtree'
|
|
---| 'on_bytes'
|
|
---| 'on_detach'
|
|
---| 'on_child_added'
|
|
---| 'on_child_removed'
|
|
|
|
--- Maps the handler keys accepted by `register_cbs` (`on_*`) to the internal callback names
--- used to index `_callbacks` / `_callbacks_rec`.
--- @type table<TSCallbackNameOn,TSCallbackName>
local TSCallbackNames = {
  on_changedtree = 'changedtree',
  on_bytes = 'bytes',
  on_detach = 'detach',
  on_child_added = 'child_added',
  on_child_removed = 'child_removed',
}
|
|
|
|
---@class LanguageTreeOpts
---@field queries table<string,string>  -- Deprecated
---@field injections table<string,string>

---@class LanguageTree
---@field private _callbacks table<TSCallbackName,function[]> Callback handlers
---@field package _callbacks_rec table<TSCallbackName,function[]> Callback handlers (recursive)
---@field private _children table<string,LanguageTree> Injected languages
---@field private _injection_query Query Queries defining injected languages
---@field private _injections_processed boolean
---@field private _opts table Options
---@field private _parser TSParser Parser for language
---@field private _has_regions boolean
---@field private _regions table<integer, Range6[]>?
---List of regions this tree should manage and parse. If nil then regions are
---taken from _trees. This is mostly a short-lived cache for included_regions()
---@field private _lang string Language name
---@field private _parent_lang? string Parent language name
---@field private _source (integer|string) Buffer or string to parse
---@field private _trees table<integer, TSTree> Reference to parsed tree (one for each language).
---Each key is the index of region, which is synced with _regions and _valid.
---@field private _valid boolean|table<integer,boolean> If the parsed tree is valid
---@field private _logger? fun(logtype: string, msg: string)
---@field private _logfile? file*
local LanguageTree = {}

-- Instances created by `LanguageTree.new` use this table as their metatable, so method
-- lookups on an instance resolve here.
LanguageTree.__index = LanguageTree
|
|
|
|
--- @package
---
--- Builds a |LanguageTree|: the root treesitter parser for {lang} plus an (initially empty)
--- set of "injected" child parsers, which may themselves inject further languages,
--- recursively.
---
---@param source (integer|string) Buffer or text string to parse
---@param lang string Root language of this tree
---@param opts (table|nil) Optional arguments:
---             - injections table Map of language to injection query strings. Overrides the
---                                built-in runtime file searching for language injections.
---@param parent_lang? string Parent language name of this tree
---@return LanguageTree parser object
function LanguageTree.new(source, lang, opts, parent_lang)
  language.add(lang)
  ---@type LanguageTreeOpts
  opts = opts or {}

  -- Buffer handle 0 stands for the current buffer; resolve it to a concrete handle.
  if source == 0 then
    source = vim.api.nvim_get_current_buf()
  end

  -- A query string supplied through `opts.injections` takes precedence; otherwise (or if
  -- parsing it yields nothing) fall back to the 'injections' query found by `query.get`.
  local custom_query = (opts.injections or {})[lang]
  local injection_query = custom_query and query.parse(lang, custom_query)
  if not injection_query then
    injection_query = query.get(lang, 'injections')
  end

  --- @type LanguageTree
  local self = setmetatable({
    _source = source,
    _lang = lang,
    _parent_lang = parent_lang,
    _children = {},
    _trees = {},
    _opts = opts,
    _injection_query = injection_query,
    _has_regions = false,
    _injections_processed = false,
    _valid = false,
    _parser = vim._create_ts_parser(lang),
    _callbacks = {},
    _callbacks_rec = {},
  }, LanguageTree)

  -- Logging is only switched on when the debug level is a number.
  local debug_level = vim.g.__ts_debug
  if type(debug_level) == 'number' then
    self:_set_logger()
    self:_log('START')
  end

  -- Give every known callback name its own (empty) handler list.
  for _, cb_name in pairs(TSCallbackNames) do
    self._callbacks[cb_name] = {}
    self._callbacks_rec[cb_name] = {}
  end

  return self
end
|
|
|
|
--- @private
--- Opens `treesitter.log` under `stdpath('log')` in append mode and installs `self._logger`,
--- which writes one `source:lang:(logtype) msg` line per call and flushes immediately.
--- The same logger is passed to the parser, with parse messages enabled when
--- `vim.g.__ts_debug >= 2` and lex messages when `vim.g.__ts_debug >= 3`.
--- Raises an error if the log file cannot be opened.
function LanguageTree:_set_logger()
  local source = self:source()
  -- String sources are labelled 'text'; anything else is printed via tostring().
  local source_label = type(source) == 'string' and 'text' or tostring(source)
  local lang = self:lang()

  local log_dir = vim.fn.stdpath('log')
  vim.fn.mkdir(log_dir, 'p')
  local logfilename = vim.fs.joinpath(log_dir, 'treesitter.log')

  local logfile, openerr = io.open(logfilename, 'a+')
  if openerr or not logfile then
    error(string.format('Could not open file (%s) for logging: %s', logfilename, openerr))
  end

  self._logfile = logfile
  self._logger = function(logtype, msg)
    self._logfile:write(string.format('%s:%s:(%s) %s\n', source_label, lang, logtype, msg))
    self._logfile:flush()
  end

  local level = vim.g.__ts_debug
  local log_lex = level >= 3
  local log_parse = level >= 2
  self._parser:_set_logger(log_lex, log_parse, self._logger)
end
|
|
|
|
---Collects all arguments into a table and records how many there were in field `n`,
---so that `nil` values (including trailing ones) are not lost.
---@param ... any
---@return table
local function pack_results(...)
  return { n = select('#', ...), ... }
end

---Measure execution time of a function.
---
---Calls `f(...)` and returns the elapsed time (the `vim.uv.hrtime()` difference divided by
---1e6) followed by every value `f` returned. The results are forwarded with an explicit
---count, so `nil` results — leading, embedded or trailing — reach the caller unchanged.
---@generic R1, R2, R3
---@param f fun(): R1, R2, R3
---@return number, R1, R2, R3
local function tcall(f, ...)
  local start = vim.uv.hrtime()
  ---@diagnostic disable-next-line
  local r = pack_results(f(...))
  --- @type number
  local duration = (vim.uv.hrtime() - start) / 1000000
  -- `unpack(r)` alone would rely on `#r`, which is unspecified for tables with holes.
  return duration, unpack(r, 1, r.n)
end
|
|
|
|
---@private
--- Writes a debug message through `self._logger`. Does nothing unless a logger is installed
--- and `vim.g.__ts_debug` is at least 1. If the first argument is a function it is called
--- and its results are logged instead, so expensive messages are only built when needed.
--- Strings are logged verbatim; every other value goes through `vim.inspect`.
---@vararg any
function LanguageTree:_log(...)
  local logger = self._logger
  if not logger then
    return
  end

  local level = vim.g.__ts_debug
  if not level or level < 1 then
    return
  end

  local args = { ... }
  local first = args[1]
  if type(first) == 'function' then
    args = { first() }
  end

  -- Stack level 2 is whoever called _log; its name and line prefix the message.
  local info = debug.getinfo(2, 'nl')
  local nregions = vim.tbl_count(self:included_regions())

  local parts = {
    string.format('%s:%d: (#regions=%d) ', info.name or '???', info.currentline or 0, nregions),
  }
  for _, item in ipairs(args) do
    if type(item) ~= 'string' then
      item = vim.inspect(item, { newline = ' ', indent = '' })
    end
    parts[#parts + 1] = item
  end

  logger('nvim', table.concat(parts, ' '))
end
|
|
|
|
--- Invalidates this parser and all its children.
---
--- With {reload} set (the buffer was reloaded), a `changedtree` callback is fired for every
--- existing tree with that tree's included ranges, and the trees are discarded so that
--- everything gets reparsed.
---@param reload boolean|nil
function LanguageTree:invalidate(reload)
  self._valid = false

  if reload then
    for _, tree in pairs(self._trees) do
      self:_do_callback('changedtree', tree:included_ranges(true), tree)
    end
    self._trees = {}
  end

  for _, child_tree in pairs(self._children) do
    child_tree:invalidate(reload)
  end
end
|
|
|
|
--- Returns all trees of the regions parsed by this parser.
--- Does not include child languages.
--- The result is list-like if
--- * this LanguageTree is the root, in which case the result is empty or a singleton list; or
--- * the root LanguageTree is fully parsed.
---
---@return table<integer, TSTree>
function LanguageTree:trees()
  -- The internal table itself is handed out, not a copy.
  return self._trees
end
|
|
|
|
--- Gets the language of this tree node.
---@return string # the language name this tree was created with
function LanguageTree:lang()
  return self._lang
end
|
|
|
|
--- Returns whether this LanguageTree is valid, i.e., |LanguageTree:trees()| reflects the latest
--- state of the source. If invalid, user should call |LanguageTree:parse()|.
---@param exclude_children boolean|nil whether to ignore the validity of children (default `false`)
---@return boolean
function LanguageTree:is_valid(exclude_children)
  local valid = self._valid

  -- Per-region bookkeeping: every currently included region must be flagged valid.
  if type(valid) == 'table' then
    for index in pairs(self:included_regions()) do
      if not valid[index] then
        return false
      end
    end
  end

  if not exclude_children then
    -- Injections that were not processed yet mean the set of children may be stale.
    if not self._injections_processed then
      return false
    end

    for _, child_tree in pairs(self._children) do
      if not child_tree:is_valid(exclude_children) then
        return false
      end
    end
  end

  if type(valid) == 'boolean' then
    return valid
  end

  -- Every region checked out: collapse the bookkeeping to a plain `true`.
  self._valid = true
  return true
end
|
|
|
|
--- Returns a map of language to child tree.
---@return table<string,LanguageTree>
function LanguageTree:children()
  -- The internal map itself is handed out, not a copy.
  return self._children
end
|
|
|
|
--- Returns the source content of the language tree (bufnr or string).
---@return integer|string
function LanguageTree:source()
  return self._source
end
|
|
|
|
--- Decides whether {region} has to be considered for a parse request covering {range}.
---
--- An empty region always qualifies. Otherwise a `nil`/`false` range never qualifies, `true`
--- always qualifies, and a concrete range qualifies if `Range.intercepts` holds for at least
--- one range of the region.
--- @param region Range6[]
--- @param range? boolean|Range
--- @return boolean
local function intercepts_region(region, range)
  if #region == 0 then
    return true
  end

  if not range then
    return false
  end

  if range == true then
    return true
  end

  for _, candidate in ipairs(region) do
    if Range.intercepts(candidate, range) then
      return true
    end
  end

  return false
end
|
|
|
|
--- @private
--- Parses every region that is currently not marked valid and that qualifies for {range}
--- (see `intercepts_region`). Each parsed region triggers a `changedtree` callback, replaces
--- the stored tree for its index and is marked valid.
--- @param range boolean|Range?
--- @return Range6[] changes
--- @return integer no_regions_parsed
--- @return number total_parse_time
function LanguageTree:_parse_regions(range)
  local all_changes = {}
  local parsed_count = 0
  local elapsed = 0

  -- Switch to per-region bookkeeping if validity is currently a plain boolean.
  if type(self._valid) ~= 'table' then
    self._valid = {}
  end

  -- If there are no ranges, set to an empty list
  -- so the included ranges in the parser are cleared.
  for index, ranges in pairs(self:included_regions()) do
    if not self._valid[index] and intercepts_region(ranges, range) then
      self._parser:set_included_ranges(ranges)

      local previous = self._trees[index]
      local parse_time, tree, tree_changes =
        tcall(self._parser.parse, self._parser, previous, self._source, true)

      -- Pass ranges if this is an initial parse
      local cb_changes = previous and tree_changes
      if not cb_changes then
        cb_changes = tree:included_ranges(true)
      end

      self:_do_callback('changedtree', cb_changes, tree)
      self._trees[index] = tree
      vim.list_extend(all_changes, tree_changes)

      elapsed = elapsed + parse_time
      parsed_count = parsed_count + 1
      self._valid[index] = true
    end
  end

  return all_changes, parsed_count, elapsed
end
|
|
|
|
--- @private
--- Runs the injection query and synchronises the children with its result: every injected
--- language that can be loaded gets a child (created on demand) whose regions are updated,
--- and children whose language no longer shows up are removed.
--- @return number # time spent collecting the injections, as measured by `tcall`
function LanguageTree:_add_injections()
  local still_used = {} ---@type table<string,boolean>

  local query_time, injections_by_lang = tcall(self._get_injections, self)
  for lang, injection_regions in pairs(injections_by_lang) do
    -- Child language trees should just be ignored if not found, since
    -- they can depend on the text of a node. Intermediate strings
    -- would cause errors for unknown parsers.
    if pcall(language.add, lang) then
      local child = self._children[lang] or self:add_child(lang)
      child:set_included_regions(injection_regions)
      still_used[lang] = true
    end
  end

  -- Drop children for languages that are no longer injected.
  for lang in pairs(self._children) do
    if not still_used[lang] then
      self:remove_child(lang)
    end
  end

  return query_time
end
|
|
|
|
--- Recursively parse all regions in the language tree using |treesitter-parsers|
--- for the corresponding languages and run injection queries on the parsed trees
--- to determine whether child trees should be created and parsed.
---
--- Any region with empty range (`{}`, typically only the root tree) is always parsed;
--- otherwise (typically injections) only if it intersects {range} (or if {range} is `true`).
---
--- @param range boolean|Range|nil: Parse this range in the parser's source.
---     Set to `true` to run a complete parse of the source (Note: Can be slow!)
---     Set to `false|nil` to only parse regions with empty ranges (typically
---     only the root tree without injections).
--- @return table<integer, TSTree>
function LanguageTree:parse(range)
  -- Nothing to do when this tree and all of its children are already up to date.
  if self:is_valid() then
    self:_log('valid')
    return self._trees
  end

  local changes --- @type Range6[]?

  -- Statistics, only used for the log entry below.
  local regions_parsed = 0
  local query_time = 0
  local parse_time = 0

  --- At least 1 region is invalid
  if not self:is_valid(true) then
    changes, regions_parsed, parse_time = self:_parse_regions(range)
    -- Need to run injections when we parsed something
    if regions_parsed > 0 then
      self._injections_processed = false
    end
  end

  -- Injections are only (re)computed when a range (or `true`) was requested.
  if range and not self._injections_processed then
    query_time = self:_add_injections()
    self._injections_processed = true
  end

  self:_log({
    changes = changes and #changes > 0 and changes or nil,
    regions_parsed = regions_parsed,
    parse_time = parse_time,
    query_time = query_time,
    range = range,
  })

  for _, child_tree in pairs(self._children) do
    child_tree:parse(range)
  end

  return self._trees
end
|
|
|
|
---@deprecated Misleading name. Use `LanguageTree:children()` (non-recursive) instead,
---            add recursion yourself if needed.
--- Invokes the callback for each |LanguageTree| and its children recursively
---
---@param fn fun(tree: LanguageTree, lang: string)
---@param include_self boolean|nil Whether to include the invoking tree in the results
function LanguageTree:for_each_child(fn, include_self)
  vim.deprecate('LanguageTree:for_each_child()', 'LanguageTree:children()', '0.11')

  if include_self then
    fn(self, self._lang)
  end

  -- Descendants are always visited, each one including itself.
  for _, child_tree in pairs(self._children) do
    child_tree:for_each_child(fn, true)
  end
end
|
|
|
|
--- Invokes the callback for each |LanguageTree| recursively.
---
--- Note: This includes the invoking tree's child trees as well.
---
---@param fn fun(tree: TSTree, ltree: LanguageTree)
function LanguageTree:for_each_tree(fn)
  -- Own trees first, then the trees of every child.
  for _, ts_tree in pairs(self._trees) do
    fn(ts_tree, self)
  end

  for _, child_tree in pairs(self._children) do
    child_tree:for_each_tree(fn)
  end
end
|
|
|
|
--- Adds a child language to this |LanguageTree|.
---
--- If the language already exists as a child, it will first be removed.
---
---@private
---@param lang string Language to add.
---@return LanguageTree injected
function LanguageTree:add_child(lang)
  if self._children[lang] then
    self:remove_child(lang)
  end

  local new_child = LanguageTree.new(self._source, lang, self._opts, self:lang())

  -- Inherit recursive callbacks
  for cb_name, handlers in pairs(self._callbacks_rec) do
    vim.list_extend(new_child._callbacks_rec[cb_name], handlers)
  end

  self._children[lang] = new_child
  self:invalidate()
  self:_do_callback('child_added', new_child)

  -- Read the entry back rather than returning the local, as the callbacks ran in between.
  return self._children[lang]
end
|
|
|
|
--- Removes a child language from this |LanguageTree|.
---
--- Does nothing if there is no child for {lang}. Otherwise the child is unregistered and
--- destroyed, this tree is invalidated and the `child_removed` callbacks are fired.
---
---@private
---@param lang string Language to remove.
function LanguageTree:remove_child(lang)
  local removed = self._children[lang]
  if not removed then
    return
  end

  self._children[lang] = nil
  removed:destroy()
  self:invalidate()
  self:_do_callback('child_removed', removed)
end
|
|
|
|
--- Destroys this |LanguageTree| and all its children.
---
--- Any cleanup logic should be performed here.
---
--- Note: This DOES NOT remove this tree from a parent. Instead,
--- `remove_child` must be called on the parent to remove it.
function LanguageTree:destroy()
  -- Currently the only work is propagating the call to every child.
  for _, child_tree in pairs(self._children) do
    child_tree:destroy()
  end
end
|
|
|
|
--- Formats a region for log output as `[srow:scol-erow:ecol]`, using the start position of
--- its first range and the end position of its last range. An empty region yields `[]`.
---@param region Range6[]
---@return string
local function region_tostr(region)
  local count = #region
  if count == 0 then
    return '[]'
  end

  local first, last = region[1], region[count]
  return string.format('[%d:%d-%d:%d]', first[1], first[2], last[4], last[5])
end
|
|
|
|
---@private
---Iterate through all the regions. fn returns a boolean to indicate if the
---region is valid or not.
---
---Only regions that are currently considered valid are passed to {fn}; regions already
---marked invalid stay invalid. Nothing happens if the whole tree is already invalid.
---@param fn fun(index: integer, region: Range6[]): boolean
function LanguageTree:_iter_regions(fn)
  if not self._valid then
    return
  end

  -- A non-table value here means "everything valid"; expand it to per-region flags.
  local was_valid = type(self._valid) ~= 'table'
  if was_valid then
    self:_log('was valid', self._valid)
    self._valid = {}
  end

  local all_valid = true

  for index, region in pairs(self:included_regions()) do
    if was_valid or self._valid[index] then
      local still_valid = fn(index, region)
      self._valid[index] = still_valid
      if not still_valid then
        self:_log(function()
          return 'invalidating region', index, region_tostr(region)
        end)
        all_valid = false
      end
    else
      -- Was invalid before and is left untouched.
      all_valid = false
    end
  end

  -- Compress the valid value to 'true' if there are no invalid regions
  if all_valid then
    self._valid = true
  end
end
|
|
|
|
--- Sets the included regions that should be parsed by this |LanguageTree|.
--- A region is a set of nodes and/or ranges that will be parsed in the same context.
---
--- For example, `{ { node1 }, { node2} }` contains two separate regions.
--- They will be parsed by the parser in two different contexts, thus resulting
--- in two separate trees.
---
--- On the other hand, `{ { node1, node2 } }` is a single region consisting of
--- two nodes. This will be parsed by the parser in a single context, thus resulting
--- in a single tree.
---
--- This allows for embedded languages to be parsed together across different
--- nodes, which is useful for templating languages like ERB and EJS.
---
---@private
---@param new_regions (Range4|Range6|TSNode)[][] List of regions this tree should manage and parse.
function LanguageTree:set_included_regions(new_regions)
  self._has_regions = true

  -- Transform the tables from 4 element long to 6 element long (with byte offset)
  -- and replace nodes (userdata) by their 6-element range. Done in place.
  for _, region in ipairs(new_regions) do
    for idx, item in ipairs(region) do
      local kind = type(item)
      if kind == 'userdata' then
        region[idx] = { item:range(true) }
      elseif kind == 'table' and #item == 4 then
        region[idx] = Range.add_bytes(self._source, item --[[@as Range4]])
      end
    end
  end

  -- included_regions is not guaranteed to be list-like, but this is still sound, i.e. if
  -- new_regions is different from included_regions, then outdated regions in included_regions are
  -- invalidated. For example, if included_regions = new_regions ++ hole ++ outdated_regions, then
  -- outdated_regions is invalidated by _iter_regions in else branch.
  if #self:included_regions() == #new_regions then
    -- Same count: only regions whose ranges actually differ become invalid.
    self:_iter_regions(function(idx, current)
      return vim.deep_equal(new_regions[idx], current)
    end)
  else
    -- TODO(lewis6991): inefficient; invalidate trees incrementally
    for _, tree in pairs(self._trees) do
      self:_do_callback('changedtree', tree:included_ranges(true), tree)
    end
    self._trees = {}
    self:invalidate()
  end

  self._regions = new_regions
end
|
|
|
|
---Gets the set of included regions managed by this LanguageTree. This can be different from the
---regions set by injection query, because a partial |LanguageTree:parse()| drops the regions
---outside the requested range.
---@return table<integer, Range6[]>
function LanguageTree:included_regions()
  local cached = self._regions
  if cached then
    return cached
  end

  if not self._has_regions then
    -- treesitter.c will default empty ranges to { -1, -1, -1, -1, -1, -1} (the full range)
    return { {} }
  end

  -- No cached regions: rebuild them from the trees that exist. A tree with regions but
  -- without any parsed tree therefore yields an empty result.
  local rebuilt = {} ---@type Range6[][]
  for index, tree in pairs(self._trees) do
    rebuilt[index] = tree:included_ranges(true)
  end

  self._regions = rebuilt
  return rebuilt
end
|
|
|
|
--- Computes the ranges an injection should cover for {node}.
---
--- With {include_children} set, or when the node has no named children, this is just the
--- node's range as reported by `vim.treesitter.get_range`. Otherwise the ranges of the named
--- children are cut out and only the gaps before, between and after them are returned.
---@param node TSNode
---@param source string|integer
---@param metadata TSMetadata
---@param include_children boolean
---@return Range6[]
local function get_node_ranges(node, source, metadata, include_children)
  local full_range = vim.treesitter.get_range(node, source, metadata)
  local n_children = node:named_child_count()

  if include_children or n_children == 0 then
    return { full_range }
  end

  local gaps = {} ---@type Range6[]

  -- (gap_row, gap_col, gap_byte) is where the next uncovered stretch starts.
  local gap_row, gap_col, gap_byte, end_row, end_col, end_byte = Range.unpack6(full_range)

  -- We are excluding children so we need to mask out their ranges
  for child_index = 0, n_children - 1 do
    local child = assert(node:named_child(child_index))
    local c_srow, c_scol, c_sbyte, c_erow, c_ecol, c_ebyte = child:range(true)
    if c_srow > gap_row or c_scol > gap_col then
      table.insert(gaps, { gap_row, gap_col, gap_byte, c_srow, c_scol, c_sbyte })
    end
    -- The next gap can only start where this child ends.
    gap_row, gap_col, gap_byte = c_erow, c_ecol, c_ebyte
  end

  -- Whatever is left between the last child and the end of the node.
  if end_row > gap_row or end_col > gap_col then
    local tail = { gap_row, gap_col, gap_byte, end_row, end_col, end_byte }
    table.insert(gaps, Range.add_bytes(source, tail))
  end

  return gaps
end
|
|
|
|
---@class TSInjectionElem
|
|
---@field combined boolean
|
|
---@field regions Range6[][]
|
|
|
|
---@alias TSInjection table<string,table<integer,TSInjectionElem>>
|
|
|
|
--- Records one injection match in {t}, which is nested as
--- `t[tree_index][lang][pattern] = { combined = ..., regions = { ... } }`.
--- Missing levels are created on demand; {combined} is only stored when the entry for
--- {pattern} is first created. Empty {ranges} are ignored.
---@param t table<integer,TSInjection>
---@param tree_index integer
---@param pattern integer
---@param lang string
---@param combined boolean
---@param ranges Range6[]
local function add_injection(t, tree_index, pattern, lang, combined, ranges)
  if #ranges == 0 then
    -- Make sure not to add an empty range set as this is interpreted to mean the whole buffer.
    return
  end

  -- Each tree index should be isolated from the other nodes.
  local by_lang = t[tree_index]
  if not by_lang then
    by_lang = {}
    t[tree_index] = by_lang
  end

  local by_pattern = by_lang[lang]
  if not by_pattern then
    by_pattern = {}
    by_lang[lang] = by_pattern
  end

  -- Key this by pattern. If combined is set to true all captures of this pattern
  -- will be parsed by treesitter as the same "source".
  -- If combined is false, each "region" will be parsed as a single source.
  local entry = by_pattern[pattern]
  if not entry then
    entry = { combined = combined, regions = {} }
    by_pattern[pattern] = entry
  end

  entry.regions[#entry.regions + 1] = ranges
end
|
|
|
|
-- TODO(clason): replace by refactored `ts.has_parser` API (without registering)
--- Checks whether a parser for {lang} is available: either `vim._ts_has_language` already
--- reports it, or a `parser/{lang}.*` file is found on the runtimepath.
--- The result of this function is cached to prevent nvim_get_runtime_file from being
--- called too often
--- @param lang string parser name
--- @return boolean # true if parser for {lang} exists on rtp
local has_parser = vim.func._memoize(1, function(lang)
  local loaded = vim._ts_has_language(lang)
  if loaded then
    return loaded
  end

  local parser_files = vim.api.nvim_get_runtime_file('parser/' .. lang .. '.*', false)
  return #parser_files > 0
end)
|
|
|
|
--- Return parser name for language (if exists) or filetype (if registered and exists).
--- Also attempts with the input lower-cased.
---
--- Lookup order: {alias} as a parser name, lower-cased {alias} as a parser name, then the
--- language registered for {alias}, then the language registered for lower-cased {alias}.
---
---@param alias string language or filetype name
---@return string? # resolved parser name
local function resolve_lang(alias)
  if has_parser(alias) then
    return alias
  end

  local lowered = alias:lower()
  if has_parser(lowered) then
    return lowered
  end

  local from_filetype = vim.treesitter.language.get_lang(alias)
  if from_filetype and has_parser(from_filetype) then
    return from_filetype
  end

  from_filetype = vim.treesitter.language.get_lang(lowered)
  if from_filetype and has_parser(from_filetype) then
    return from_filetype
  end
end
|
|
|
|
---@private
--- Extract injections according to:
--- https://tree-sitter.github.io/tree-sitter/syntax-highlighting#language-injection
---
--- The language comes from, in order of precedence: an `injection.language` capture in the
--- match, then the `injection.self` / `injection.parent` / `injection.language` metadata.
--- The ranges come from the `injection.content` capture.
---@param match table<integer,TSNode>
---@param metadata TSMetadata
---@return string?, boolean, Range6[]
function LanguageTree:_get_injection(match, metadata)
  local combined = metadata['injection.combined'] ~= nil
  local include_children = metadata['injection.include-children'] ~= nil
  local injection_lang = metadata['injection.language'] --[[@as string?]]

  -- Language from metadata: own language, then parent language, then the named language.
  local lang
  if metadata['injection.self'] ~= nil then
    lang = self:lang()
  end
  if not lang and metadata['injection.parent'] ~= nil then
    lang = self._parent_lang
  end
  if not lang then
    lang = injection_lang and resolve_lang(injection_lang)
  end

  local ranges = {} ---@type Range6[]

  for id, node in pairs(match) do
    local capture = self._injection_query.captures[id]
    if capture == 'injection.content' then
      ranges = get_node_ranges(node, self._source, metadata[id], include_children)
    elseif capture == 'injection.language' then
      -- Lang should override any other language tag
      local text = vim.treesitter.get_node_text(node, self._source, { metadata = metadata[id] })
      lang = resolve_lang(text)
    end
  end

  return lang, combined, ranges
end
|
|
|
|
--- Flattens a list of regions into a single list of ranges, keeping their order.
--- Can't use vim.tbl_flatten since a range is just a table.
---@param regions Range6[][]
---@return Range6[]
local function combine_regions(regions)
  local flat = {} ---@type Range6[]
  local count = 0
  for _, region in ipairs(regions) do
    for _, range in ipairs(region) do
      count = count + 1
      flat[count] = range
    end
  end
  return flat
end
|
|
|
|
--- Gets language injection regions by language.
---
--- This is where most of the injection processing occurs: the injection query is run over
--- every tree, matches are grouped per tree, language and pattern, and the groups are then
--- turned into one list of regions per language.
---
--- TODO: Allow for an offset predicate to tailor the injection range
---       instead of using the entire nodes range.
--- @private
--- @return table<string, Range6[][]>
function LanguageTree:_get_injections()
  local injection_query = self._injection_query
  if not injection_query then
    return {}
  end

  ---@type table<integer,TSInjection>
  local collected = {}

  for tree_index, tree in pairs(self._trees) do
    local root = tree:root()
    local first_row, _, last_row, _ = root:range()

    for pattern, match, metadata in
      injection_query:iter_matches(root, self._source, first_row, last_row + 1)
    do
      local lang, combined, ranges = self:_get_injection(match, metadata)
      if not lang then
        self:_log('match from injection query failed for pattern', pattern)
      else
        add_injection(collected, tree_index, pattern, lang, combined, ranges)
      end
    end
  end

  ---@type table<string,Range6[][]>
  local by_lang = {}

  -- Generate a map by lang of node lists.
  -- Each list is a set of ranges that should be parsed together.
  for _, lang_map in pairs(collected) do
    for lang, patterns in pairs(lang_map) do
      local regions = by_lang[lang]
      if not regions then
        regions = {}
        by_lang[lang] = regions
      end

      for _, entry in pairs(patterns) do
        if entry.combined then
          -- All captures of a combined pattern form one single region.
          table.insert(regions, combine_regions(entry.regions))
        else
          for _, ranges in pairs(entry.regions) do
            table.insert(regions, ranges)
          end
        end
      end
    end
  end

  return by_lang
end
|
|
|
|
---@private
--- Calls every handler registered for {cb_name} with the given arguments: first the
--- handlers of this tree only, then the recursive ones.
---@param cb_name TSCallbackName
function LanguageTree:_do_callback(cb_name, ...)
  for _, handler in ipairs(self._callbacks[cb_name]) do
    handler(...)
  end

  for _, handler in ipairs(self._callbacks_rec[cb_name]) do
    handler(...)
  end
end
|
|
|
|
---@package
--- Applies a buffer edit to this tree and all of its children: every stored tree is edited,
--- the cached regions are dropped, and each region that is empty or touched by the old
--- extent of the edit is marked invalid.
function LanguageTree:_edit(
  start_byte,
  end_byte_old,
  end_byte_new,
  start_row,
  start_col,
  end_row_old,
  end_col_old,
  end_row_new,
  end_col_new
)
  for _, ts_tree in pairs(self._trees) do
    ts_tree:edit(
      start_byte,
      end_byte_old,
      end_byte_new,
      start_row,
      start_col,
      end_row_old,
      end_col_old,
      end_row_new,
      end_col_new
    )
  end

  -- The cached regions are stale now; included_regions() will rebuild them from the trees.
  self._regions = nil

  -- The extent that was replaced, i.e. start up to the *old* end.
  local changed_range = {
    start_row,
    start_col,
    start_byte,
    end_row_old,
    end_col_old,
    end_byte_old,
  }

  --- Whether a region survives the edit untouched.
  ---@param region Range6[]
  ---@return boolean
  local function unaffected(_, region)
    if #region == 0 then
      -- empty region, use the full source
      return false
    end
    for _, r in ipairs(region) do
      if Range.intercepts(r, changed_range) then
        return false
      end
    end
    return true
  end

  -- Validate regions after editing the tree
  self:_iter_regions(unaffected)

  for _, child_tree in pairs(self._children) do
    child_tree:_edit(
      start_byte,
      end_byte_old,
      end_byte_new,
      start_row,
      start_col,
      end_row_old,
      end_col_old,
      end_row_new,
      end_col_new
    )
  end
end
|
|
|
|
---@package
--- Handles a byte-level change: translates the relative extents into absolute
--- end positions, applies the edit to all trees and then fires the `bytes` callbacks with
--- the original arguments.
---@param bufnr integer
---@param changed_tick integer
---@param start_row integer
---@param start_col integer
---@param start_byte integer
---@param old_row integer
---@param old_col integer
---@param old_byte integer
---@param new_row integer
---@param new_col integer
---@param new_byte integer
function LanguageTree:_on_bytes(
  bufnr,
  changed_tick,
  start_row,
  start_col,
  start_byte,
  old_row,
  old_col,
  old_byte,
  new_row,
  new_col,
  new_byte
)
  -- An extent that stays on the start row has its column offset by start_col; an extent
  -- spanning further rows is used as given.
  local old_end_col = old_col
  if old_row == 0 then
    old_end_col = old_col + start_col
  end

  local new_end_col = new_col
  if new_row == 0 then
    new_end_col = new_col + start_col
  end

  self:_log(
    'on_bytes',
    bufnr,
    changed_tick,
    start_row,
    start_col,
    start_byte,
    old_row,
    old_col,
    old_byte,
    new_row,
    new_col,
    new_byte
  )

  -- Edit trees together BEFORE emitting a bytes callback.
  self:_edit(
    start_byte,
    start_byte + old_byte,
    start_byte + new_byte,
    start_row,
    start_col,
    start_row + old_row,
    old_end_col,
    start_row + new_row,
    new_end_col
  )

  self:_do_callback(
    'bytes',
    bufnr,
    changed_tick,
    start_row,
    start_col,
    start_byte,
    old_row,
    old_col,
    old_byte,
    new_row,
    new_col,
    new_byte
  )
end
|
|
|
|
---@package
--- Reload handler: invalidates this tree and its children in reload mode, which
--- discards all parsed trees.
function LanguageTree:_on_reload()
  self:invalidate(true)
end
|
|
|
|
--- Detach handler: invalidates the tree, emits the 'detach' callbacks and, when a log file
--- is open, writes a final log entry and shuts logging down.
---
--- Safe to call more than once: the log file handle is dropped after it is closed, so a
--- repeated detach neither calls the cleared logger nor closes the file twice.
---@package
---@param ... any Forwarded unchanged to the 'detach' callbacks.
function LanguageTree:_on_detach(...)
  self:invalidate(true)
  self:_do_callback('detach', ...)

  local logfile = self._logfile
  if not logfile then
    return
  end

  -- The logger is a plain function (called without `self`); it may already have been cleared.
  if self._logger then
    self._logger('nvim', 'detaching')
  end
  self._logger = nil

  logfile:close()
  -- Forget the closed handle so later calls see logging as disabled.
  self._logfile = nil
end
|
|
|
|
--- Registers callbacks for the |LanguageTree|.
---@param cbs table An |nvim_buf_attach()|-like table argument. Supported handlers:
---           - `on_bytes` : see |nvim_buf_attach()|, but this will be called _after_ the parsers callback.
---           - `on_changedtree` : called every time the tree has syntactical changes.
---              It receives two arguments: a table of the ranges (as node ranges) that
---              changed, and the changed tree.
---           - `on_child_added` : emitted when a child is added to the tree.
---           - `on_child_removed` : emitted when a child is removed from the tree.
---           - `on_detach` : emitted when the buffer is detached, see |nvim_buf_detach_event|.
---              Takes one argument, the number of the buffer.
--- @param recursive? boolean Apply callbacks recursively for all children. Any new children will
---                           also inherit the callbacks.
function LanguageTree:register_cbs(cbs, recursive)
  ---@cast cbs table<TSCallbackNameOn,function>
  if not cbs then
    return
  end

  -- Recursive registrations go to their own store; fall back to the plain one otherwise.
  local store
  if recursive then
    store = self._callbacks_rec
  end
  if not store then
    store = self._callbacks
  end

  -- TSCallbackNames maps a handler key in `cbs` to the key of its callback list.
  for handler_key, list_key in pairs(TSCallbackNames) do
    if cbs[handler_key] then
      table.insert(store[list_key], cbs[handler_key])
    end
  end

  if not recursive then
    return
  end

  -- Propagate to the children that exist right now.
  for _, child in pairs(self._children) do
    child:register_cbs(cbs, true)
  end
end
|
|
|
|
--- Checks whether the range of {tree}'s root node contains {range}.
---@param tree TSTree
---@param range Range
---@return boolean
local function tree_contains(tree, range)
  local root_range = { tree:root():range() }
  return Range.contains(root_range, range)
end
|
|
|
|
--- Determines whether {range} is contained in the |LanguageTree|, i.e. whether at least one
--- of its trees contains it.
---
---@param range Range4 `{ start_line, start_col, end_line, end_col }`
---@return boolean
function LanguageTree:contains(range)
  local found = false

  for _, tree in pairs(self._trees) do
    if tree_contains(tree, range) then
      found = true
      break
    end
  end

  return found
end
|
|
|
|
--- Gets the tree that contains {range}.
---
--- When injections are not ignored, the children are searched first and the first tree
--- they report is returned; only then are this LanguageTree's own trees checked.
---
---@param range Range4 `{ start_line, start_col, end_line, end_col }`
---@param opts table|nil Optional keyword arguments:
---             - ignore_injections boolean Ignore injected languages (default true)
---@return TSTree|nil
function LanguageTree:tree_for_range(range, opts)
  local options = opts or {}
  local search_injections = not vim.F.if_nil(options.ignore_injections, true)

  if search_injections then
    for _, child in pairs(self._children) do
      local injected = child:tree_for_range(range, options)
      if injected then
        return injected
      end
    end
  end

  local match = nil
  for _, tree in pairs(self._trees) do
    if tree_contains(tree, range) then
      match = tree
      break
    end
  end

  return match
end
|
|
|
|
--- Gets the smallest named node that contains {range}.
---
--- The tree is looked up with |LanguageTree:tree_for_range()|; nothing is returned when no
--- tree contains {range}.
---
---@param range Range4 `{ start_line, start_col, end_line, end_col }`
---@param opts table|nil Optional keyword arguments:
---             - ignore_injections boolean Ignore injected languages (default true)
---@return TSNode | nil Found node
function LanguageTree:named_node_for_range(range, opts)
  local tree = self:tree_for_range(range, opts)
  if not tree then
    return
  end

  local root = tree:root()
  return root:named_descendant_for_range(unpack(range))
end
|
|
|
|
--- Gets the appropriate language that contains {range}.
---
--- Delegates to the first child that contains {range}; if no child does, this LanguageTree
--- itself is returned.
---
---@param range Range4 `{ start_line, start_col, end_line, end_col }`
---@return LanguageTree Managing {range}
function LanguageTree:language_for_range(range)
  local owner = nil

  for _, child in pairs(self._children) do
    if child:contains(range) then
      owner = child
      break
    end
  end

  if owner then
    return owner:language_for_range(range)
  end

  return self
end
|
|
|
|
-- Module value: the LanguageTree table that the methods in this file are defined on.
return LanguageTree
|