fix(treesitter): _trees may not be list-like

Problem:
With incremental injection parsing, injected languages' parsers parse
only the relevant regions and stores the result in _trees with the index
of the corresponding region. Therefore, there can be holes in _trees.

Solution:
* Use generic table functions where appropriate.
* Fix type annotations and docs.
This commit is contained in:
Jaehwang Jung 2023-09-17 17:54:30 +09:00 committed by Lewis Russell
parent f54677132b
commit 71d9b7d15c
3 changed files with 52 additions and 31 deletions

View File

@ -1101,10 +1101,12 @@ LanguageTree:for_each_tree({fn}) *LanguageTree:for_each_tree()*
• {fn} fun(tree: TSTree, ltree: LanguageTree) • {fn} fun(tree: TSTree, ltree: LanguageTree)
LanguageTree:included_regions() *LanguageTree:included_regions()* LanguageTree:included_regions() *LanguageTree:included_regions()*
Gets the set of included regions Gets the set of included regions managed by this LanguageTree . This can be different from the regions set by injection query, because a
partial |LanguageTree:parse()| drops the regions outside the requested
range.
Return: ~ Return: ~
Range6[][] table<integer, Range6[]>
LanguageTree:invalidate({reload}) *LanguageTree:invalidate()* LanguageTree:invalidate({reload}) *LanguageTree:invalidate()*
Invalidates this parser and all its children Invalidates this parser and all its children
@ -1113,10 +1115,12 @@ LanguageTree:invalidate({reload}) *LanguageTree:invalidate()*
• {reload} (boolean|nil) • {reload} (boolean|nil)
LanguageTree:is_valid({exclude_children}) *LanguageTree:is_valid()* LanguageTree:is_valid({exclude_children}) *LanguageTree:is_valid()*
Determines whether this tree is valid. If the tree is invalid, call `parse()` . This will return the updated tree. Returns whether this LanguageTree is valid, i.e., |LanguageTree:trees()| reflects the latest state of the
source. If invalid, user should call |LanguageTree:parse()|.
Parameters: ~ Parameters: ~
• {exclude_children} (boolean|nil) • {exclude_children} (boolean|nil) whether to ignore the validity of
children (default `false`)
Return: ~ Return: ~
(boolean) (boolean)
@ -1165,7 +1169,7 @@ LanguageTree:parse({range}) *LanguageTree:parse()*
injections). injections).
Return: ~ Return: ~
TSTree[] table<integer, TSTree>
*LanguageTree:register_cbs()* *LanguageTree:register_cbs()*
LanguageTree:register_cbs({cbs}, {recursive}) LanguageTree:register_cbs({cbs}, {recursive})
@ -1207,7 +1211,12 @@ LanguageTree:tree_for_range({range}, {opts})
TSTree|nil TSTree|nil
LanguageTree:trees() *LanguageTree:trees()* LanguageTree:trees() *LanguageTree:trees()*
Returns all trees this language tree contains. Does not include child Returns all trees of the regions parsed by this parser. Does not include
languages. child languages. The result is list-like if
• this LanguageTree is the root, in which case the result is empty or a singleton list; or
• the root LanguageTree is fully parsed.
Return: ~
table<integer, TSTree>
vim:tw=78:ts=8:sw=4:sts=4:et:ft=help:norl: vim:tw=78:ts=8:sw=4:sts=4:et:ft=help:norl:

View File

@ -50,7 +50,8 @@ function TSNode:_rawquery(query, captures, start, end_, opts) end
---@alias TSLoggerCallback fun(logtype: 'parse'|'lex', msg: string) ---@alias TSLoggerCallback fun(logtype: 'parse'|'lex', msg: string)
---@class TSParser ---@class TSParser
---@field parse fun(self: TSParser, tree: TSTree?, source: integer|string, include_bytes: boolean?): TSTree, integer[] ---@field parse fun(self: TSParser, tree: TSTree?, source: integer|string, include_bytes: true): TSTree, Range6[]
---@field parse fun(self: TSParser, tree: TSTree?, source: integer|string, include_bytes: false|nil): TSTree, Range4[]
---@field reset fun(self: TSParser) ---@field reset fun(self: TSParser)
---@field included_ranges fun(self: TSParser, include_bytes: boolean?): integer[] ---@field included_ranges fun(self: TSParser, include_bytes: boolean?): integer[]
---@field set_included_ranges fun(self: TSParser, ranges: (Range6|TSNode)[]) ---@field set_included_ranges fun(self: TSParser, ranges: (Range6|TSNode)[])

View File

@ -78,13 +78,14 @@ local TSCallbackNames = {
---@field private _opts table Options ---@field private _opts table Options
---@field private _parser TSParser Parser for language ---@field private _parser TSParser Parser for language
---@field private _has_regions boolean ---@field private _has_regions boolean
---@field private _regions Range6[][]? ---@field private _regions table<integer, Range6[]>?
---List of regions this tree should manage and parse. If nil then regions are ---List of regions this tree should manage and parse. If nil then regions are
---taken from _trees. This is mostly a short-lived cache for included_regions() ---taken from _trees. This is mostly a short-lived cache for included_regions()
---@field private _lang string Language name ---@field private _lang string Language name
---@field private _parent_lang? string Parent language name ---@field private _parent_lang? string Parent language name
---@field private _source (integer|string) Buffer or string to parse ---@field private _source (integer|string) Buffer or string to parse
---@field private _trees TSTree[] Reference to parsed tree (one for each language) ---@field private _trees table<integer, TSTree> Reference to parsed tree (one for each language).
---Each key is the index of region, which is synced with _regions and _valid.
---@field private _valid boolean|table<integer,boolean> If the parsed tree is valid ---@field private _valid boolean|table<integer,boolean> If the parsed tree is valid
---@field private _logger? fun(logtype: string, msg: string) ---@field private _logger? fun(logtype: string, msg: string)
---@field private _logfile? file* ---@field private _logfile? file*
@ -211,7 +212,7 @@ function LanguageTree:_log(...)
end end
local info = debug.getinfo(2, 'nl') local info = debug.getinfo(2, 'nl')
local nregions = #self:included_regions() local nregions = vim.tbl_count(self:included_regions())
local prefix = local prefix =
string.format('%s:%d: (#regions=%d) ', info.name or '???', info.currentline or 0, nregions) string.format('%s:%d: (#regions=%d) ', info.name or '???', info.currentline or 0, nregions)
@ -244,8 +245,13 @@ function LanguageTree:invalidate(reload)
end end
end end
--- Returns all trees this language tree contains. --- Returns all trees of the regions parsed by this parser.
--- Does not include child languages. --- Does not include child languages.
--- The result is list-like if
--- * this LanguageTree is the root, in which case the result is empty or a singleton list; or
--- * the root LanguageTree is fully parsed.
---
---@return table<integer, TSTree>
function LanguageTree:trees() function LanguageTree:trees()
return self._trees return self._trees
end end
@ -255,16 +261,15 @@ function LanguageTree:lang()
return self._lang return self._lang
end end
--- Determines whether this tree is valid. --- Returns whether this LanguageTree is valid, i.e., |LanguageTree:trees()| reflects the latest
--- If the tree is invalid, call `parse()`. --- state of the source. If invalid, user should call |LanguageTree:parse()|.
--- This will return the updated tree. ---@param exclude_children boolean|nil whether to ignore the validity of children (default `false`)
---@param exclude_children boolean|nil
---@return boolean ---@return boolean
function LanguageTree:is_valid(exclude_children) function LanguageTree:is_valid(exclude_children)
local valid = self._valid local valid = self._valid
if type(valid) == 'table' then if type(valid) == 'table' then
for i = 1, #self:included_regions() do for i, _ in pairs(self:included_regions()) do
if not valid[i] then if not valid[i] then
return false return false
end end
@ -328,7 +333,7 @@ end
--- @private --- @private
--- @param range boolean|Range? --- @param range boolean|Range?
--- @return integer[] changes --- @return Range6[] changes
--- @return integer no_regions_parsed --- @return integer no_regions_parsed
--- @return number total_parse_time --- @return number total_parse_time
function LanguageTree:_parse_regions(range) function LanguageTree:_parse_regions(range)
@ -370,7 +375,7 @@ function LanguageTree:_add_injections()
local seen_langs = {} ---@type table<string,boolean> local seen_langs = {} ---@type table<string,boolean>
local query_time, injections_by_lang = tcall(self._get_injections, self) local query_time, injections_by_lang = tcall(self._get_injections, self)
for lang, injection_ranges in pairs(injections_by_lang) do for lang, injection_regions in pairs(injections_by_lang) do
local has_lang = pcall(language.add, lang) local has_lang = pcall(language.add, lang)
-- Child language trees should just be ignored if not found, since -- Child language trees should just be ignored if not found, since
@ -383,7 +388,7 @@ function LanguageTree:_add_injections()
child = self:add_child(lang) child = self:add_child(lang)
end end
child:set_included_regions(injection_ranges) child:set_included_regions(injection_regions)
seen_langs[lang] = true seen_langs[lang] = true
end end
end end
@ -408,14 +413,14 @@ end
--- Set to `true` to run a complete parse of the source (Note: Can be slow!) --- Set to `true` to run a complete parse of the source (Note: Can be slow!)
--- Set to `false|nil` to only parse regions with empty ranges (typically --- Set to `false|nil` to only parse regions with empty ranges (typically
--- only the root tree without injections). --- only the root tree without injections).
--- @return TSTree[] --- @return table<integer, TSTree>
function LanguageTree:parse(range) function LanguageTree:parse(range)
if self:is_valid() then if self:is_valid() then
self:_log('valid') self:_log('valid')
return self._trees return self._trees
end end
local changes --- @type Range6? local changes --- @type Range6[]?
-- Collect some stats -- Collect some stats
local no_regions_parsed = 0 local no_regions_parsed = 0
@ -565,7 +570,7 @@ function LanguageTree:_iter_regions(fn)
local all_valid = true local all_valid = true
for i, region in ipairs(self:included_regions()) do for i, region in pairs(self:included_regions()) do
if was_valid or self._valid[i] then if was_valid or self._valid[i] then
self._valid[i] = fn(i, region) self._valid[i] = fn(i, region)
if not self._valid[i] then if not self._valid[i] then
@ -601,7 +606,7 @@ end
--- nodes, which is useful for templating languages like ERB and EJS. --- nodes, which is useful for templating languages like ERB and EJS.
--- ---
---@private ---@private
---@param new_regions Range6[][] List of regions this tree should manage and parse. ---@param new_regions (Range4|Range6|TSNode)[][] List of regions this tree should manage and parse.
function LanguageTree:set_included_regions(new_regions) function LanguageTree:set_included_regions(new_regions)
self._has_regions = true self._has_regions = true
@ -609,16 +614,20 @@ function LanguageTree:set_included_regions(new_regions)
for _, region in ipairs(new_regions) do for _, region in ipairs(new_regions) do
for i, range in ipairs(region) do for i, range in ipairs(region) do
if type(range) == 'table' and #range == 4 then if type(range) == 'table' and #range == 4 then
region[i] = Range.add_bytes(self._source, range) region[i] = Range.add_bytes(self._source, range --[[@as Range4]])
elseif type(range) == 'userdata' then elseif type(range) == 'userdata' then
region[i] = { range:range(true) } region[i] = { range:range(true) }
end end
end end
end end
-- included_regions is not guaranteed to be list-like, but this is still sound, i.e. if
-- new_regions is different from included_regions, then outdated regions in included_regions are
-- invalidated. For example, if included_regions = new_regions ++ hole ++ outdated_regions, then
-- outdated_regions is invalidated by _iter_regions in else branch.
if #self:included_regions() ~= #new_regions then if #self:included_regions() ~= #new_regions then
-- TODO(lewis6991): inefficient; invalidate trees incrementally -- TODO(lewis6991): inefficient; invalidate trees incrementally
for _, t in ipairs(self._trees) do for _, t in pairs(self._trees) do
self:_do_callback('changedtree', t:included_ranges(true), t) self:_do_callback('changedtree', t:included_ranges(true), t)
end end
self._trees = {} self._trees = {}
@ -632,20 +641,22 @@ function LanguageTree:set_included_regions(new_regions)
self._regions = new_regions self._regions = new_regions
end end
---Gets the set of included regions ---Gets the set of included regions managed by this LanguageTree. This can be different from the
---@return Range6[][] ---regions set by injection query, because a partial |LanguageTree:parse()| drops the regions
---outside the requested range.
---@return table<integer, Range6[]>
function LanguageTree:included_regions() function LanguageTree:included_regions()
if self._regions then if self._regions then
return self._regions return self._regions
end end
if not self._has_regions or #self._trees == 0 then if not self._has_regions or next(self._trees) == nil then
-- treesitter.c will default empty ranges to { -1, -1, -1, -1, -1, -1} -- treesitter.c will default empty ranges to { -1, -1, -1, -1, -1, -1}
return { {} } return { {} }
end end
local regions = {} ---@type Range6[][] local regions = {} ---@type Range6[][]
for i, _ in ipairs(self._trees) do for i, _ in pairs(self._trees) do
regions[i] = self._trees[i]:included_ranges(true) regions[i] = self._trees[i]:included_ranges(true)
end end
@ -801,7 +812,7 @@ local function combine_regions(regions)
return result return result
end end
--- Gets language injection points by language. --- Gets language injection regions by language.
--- ---
--- This is where most of the injection processing occurs. --- This is where most of the injection processing occurs.
--- ---