feat(treesitter): async parsing

**Problem:** Parsing can be slow for large files, and it is a blocking
operation which can be disruptive and annoying.

**Solution:** Provide a function for asynchronous parsing, which accepts
a callback to be run after parsing completes.

Co-authored-by: Lewis Russell <lewis6991@gmail.com>
This commit is contained in:
Riley Bruins 2024-12-18 10:48:33 -08:00
parent 02bc40c194
commit 103ad6f1e6
8 changed files with 212 additions and 26 deletions

View File

@ -266,6 +266,7 @@ PERFORMANCE
• Significantly reduced redraw time for long lines with treesitter
highlighting.
• Treesitter highlighting is now asynchronous.
PLUGINS
@ -303,6 +304,8 @@ TREESITTER
• |treesitter-directive-trim!| can trim all whitespace (not just empty lines)
from both sides of a node.
• |vim.treesitter.get_captures_at_pos()| now returns the `id` of each capture
• |LanguageTree:parse()| optionally supports asynchronous invocation, which is
activated by passing the `on_parse` callback parameter.
TUI

View File

@ -4657,8 +4657,8 @@ A jump table for the options with a short description can be found at |Q_op|.
'redrawtime' 'rdt' number (default 2000)
global
Time in milliseconds for redrawing the display. Applies to
'hlsearch', 'inccommand', |:match| highlighting and syntax
highlighting.
'hlsearch', 'inccommand', |:match| highlighting, syntax highlighting,
and async |LanguageTree:parse()|.
When redrawing takes more than this many milliseconds no further
matches will be highlighted.
For syntax highlighting the time applies per window. When over the

View File

@ -1091,6 +1091,9 @@ start({bufnr}, {lang}) *vim.treesitter.start()*
required for some plugins. In this case, add `vim.bo.syntax = 'on'` after
the call to `start`.
Note: By default, the highlighter parses code asynchronously, using a
segment time of 3ms.
Example: >lua
vim.api.nvim_create_autocmd( 'FileType', { pattern = 'tex',
callback = function(args)
@ -1594,7 +1597,7 @@ LanguageTree:node_for_range({range}, {opts})
Return: ~
(`TSNode?`)
LanguageTree:parse({range}) *LanguageTree:parse()*
LanguageTree:parse({range}, {on_parse}) *LanguageTree:parse()*
Recursively parse all regions in the language tree using
|treesitter-parsers| for the corresponding languages and run injection
queries on the parsed trees to determine whether child trees should be
@ -1605,14 +1608,20 @@ LanguageTree:parse({range}) *LanguageTree:parse()*
if {range} is `true`).
Parameters: ~
• {range} (`boolean|Range?`) Parse this range in the parser's source.
Set to `true` to run a complete parse of the source (Note:
Can be slow!) Set to `false|nil` to only parse regions with
empty ranges (typically only the root tree without
injections).
• {range} (`boolean|Range?`) Parse this range in the parser's
source. Set to `true` to run a complete parse of the
source (Note: Can be slow!) Set to `false|nil` to only
parse regions with empty ranges (typically only the root
tree without injections).
• {on_parse} (`fun(trees: TSTrees, timeout_reached: boolean)?`)
Function invoked when parsing completes. When provided,
parsing will run asynchronously. The function is passed
the list of trees returned by the parse, as well as a
boolean indicating whether or not the parse timed out,
determined by 'redrawtime'.
Return: ~
(`table<integer, TSTree>`)
(`TSTrees?`)
*LanguageTree:register_cbs()*
LanguageTree:register_cbs({cbs}, {recursive})
@ -1666,7 +1675,7 @@ LanguageTree:trees() *LanguageTree:trees()*
• the root LanguageTree is fully parsed.
Return: ~
(`table<integer, TSTree>`)
(`TSTrees`)
vim:tw=78:ts=8:sw=4:sts=4:et:ft=help:norl:

View File

@ -61,7 +61,7 @@ function M._create_parser(bufnr, lang, opts)
{ on_bytes = bytes_cb, on_detach = detach_cb, on_reload = reload_cb, preview = true }
)
self:parse()
self:parse(nil, function() end)
return self
end
@ -397,6 +397,8 @@ end
--- Note: By default, disables regex syntax highlighting, which may be required for some plugins.
--- In this case, add `vim.bo.syntax = 'on'` after the call to `start`.
---
--- Note: By default, the highlighter parses code asynchronously, using a segment time of 3ms.
---
--- Example:
---
--- ```lua

View File

@ -82,7 +82,6 @@ TSHighlighter.__index = TSHighlighter
---@param tree vim.treesitter.LanguageTree parser object to use for highlighting
---@param opts (table|nil) Configuration of the highlighter:
--- - queries table overwrite queries used by the highlighter
---@return vim.treesitter.highlighter Created highlighter object
function TSHighlighter.new(tree, opts)
local self = setmetatable({}, TSHighlighter)
@ -147,7 +146,7 @@ function TSHighlighter.new(tree, opts)
vim.opt_local.spelloptions:append('noplainbuffer')
end)
self.tree:parse()
self.tree:parse(nil, function() end)
return self
end
@ -391,12 +390,22 @@ function TSHighlighter._on_win(_, _win, buf, topline, botline)
if not self then
return false
end
self.tree:parse({ topline, botline + 1 })
self:prepare_highlight_states(topline, botline + 1)
local range = { topline, botline + 1 }
self.tree:parse(range, function(_, timeout_reached)
if not timeout_reached then
self:_async_parse_callback(range)
end
end)
self.redraw_count = self.redraw_count + 1
return true
end
--- Completion hook for an asynchronous parse started from `_on_win`: rebuilds the highlight
--- states for the parsed rows and then asks Nvim to redraw that part of the buffer.
---
--- @param range [integer, integer] Row range that was parsed (`{ topline, botline + 1 }`)
function TSHighlighter:_async_parse_callback(range)
  local first_row, last_row = unpack(range)
  self:prepare_highlight_states(first_row, last_row)

  api.nvim__redraw({
    buf = self.bufnr,
    range = range,
    flush = false,
  })
end
api.nvim_set_decoration_provider(ns, {
on_win = TSHighlighter._on_win,
on_line = TSHighlighter._on_line,

View File

@ -44,6 +44,8 @@ local query = require('vim.treesitter.query')
local language = require('vim.treesitter.language')
local Range = require('vim.treesitter._range')
local default_parse_timeout_ms = 3
---@alias TSCallbackName
---| 'changedtree'
---| 'bytes'
@ -58,6 +60,8 @@ local Range = require('vim.treesitter._range')
---| 'on_child_added'
---| 'on_child_removed'
---@alias TSTrees table<integer, TSTree>
--- @type table<TSCallbackNameOn,TSCallbackName>
local TSCallbackNames = {
on_changedtree = 'changedtree',
@ -83,7 +87,7 @@ local TSCallbackNames = {
---@field private _lang string Language name
---@field private _parent? vim.treesitter.LanguageTree Parent LanguageTree
---@field private _source (integer|string) Buffer or string to parse
---@field private _trees table<integer, TSTree> Reference to parsed tree (one for each language).
---@field private _trees TSTrees Reference to parsed tree (one for each language).
---Each key is the index of region, which is synced with _regions and _valid.
---@field private _valid boolean|table<integer,boolean> If the parsed tree is valid
---@field private _logger? fun(logtype: string, msg: string)
@ -98,6 +102,10 @@ local LanguageTree = {}
LanguageTree.__index = LanguageTree
--- Returns the buffer-variable accessor (`vim.b[bufnr]`) for the buffer this tree parses.
---
--- `_source` may also be a string (string parsers). Indexing `vim.b` with a string is a
--- variable lookup by name rather than a buffer accessor, so for non-buffer sources an empty
--- table is returned instead: field reads such as `changedtick` then yield `nil` rather than
--- raising "attempt to index a nil value".
---
--- @private
--- @return table
function LanguageTree:_buf()
  local source = self._source
  if type(source) ~= 'number' then
    return {}
  end
  return vim.b[source]
end
--- @nodoc
---
--- LanguageTree contains a tree of parsers: the root treesitter parser for {lang} and any
@ -252,7 +260,7 @@ end
--- * this LanguageTree is the root, in which case the result is empty or a singleton list; or
--- * the root LanguageTree is fully parsed.
---
---@return table<integer, TSTree>
---@return TSTrees
function LanguageTree:trees()
-- Hands back the internal `_trees` table itself (keyed by region index), not a copy.
return self._trees
end
@ -334,10 +342,12 @@ end
--- @private
--- @param range boolean|Range?
--- @param timeout integer?
--- @return Range6[] changes
--- @return integer no_regions_parsed
--- @return number total_parse_time
function LanguageTree:_parse_regions(range)
--- @return boolean is_unfinished whether async parsing still needs time
function LanguageTree:_parse_regions(range, timeout)
local changes = {}
local no_regions_parsed = 0
local total_parse_time = 0
@ -357,9 +367,18 @@ function LanguageTree:_parse_regions(range)
)
then
self._parser:set_included_ranges(ranges)
if timeout then
self._parser:set_timeout(timeout * 1000) -- ms -> micros
else
self._parser:set_timeout(0)
end
local parse_time, tree, tree_changes =
tcall(self._parser.parse, self._parser, self._trees[i], self._source, true)
if not tree then
return changes, no_regions_parsed, total_parse_time, true
end
-- Pass ranges if this is an initial parse
local cb_changes = self._trees[i] and tree_changes or tree:included_ranges(true)
@ -373,7 +392,7 @@ function LanguageTree:_parse_regions(range)
end
end
return changes, no_regions_parsed, total_parse_time
return changes, no_regions_parsed, total_parse_time, false
end
--- @private
@ -409,6 +428,45 @@ function LanguageTree:_add_injections()
return query_time
end
--- Run an asynchronous parse, calling {on_parse} when complete.
---
--- The parse runs inside a coroutine in slices of `default_parse_timeout_ms`. After every
--- unfinished slice the coroutine yields and is resumed through |vim.schedule()|, so the
--- editor stays responsive. The total budget is 'redrawtime'; once it is used up {on_parse}
--- is still invoked, with `timeout_reached` set to `true`.
---
--- @param range boolean|Range|nil
--- @param on_parse fun(trees: TSTrees, timeout_reached: boolean)
function LanguageTree:_async_parse(range, on_parse)
  local co --- @type thread

  --- Resumes the parse coroutine. `coroutine.resume()` reports failures through its return
  --- values only, so re-raise them here; otherwise an error in parsing or in {on_parse}
  --- would be dropped silently.
  local function step()
    local ok, err = coroutine.resume(co)
    if not ok then
      error(debug.traceback(co, tostring(err)), 0)
    end
  end

  co = coroutine.create(function()
    local ct = self:_buf().changedtick
    local total_parse_time = 0
    local redrawtime = vim.o.redrawtime

    ---@type TSTrees, boolean
    local trees, unfinished

    while total_parse_time <= redrawtime do
      -- If buffer was changed in the middle of parsing, reset parse state.
      -- The new tick must be remembered, otherwise every following iteration would reset
      -- again. No yield happens here: nothing would be scheduled to resume the coroutine,
      -- and {on_parse} would never run.
      local current_ct = self:_buf().changedtick
      if current_ct ~= ct then
        ct = current_ct
        self._parser:reset()
        self:invalidate()
        total_parse_time = 0
      end

      trees, unfinished = self:_parse(range, default_parse_timeout_ms)
      -- Each slice is charged its full timeout against the 'redrawtime' budget.
      total_parse_time = total_parse_time + default_parse_timeout_ms

      if not unfinished then
        break
      end

      -- Schedule the resume *before* yielding so the coroutine is guaranteed to continue.
      vim.schedule(step)
      coroutine.yield()
    end

    on_parse(trees, unfinished)
  end)

  step()
end
--- Recursively parse all regions in the language tree using |treesitter-parsers|
--- for the corresponding languages and run injection queries on the parsed trees
--- to determine whether child trees should be created and parsed.
@ -420,11 +478,27 @@ end
--- Set to `true` to run a complete parse of the source (Note: Can be slow!)
--- Set to `false|nil` to only parse regions with empty ranges (typically
--- only the root tree without injections).
--- @return table<integer, TSTree>
function LanguageTree:parse(range)
--- @param on_parse fun(trees: TSTrees, timeout_reached: boolean)? Function invoked when parsing
--- completes. When provided, parsing will run asynchronously. The function is passed the list
--- of trees returned by the parse, as well as a boolean indicating whether or not the parse
--- timed out, determined by 'redrawtime'.
--- @return TSTrees? trees The parsed trees, or `nil` when {on_parse} is given (the trees are
--- then delivered to the callback instead).
function LanguageTree:parse(range, on_parse)
-- Async path: `_async_parse` returns nothing; results arrive through {on_parse}.
if on_parse then
return self:_async_parse(range, on_parse)
end
-- Sync path: `_parse` is called without a timeout; its second result (the "unfinished"
-- flag) is discarded.
local trees, _ = self:_parse(range)
return trees
end
--- @param range boolean|Range|nil
--- @param timeout integer?
--- @return TSTrees
--- @return boolean
function LanguageTree:_parse(range, timeout)
if self:is_valid() then
self:_log('valid')
return self._trees
return self._trees, false
end
local changes --- @type Range6[]?
@ -433,10 +507,15 @@ function LanguageTree:parse(range)
local no_regions_parsed = 0
local query_time = 0
local total_parse_time = 0
local is_unfinished --- @type boolean
-- At least 1 region is invalid
if not self:is_valid(true) then
changes, no_regions_parsed, total_parse_time = self:_parse_regions(range)
changes, no_regions_parsed, total_parse_time, is_unfinished =
self:_parse_regions(range, timeout)
if is_unfinished then
return self._trees, is_unfinished
end
-- Need to run injections when we parsed something
if no_regions_parsed > 0 then
self._injections_processed = false
@ -457,10 +536,13 @@ function LanguageTree:parse(range)
})
for _, child in pairs(self._children) do
child:parse(range)
local _, child_unfinished = child:_parse(range, timeout)
if child_unfinished then
return self._trees, child_unfinished
end
end
return self._trees
return self._trees, false
end
--- Invokes the callback for each |LanguageTree| recursively.

View File

@ -488,7 +488,11 @@ static int parser_parse(lua_State *L)
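// Sometimes parsing fails (timeout, or wrong parser ABI).
// With no timeout set this is a hard failure, so raise an error. Otherwise assume the
// timeout was hit and return no values, which lets the caller resume the parse later.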
if (!new_tree) {
return luaL_error(L, "An error occurred when parsing.");
if (ts_parser_timeout_micros(p) == 0) {
// No timeout set, must have had an error
return luaL_error(L, "An error occurred when parsing.");
}
return 0;
}
// The new tree will be pushed to the stack, without copy, ownership is now to the lua GC.

View File

@ -90,6 +90,83 @@ describe('treesitter parser API', function()
eq(true, exec_lua('return parser:parse()[1] == tree2'))
end)
it('parses buffer asynchronously', function()
  -- NOTE: the ranges asserted below rely on the snippet's relative indentation
  -- (`int x = 3;` starting two columns in from `int main`).
  insert([[
int main() {
int x = 3;
}]])

  exec_lua(function()
    _G.parser = vim.treesitter.get_parser(0, 'c')
    _G.lang = vim.treesitter.language.inspect('c')
    _G.parser:parse(nil, function(trees)
      _G.tree = trees[1]
      _G.root = _G.tree:root()
    end)
    -- Stop waiting as soon as the callback has run instead of always blocking for 100ms.
    vim.wait(100, function()
      return _G.root ~= nil
    end)
  end)

  eq('<tree>', exec_lua('return tostring(tree)'))
  eq('<node translation_unit>', exec_lua('return tostring(root)'))
  eq({ 0, 0, 3, 0 }, exec_lua('return {root:range()}'))

  eq(1, exec_lua('return root:child_count()'))
  exec_lua('child = root:child(0)')
  eq('<node function_definition>', exec_lua('return tostring(child)'))
  eq({ 0, 0, 2, 1 }, exec_lua('return {child:range()}'))

  eq('function_definition', exec_lua('return child:type()'))
  eq(true, exec_lua('return child:named()'))
  eq('number', type(exec_lua('return child:symbol()')))
  eq(true, exec_lua('return lang.symbols[child:type()]'))

  exec_lua('anon = root:descendant_for_range(0,8,0,9)')
  eq('(', exec_lua('return anon:type()'))
  eq(false, exec_lua('return anon:named()'))
  eq('number', type(exec_lua('return anon:symbol()')))
  eq(false, exec_lua([=[return lang.symbols[string.format('"%s"', anon:type())]]=]))

  exec_lua('descendant = root:descendant_for_range(1,2,1,12)')
  eq('<node declaration>', exec_lua('return tostring(descendant)'))
  eq({ 1, 2, 1, 12 }, exec_lua('return {descendant:range()}'))
  eq(
    '(declaration type: (primitive_type) declarator: (init_declarator declarator: (identifier) value: (number_literal)))',
    exec_lua('return descendant:sexpr()')
  )

  feed('2G7|ay')
  exec_lua(function()
    _G.parser:parse(nil, function(trees)
      _G.tree2 = trees[1]
      _G.root2 = _G.tree2:root()
      _G.descendant2 = _G.root2:descendant_for_range(1, 2, 1, 13)
    end)
    -- `descendant2` is the last global the callback assigns.
    vim.wait(100, function()
      return _G.descendant2 ~= nil
    end)
  end)

  -- Compare against `tree`, the first parse result of this test. The previous `tree1` was
  -- never defined here, so the assertion passed trivially against nil.
  eq(false, exec_lua('return tree2 == tree'))
  eq(false, exec_lua('return root2 == root'))
  eq('<node declaration>', exec_lua('return tostring(descendant2)'))
  eq({ 1, 2, 1, 13 }, exec_lua('return {descendant2:range()}'))

  eq(true, exec_lua('return child == child'))
  -- separate lua object, but represents same node
  eq(true, exec_lua('return child == root:child(0)'))
  eq(false, exec_lua('return child == descendant2'))
  eq(false, exec_lua('return child == nil'))
  eq(false, exec_lua('return child == tree'))

  eq('string', exec_lua('return type(child:id())'))
  eq(true, exec_lua('return child:id() == child:id()'))
  -- separate lua object, but represents same node
  eq(true, exec_lua('return child:id() == root:child(0):id()'))
  eq(false, exec_lua('return child:id() == descendant2:id()'))
  eq(false, exec_lua('return child:id() == nil'))
  eq(false, exec_lua('return child:id() == tree'))

  -- unchanged buffer: return the same tree
  eq(true, exec_lua('return parser:parse()[1] == tree2'))
end)
local test_text = [[
void ui_refresh(void)
{