treesitter: add string parser (#13008)

This commit is contained in:
Thomas Vigouroux 2020-09-30 15:32:43 +02:00 committed by GitHub
parent d5adc8c00e
commit 3c5141d2cf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 114 additions and 35 deletions

View File

@ -21,7 +21,9 @@ function Parser:parse()
return self.tree return self.tree
end end
local changes local changes
self.tree, changes = self._parser:parse_buf(self.bufnr)
self.tree, changes = self._parser:parse(self:input_source())
self.valid = true self.valid = true
if not vim.tbl_isempty(changes) then if not vim.tbl_isempty(changes) then
@ -33,6 +35,10 @@ function Parser:parse()
return self.tree, changes return self.tree, changes
end end
--- Returns the source this parser reads its text from.
-- Yields `self.bufnr` when it is set, and falls back to `self.str`
-- otherwise (the field populated by string parsers).
function Parser:input_source()
  if self.bufnr then
    return self.bufnr
  end
  return self.str
end
function Parser:_on_bytes(bufnr, changed_tick, function Parser:_on_bytes(bufnr, changed_tick,
start_row, start_col, start_byte, start_row, start_col, start_byte,
old_row, old_col, old_byte, old_row, old_col, old_byte,
@ -152,4 +158,18 @@ function M.get_parser(bufnr, lang, buf_attach_cbs)
return parsers[id] return parsers[id]
end end
--- Creates a parser that works on a string instead of a buffer.
--
-- @param str The text to parse (must be a string)
-- @param lang The language of the text (must be a string)
--
-- @returns The parser object, on which parse() has already been called once
function M.get_string_parser(str, lang)
  vim.validate({
    str = { str, 'string' },
    lang = { lang, 'string' },
  })
  language.require_language(lang)

  -- The parser starts out invalid; the parse() call below builds the tree.
  local string_parser = setmetatable({ str = str, lang = lang, valid = false }, Parser)
  string_parser._parser = vim._create_ts_parser(lang)
  string_parser:parse()

  return string_parser
end
return M return M

View File

@ -28,21 +28,27 @@ end
--- Gets the text corresponding to a given node --- Gets the text corresponding to a given node
-- @param node the node -- @param node the node
-- @param bufnr the buffer from which the node is extracted. -- @param bufnr the buffer from which the node is extracted.
function M.get_node_text(node, bufnr) function M.get_node_text(node, source)
local start_row, start_col, end_row, end_col = node:range() local start_row, start_col, start_byte = node:start()
if start_row ~= end_row then local end_row, end_col, end_byte = node:end_()
return nil
if type(source) == "number" then
if start_row ~= end_row then
return nil
end
local line = a.nvim_buf_get_lines(source, start_row, start_row+1, true)[1]
return string.sub(line, start_col+1, end_col)
elseif type(source) == "string" then
return source:sub(start_byte+1, end_byte)
end end
local line = a.nvim_buf_get_lines(bufnr, start_row, start_row+1, true)[1]
return string.sub(line, start_col+1, end_col)
end end
-- Predicate handlers receive the following arguments -- Predicate handlers receive the following arguments
-- (match, pattern, bufnr, predicate) -- (match, pattern, bufnr, predicate)
local predicate_handlers = { local predicate_handlers = {
["eq?"] = function(match, _, bufnr, predicate) ["eq?"] = function(match, _, source, predicate)
local node = match[predicate[2]] local node = match[predicate[2]]
local node_text = M.get_node_text(node, bufnr) local node_text = M.get_node_text(node, source)
local str local str
if type(predicate[3]) == "string" then if type(predicate[3]) == "string" then
@ -50,7 +56,7 @@ local predicate_handlers = {
str = predicate[3] str = predicate[3]
else else
-- (#eq? @aa @bb) -- (#eq? @aa @bb)
str = M.get_node_text(match[predicate[3]], bufnr) str = M.get_node_text(match[predicate[3]], source)
end end
if node_text ~= str or str == nil then if node_text ~= str or str == nil then
@ -60,7 +66,7 @@ local predicate_handlers = {
return true return true
end, end,
["lua-match?"] = function(match, _, bufnr, predicate) ["lua-match?"] = function(match, _, source, predicate)
local node = match[predicate[2]] local node = match[predicate[2]]
local regex = predicate[3] local regex = predicate[3]
local start_row, _, end_row, _ = node:range() local start_row, _, end_row, _ = node:range()
@ -68,7 +74,7 @@ local predicate_handlers = {
return false return false
end end
return string.find(M.get_node_text(node, bufnr), regex) return string.find(M.get_node_text(node, source), regex)
end, end,
["match?"] = (function() ["match?"] = (function()
@ -88,7 +94,7 @@ local predicate_handlers = {
end end
}) })
return function(match, _, bufnr, pred) return function(match, _, source, pred)
local node = match[pred[2]] local node = match[pred[2]]
local start_row, start_col, end_row, end_col = node:range() local start_row, start_col, end_row, end_col = node:range()
if start_row ~= end_row then if start_row ~= end_row then
@ -96,13 +102,13 @@ local predicate_handlers = {
end end
local regex = compiled_vim_regexes[pred[3]] local regex = compiled_vim_regexes[pred[3]]
return regex:match_line(bufnr, start_row, start_col, end_col) return regex:match_line(source, start_row, start_col, end_col)
end end
end)(), end)(),
["contains?"] = function(match, _, bufnr, predicate) ["contains?"] = function(match, _, source, predicate)
local node = match[predicate[2]] local node = match[predicate[2]]
local node_text = M.get_node_text(node, bufnr) local node_text = M.get_node_text(node, source)
for i=3,#predicate do for i=3,#predicate do
if string.find(node_text, predicate[i], 1, true) then if string.find(node_text, predicate[i], 1, true) then
@ -139,7 +145,7 @@ local function xor(x, y)
return (x or y) and not (x and y) return (x or y) and not (x and y)
end end
function Query:match_preds(match, pattern, bufnr) function Query:match_preds(match, pattern, source)
local preds = self.info.patterns[pattern] local preds = self.info.patterns[pattern]
for _, pred in pairs(preds or {}) do for _, pred in pairs(preds or {}) do
@ -164,7 +170,7 @@ function Query:match_preds(match, pattern, bufnr)
return false return false
end end
local pred_matches = handler(match, pattern, bufnr, pred) local pred_matches = handler(match, pattern, source, pred)
if not xor(is_not, pred_matches) then if not xor(is_not, pred_matches) then
return false return false
@ -182,15 +188,15 @@ end
-- --
-- @returns The matching capture id -- @returns The matching capture id
-- @returns The captured node -- @returns The captured node
function Query:iter_captures(node, bufnr, start, stop) function Query:iter_captures(node, source, start, stop)
if bufnr == 0 then if type(source) == "number" and source == 0 then
bufnr = vim.api.nvim_get_current_buf() source = vim.api.nvim_get_current_buf()
end end
local raw_iter = node:_rawquery(self.query, true, start, stop) local raw_iter = node:_rawquery(self.query, true, start, stop)
local function iter() local function iter()
local capture, captured_node, match = raw_iter() local capture, captured_node, match = raw_iter()
if match ~= nil then if match ~= nil then
local active = self:match_preds(match, match.pattern, bufnr) local active = self:match_preds(match, match.pattern, source)
match.active = active match.active = active
if not active then if not active then
return iter() -- tail call: try next match return iter() -- tail call: try next match
@ -210,15 +216,15 @@ end
-- --
-- @returns The matching pattern id -- @returns The matching pattern id
-- @returns The matching match -- @returns The matching match
function Query:iter_matches(node, bufnr, start, stop) function Query:iter_matches(node, source, start, stop)
if bufnr == 0 then if type(source) == "number" and source == 0 then
bufnr = vim.api.nvim_get_current_buf() source = vim.api.nvim_get_current_buf()
end end
local raw_iter = node:_rawquery(self.query, false, start, stop) local raw_iter = node:_rawquery(self.query, false, start, stop)
local function iter() local function iter()
local pattern, match = raw_iter() local pattern, match = raw_iter()
if match ~= nil then if match ~= nil then
local active = self:match_preds(match, pattern, bufnr) local active = self:match_preds(match, pattern, source)
if not active then if not active then
return iter() -- tail call: try next match return iter() -- tail call: try next match
end end

View File

@ -39,7 +39,7 @@ typedef struct {
static struct luaL_Reg parser_meta[] = { static struct luaL_Reg parser_meta[] = {
{ "__gc", parser_gc }, { "__gc", parser_gc },
{ "__tostring", parser_tostring }, { "__tostring", parser_tostring },
{ "parse_buf", parser_parse_buf }, { "parse", parser_parse },
{ "edit", parser_edit }, { "edit", parser_edit },
{ "tree", parser_tree }, { "tree", parser_tree },
{ "set_included_ranges", parser_set_ranges }, { "set_included_ranges", parser_set_ranges },
@ -306,23 +306,45 @@ static const char *input_cb(void *payload, uint32_t byte_index,
#undef BUFSIZE #undef BUFSIZE
} }
static int parser_parse_buf(lua_State *L) static int parser_parse(lua_State *L)
{ {
TSLua_parser *p = parser_check(L); TSLua_parser *p = parser_check(L);
if (!p) { if (!p) {
return 0; return 0;
} }
long bufnr = lua_tointeger(L, 2); TSTree *new_tree;
buf_T *buf = handle_get_buffer(bufnr); size_t len;
const char *str;
long bufnr;
buf_T *buf;
TSInput input;
if (!buf) { // This switch is necessary because of the behavior of lua_isstring, that
return luaL_error(L, "invalid buffer handle: %d", bufnr); // consider numbers as strings...
switch (lua_type(L, 2)) {
case LUA_TSTRING:
str = lua_tolstring(L, 2, &len);
new_tree = ts_parser_parse_string(p->parser, p->tree, str, len);
break;
case LUA_TNUMBER:
bufnr = lua_tointeger(L, 2);
buf = handle_get_buffer(bufnr);
if (!buf) {
return luaL_error(L, "invalid buffer handle: %d", bufnr);
}
input = (TSInput){ (void *)buf, input_cb, TSInputEncodingUTF8 };
new_tree = ts_parser_parse(p->parser, p->tree, input);
break;
default:
return luaL_error(L, "invalid argument to parser:parse()");
} }
TSInput input = { (void *)buf, input_cb, TSInputEncodingUTF8 };
TSTree *new_tree = ts_parser_parse(p->parser, p->tree, input);
uint32_t n_ranges = 0; uint32_t n_ranges = 0;
TSRange *changed = p->tree ? ts_tree_get_changed_ranges(p->tree, new_tree, TSRange *changed = p->tree ? ts_tree_get_changed_ranges(p->tree, new_tree,
&n_ranges) : NULL; &n_ranges) : NULL;

View File

@ -660,4 +660,35 @@ static int nlua_schedule(lua_State *const lstate)
{ 10, 5, 10, 20 }, { 10, 5, 10, 20 },
{ 14, 9, 14, 27 } }, res) { 14, 9, 14, 27 } }, res)
end) end)
-- A parser can be created directly from a string (no buffer involved);
-- the range of the parsed tree's root node is checked against the input.
it("allows to create string parsers", function()
local ret = exec_lua [[
local parser = vim.treesitter.get_string_parser("int foo = 42;", "c")
return { parser:parse():root():range() }
]]
-- The input "int foo = 42;" is a single line of 13 bytes.
eq({ 0, 0, 0, 13 }, ret)
end)
-- Queries can be run against a string parser: the parsed string itself is
-- handed to iter_captures as the source argument.
it("allows to run queries with string parsers", function()
local txt = [[
int foo = 42;
int bar = 13;
]]
-- txt is forwarded to the Lua chunk and read there through `...`.
local ret = exec_lua([[
local str = ...
local parser = vim.treesitter.get_string_parser(str, "c")
local nodes = {}
local query = vim.treesitter.parse_query("c", '((identifier) @id (eq? @id "foo"))')
for _, node in query:iter_captures(parser:parse():root(), str, 0, 2) do
table.insert(nodes, { node:range() })
end
return nodes]], txt)
-- Only the identifier whose text equals "foo" passes the eq? predicate,
-- so a single range is expected.
eq({ {0, 10, 0, 13} }, ret)
end)
end) end)