fix(treesitter): remove duplicate symbol names in language.inspect()

**Problems:**

- `vim.treesitter.language.inspect()` returns duplicate
  symbol names, sometimes up to 6 of one kind in the case of `markdown`
- The list-like `symbols` table can have holes and is thus not even a
  valid msgpack array anyway, as mentioned in a test

**Solution:** Return symbols as a map, rather than a list, where field
names are the names of the symbols. The boolean value associated with
each field encodes whether or not the symbol is named.

Note that anonymous nodes are surrounded with double quotes (`"`) to
prevent potential collisions with named counterparts that have the same
identifier.
This commit is contained in:
Riley Bruins 2024-09-19 13:08:22 -07:00 committed by Christian Clason
parent 267c7525f7
commit d3193afc25
6 changed files with 42 additions and 26 deletions

View File

@ -1150,8 +1150,13 @@ get_lang({filetype}) *vim.treesitter.language.get_lang()*
inspect({lang}) *vim.treesitter.language.inspect()* inspect({lang}) *vim.treesitter.language.inspect()*
Inspects the provided language. Inspects the provided language.
Inspecting provides some useful information on the language like node Inspecting provides some useful information on the language like node and
names, ... field names, ABI version, and whether the language came from a WASM
module.
Node names are returned in a table mapping each node name to a `boolean`
indicating whether or not the node is named (i.e., not anonymous).
Anonymous nodes are surrounded with double quotes (`"`).
Parameters: ~ Parameters: ~
• {lang} (`string`) Language • {lang} (`string`) Language

View File

@ -240,8 +240,12 @@ function M.omnifunc(findstart, base)
table.insert(items, text) table.insert(items, text)
end end
end end
for _, s in pairs(parser_info.symbols) do for text, named in
local text = s[2] and s[1] or string.format('%q', s[1]):gsub('\n', 'n') ---@type string pairs(parser_info.symbols --[[@as table<string, boolean>]])
do
if not named then
text = string.format('%q', text:sub(2, -2)):gsub('\n', 'n') ---@type string
end
if text:find(base, 1, true) then if text:find(base, 1, true) then
table.insert(items, text) table.insert(items, text)
end end

View File

@ -170,7 +170,12 @@ end
--- Inspects the provided language. --- Inspects the provided language.
--- ---
--- Inspecting provides some useful information on the language like node names, ... --- Inspecting provides some useful information on the language like node and field names, ABI
--- version, and whether the language came from a WASM module.
---
--- Node names are returned in a table mapping each node name to a `boolean` indicating whether or
--- not the node is named (i.e., not anonymous). Anonymous nodes are surrounded with double quotes
--- (`"`).
--- ---
---@param lang string Language ---@param lang string Language
---@return table ---@return table

View File

@ -271,12 +271,16 @@ int tslua_inspect_lang(lua_State *L)
// not used by the API // not used by the API
continue; continue;
} }
lua_createtable(L, 2, 0); // [retval, symbols, elem] const char *name = ts_language_symbol_name(lang, (TSSymbol)i);
lua_pushstring(L, ts_language_symbol_name(lang, (TSSymbol)i)); bool named = t == TSSymbolTypeRegular;
lua_rawseti(L, -2, 1); lua_pushboolean(L, named); // [retval, symbols, is_named]
lua_pushboolean(L, t == TSSymbolTypeRegular); if (!named) {
lua_rawseti(L, -2, 2); // [retval, symbols, elem] char buf[256];
lua_rawseti(L, -2, (int)i); // [retval, symbols] snprintf(buf, sizeof(buf), "\"%s\"", name);
lua_setfield(L, -2, buf); // [retval, symbols]
} else {
lua_setfield(L, -2, name); // [retval, symbols]
}
} }
lua_setfield(L, -2, "symbols"); // [retval] lua_setfield(L, -2, "symbols"); // [retval]

View File

@ -51,7 +51,7 @@ describe('treesitter language API', function()
it('inspects language', function() it('inspects language', function()
local keys, fields, symbols = unpack(exec_lua(function() local keys, fields, symbols = unpack(exec_lua(function()
local lang = vim.treesitter.language.inspect('c') local lang = vim.treesitter.language.inspect('c')
local keys, symbols = {}, {} local keys = {}
for k, v in pairs(lang) do for k, v in pairs(lang) do
if type(v) == 'boolean' then if type(v) == 'boolean' then
keys[k] = v keys[k] = v
@ -60,12 +60,7 @@ describe('treesitter language API', function()
end end
end end
-- symbols array can have "holes" and is thus not a valid msgpack array return { keys, lang.fields, lang.symbols }
-- but we don't care about the numbers here (checked in the parser test)
for _, v in pairs(lang.symbols) do
table.insert(symbols, v)
end
return { keys, lang.fields, symbols }
end)) end))
eq({ fields = true, symbols = true, _abi_version = true, _wasm = false }, keys) eq({ fields = true, symbols = true, _abi_version = true, _wasm = false }, keys)
@ -79,16 +74,19 @@ describe('treesitter language API', function()
eq(true, fset['initializer']) eq(true, fset['initializer'])
local has_named, has_anonymous local has_named, has_anonymous
for _, s in pairs(symbols) do for symbol, named in pairs(symbols) do
eq('string', type(s[1])) eq('string', type(symbol))
eq('boolean', type(s[2])) eq('boolean', type(named))
if s[1] == 'for_statement' and s[2] == true then if symbol == 'for_statement' and named == true then
has_named = true has_named = true
elseif s[1] == '|=' and s[2] == false then elseif symbol == '"|="' and named == false then
has_anonymous = true has_anonymous = true
end end
end end
eq({ true, true }, { has_named, has_anonymous }) eq(
{ has_named = true, has_anonymous = true },
{ has_named = has_named, has_anonymous = has_anonymous }
)
end) end)
it( it(

View File

@ -42,13 +42,13 @@ describe('treesitter parser API', function()
eq('function_definition', exec_lua('return child:type()')) eq('function_definition', exec_lua('return child:type()'))
eq(true, exec_lua('return child:named()')) eq(true, exec_lua('return child:named()'))
eq('number', type(exec_lua('return child:symbol()'))) eq('number', type(exec_lua('return child:symbol()')))
eq({ 'function_definition', true }, exec_lua('return lang.symbols[child:symbol()]')) eq(true, exec_lua('return lang.symbols[child:type()]'))
exec_lua('anon = root:descendant_for_range(0,8,0,9)') exec_lua('anon = root:descendant_for_range(0,8,0,9)')
eq('(', exec_lua('return anon:type()')) eq('(', exec_lua('return anon:type()'))
eq(false, exec_lua('return anon:named()')) eq(false, exec_lua('return anon:named()'))
eq('number', type(exec_lua('return anon:symbol()'))) eq('number', type(exec_lua('return anon:symbol()')))
eq({ '(', false }, exec_lua('return lang.symbols[anon:symbol()]')) eq(false, exec_lua([=[return lang.symbols[string.format('"%s"', anon:type())]]=]))
exec_lua('descendant = root:descendant_for_range(1,2,1,12)') exec_lua('descendant = root:descendant_for_range(1,2,1,12)')
eq('<node declaration>', exec_lua('return tostring(descendant)')) eq('<node declaration>', exec_lua('return tostring(descendant)'))