refactor(lsp): move glob parsing to util (#26519)

refactor(lsp): move glob parsing to vim.glob

Moving the logic for using vim.lpeg to create a match pattern from a
glob into `vim.glob`. There are several places in the LSP spec that
use globs, and it's very useful to have glob matching as a
generally-available utility.
This commit is contained in:
Steven Arcangeli 2023-12-22 02:40:01 -08:00 committed by GitHub
parent db0ec84fb4
commit 92204b06e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 97 additions and 92 deletions

View File

@ -55,6 +55,7 @@ vim._submodules = {
inspect = true,
version = true,
fs = true,
glob = true,
iter = true,
re = true,
text = true,

View File

@ -11,6 +11,7 @@ vim.diagnostic = require('vim.diagnostic')
vim.filetype = require('vim.filetype')
vim.fs = require('vim.fs')
vim.func = require('vim.func')
vim.glob = require('vim.glob')
vim.health = require('vim.health')
vim.highlight = require('vim.highlight')
vim.iter = require('vim.iter')

81
runtime/lua/vim/glob.lua Normal file
View File

@ -0,0 +1,81 @@
local lpeg = vim.lpeg
local M = {}
--- Parses a raw glob into an |lpeg| pattern.
---
--- This uses glob semantics from LSP 3.17.0: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
--- Glob patterns can have the following syntax:
--- `*` to match one or more characters in a path segment
--- `?` to match on one character in a path segment
--- `**` to match any number of path segments, including none
--- `{}` to group conditions (e.g. `**/*.{ts,js}` matches all TypeScript and JavaScript files)
--- `[]` to declare a range of characters to match in a path segment (e.g., `example.[0-9]` to match on `example.0`, `example.1`, …)
--- `[!...]` to negate a range of characters to match in a path segment (e.g., `example.[!0-9]` to match on `example.a`, `example.b`, but not `example.0`)
---@param pattern string The raw glob pattern
---@return vim.lpeg.Pattern pattern An |lpeg| representation of the pattern
function M.to_lpeg(pattern)
local l = lpeg
local P, S, V = lpeg.P, lpeg.S, lpeg.V
local C, Cc, Ct, Cf = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cf
local pathsep = '/'
local function class(inv, ranges)
for i, r in ipairs(ranges) do
ranges[i] = r[1] .. r[2]
end
local patt = l.R(unpack(ranges))
if inv == '!' then
patt = P(1) - patt
end
return patt
end
local function add(acc, a)
return acc + a
end
local function mul(acc, m)
return acc * m
end
local function star(stars, after)
return (-after * (l.P(1) - pathsep)) ^ #stars * after
end
local function dstar(after)
return (-after * l.P(1)) ^ 0 * after
end
local p = P({
'Pattern',
Pattern = V('Elem') ^ -1 * V('End'),
Elem = Cf(
(V('DStar') + V('Star') + V('Ques') + V('Class') + V('CondList') + V('Literal'))
* (V('Elem') + V('End')),
mul
),
DStar = P('**') * (P(pathsep) * (V('Elem') + V('End')) + V('End')) / dstar,
Star = C(P('*') ^ 1) * (V('Elem') + V('End')) / star,
Ques = P('?') * Cc(l.P(1) - pathsep),
Class = P('[') * C(P('!') ^ -1) * Ct(Ct(C(1) * '-' * C(P(1) - ']')) ^ 1 * ']') / class,
CondList = P('{') * Cf(V('Cond') * (P(',') * V('Cond')) ^ 0, add) * '}',
-- TODO: '*' inside a {} condition is interpreted literally but should probably have the same
-- wildcard semantics it usually has.
-- Fixing this is non-trivial because '*' should match non-greedily up to "the rest of the
-- pattern" which in all other cases is the entire succeeding part of the pattern, but at the end of a {}
-- condition means "everything after the {}" where several other options separated by ',' may
-- exist in between that should not be matched by '*'.
Cond = Cf((V('Ques') + V('Class') + V('CondList') + (V('Literal') - S(',}'))) ^ 1, mul)
+ Cc(l.P(0)),
Literal = P(1) / l.P,
End = P(-1) * Cc(l.P(-1)),
})
local lpeg_pattern = p:match(pattern) --[[@as vim.lpeg.Pattern?]]
return assert(lpeg_pattern, 'Invalid glob')
end
return M

View File

@ -1,4 +1,4 @@
local wf = require('vim.lsp._watchfiles')
local glob = require('vim.glob')
--- @class lsp.DynamicCapabilities
--- @field capabilities table<string, lsp.Registration[]>
@ -97,7 +97,7 @@ function M.match(bufnr, documentSelector)
if matches and filter.scheme and not vim.startswith(uri, filter.scheme .. ':') then
matches = false
end
if matches and filter.pattern and not wf._match(filter.pattern, fname) then
if matches and filter.pattern and not glob.to_lpeg(filter.pattern):match(fname) then
matches = false
end
if matches then

View File

@ -1,4 +1,5 @@
local bit = require('bit')
local glob = require('vim.glob')
local watch = require('vim._watch')
local protocol = require('vim.lsp.protocol')
local ms = protocol.Methods
@ -6,88 +7,6 @@ local lpeg = vim.lpeg
local M = {}
--- Parses the raw pattern into an |lpeg| pattern. LPeg patterns natively support the "this" or "that"
--- alternative constructions described in the LSP spec that cannot be expressed in a standard Lua pattern.
---
---@param pattern string The raw glob pattern
---@return vim.lpeg.Pattern? pattern An |lpeg| representation of the pattern, or nil if the pattern is invalid.
local function parse(pattern)
local l = lpeg
local P, S, V = lpeg.P, lpeg.S, lpeg.V
local C, Cc, Ct, Cf = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cf
local pathsep = '/'
local function class(inv, ranges)
for i, r in ipairs(ranges) do
ranges[i] = r[1] .. r[2]
end
local patt = l.R(unpack(ranges))
if inv == '!' then
patt = P(1) - patt
end
return patt
end
local function add(acc, a)
return acc + a
end
local function mul(acc, m)
return acc * m
end
local function star(stars, after)
return (-after * (l.P(1) - pathsep)) ^ #stars * after
end
local function dstar(after)
return (-after * l.P(1)) ^ 0 * after
end
local p = P({
'Pattern',
Pattern = V('Elem') ^ -1 * V('End'),
Elem = Cf(
(V('DStar') + V('Star') + V('Ques') + V('Class') + V('CondList') + V('Literal'))
* (V('Elem') + V('End')),
mul
),
DStar = P('**') * (P(pathsep) * (V('Elem') + V('End')) + V('End')) / dstar,
Star = C(P('*') ^ 1) * (V('Elem') + V('End')) / star,
Ques = P('?') * Cc(l.P(1) - pathsep),
Class = P('[') * C(P('!') ^ -1) * Ct(Ct(C(1) * '-' * C(P(1) - ']')) ^ 1 * ']') / class,
CondList = P('{') * Cf(V('Cond') * (P(',') * V('Cond')) ^ 0, add) * '}',
-- TODO: '*' inside a {} condition is interpreted literally but should probably have the same
-- wildcard semantics it usually has.
-- Fixing this is non-trivial because '*' should match non-greedily up to "the rest of the
-- pattern" which in all other cases is the entire succeeding part of the pattern, but at the end of a {}
-- condition means "everything after the {}" where several other options separated by ',' may
-- exist in between that should not be matched by '*'.
Cond = Cf((V('Ques') + V('Class') + V('CondList') + (V('Literal') - S(',}'))) ^ 1, mul)
+ Cc(l.P(0)),
Literal = P(1) / l.P,
End = P(-1) * Cc(l.P(-1)),
})
return p:match(pattern) --[[@as vim.lpeg.Pattern?]]
end
---@private
--- Implementation of LSP 3.17.0's pattern matching: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
---
---@param pattern string|vim.lpeg.Pattern The glob pattern (raw or parsed) to match.
---@param s string The string to match against pattern.
---@return boolean Whether or not pattern matches s.
function M._match(pattern, s)
if type(pattern) == 'string' then
local p = assert(parse(pattern))
return p:match(s) ~= nil
end
return pattern:match(s) ~= nil
end
M._watchfunc = (vim.fn.has('win32') == 1 or vim.fn.has('mac') == 1) and watch.watch or watch.poll
---@type table<integer, table<string, function[]>> client id -> registration id -> cancel function
@ -112,9 +31,9 @@ local to_lsp_change_type = {
--- Default excludes the same as VSCode's `files.watcherExclude` setting.
--- https://github.com/microsoft/vscode/blob/eef30e7165e19b33daa1e15e92fa34ff4a5df0d3/src/vs/workbench/contrib/files/browser/files.contribution.ts#L261
---@type vim.lpeg.Pattern parsed Lpeg pattern
M._poll_exclude_pattern = parse('**/.git/{objects,subtree-cache}/**')
+ parse('**/node_modules/*/**')
+ parse('**/.hg/store/**')
M._poll_exclude_pattern = glob.to_lpeg('**/.git/{objects,subtree-cache}/**')
+ glob.to_lpeg('**/node_modules/*/**')
+ glob.to_lpeg('**/.hg/store/**')
--- Registers the workspace/didChangeWatchedFiles capability dynamically.
---
@ -143,7 +62,7 @@ function M.register(reg, ctx)
local glob_pattern = w.globPattern
if type(glob_pattern) == 'string' then
local pattern = parse(glob_pattern)
local pattern = glob.to_lpeg(glob_pattern)
if not pattern then
error('Cannot parse pattern: ' .. glob_pattern)
end
@ -155,7 +74,7 @@ function M.register(reg, ctx)
local base_uri = glob_pattern.baseUri
local uri = type(base_uri) == 'string' and base_uri or base_uri.uri
local base_dir = vim.uri_to_fname(uri)
local pattern = parse(glob_pattern.pattern)
local pattern = glob.to_lpeg(glob_pattern.pattern)
if not pattern then
error('Cannot parse pattern: ' .. glob_pattern.pattern)
end

View File

@ -1,14 +1,17 @@
local helpers = require('test.functional.helpers')(after_each)
local eq = helpers.eq
local exec_lua = helpers.exec_lua
describe('vim.lsp._watchfiles', function()
describe('glob', function()
before_each(helpers.clear)
after_each(helpers.clear)
local match = function(...)
return exec_lua('return require("vim.lsp._watchfiles")._match(...)', ...)
return exec_lua([[
local pattern = select(1, ...)
local str = select(2, ...)
return require("vim.glob").to_lpeg(pattern):match(str) ~= nil
]], ...)
end
describe('glob matching', function()