neovim/runtime/lua/vim/glob.lua
Jon Huhn 4bd86120d4
fix(glob): handle overlapping {} condition elements #29236
This change fixes an issue where glob patterns like `{a,ab}` would not
match `ab` because the first option `a` matches, then the end of the
string is expected but `b` is found, and LPeg does not backtrack to try
the next option `ab` which would match. The fix here is to also append
the rest of the pattern to the generated LPeg pattern for each option.
This changes a glob `{a,ab}` from being parsed as

    ("a" or "ab") "end of string"

to

    ("a" "end of string" or "ab" "end of string")

Here, matching against `ab` would try the first option, fail to match,
then proceed to the next option, and match.

The sacrifice this change makes is dropping support for nested `{}`
conditions, which VSCode doesn't seem to support or test AFAICT.

Fixes #28931

Co-authored-by: Sergey Slipchenko <faergeek@gmail.com>
2024-06-10 04:23:03 -07:00

86 lines
3.1 KiB
Lua

local lpeg = vim.lpeg
local P, S, V, R, B = lpeg.P, lpeg.S, lpeg.V, lpeg.R, lpeg.B
local C, Cc, Ct, Cf = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cf
local M = {}
local pathsep = P('/')
--- Parses a raw glob into an |lua-lpeg| pattern.
---
--- This uses glob semantics from LSP 3.17.0: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
---
--- Glob patterns can have the following syntax:
--- - `*` to match one or more characters in a path segment
--- - `?` to match on one character in a path segment
--- - `**` to match any number of path segments, including none
--- - `{}` to group conditions (e.g. `*.{ts,js}` matches TypeScript and JavaScript files)
--- - `[]` to declare a range of characters to match in a path segment (e.g., `example.[0-9]` to match on `example.0`, `example.1`, …)
--- - `[!...]` to negate a range of characters to match in a path segment (e.g., `example.[!0-9]` to match on `example.a`, `example.b`, but not `example.0`)
---
---@param pattern string The raw glob pattern
---@return vim.lpeg.Pattern pattern An |lua-lpeg| representation of the pattern
function M.to_lpeg(pattern)
local function class(inv, ranges)
local patt = R(unpack(vim.tbl_map(table.concat, ranges)))
if inv == '!' then
patt = P(1) - patt
end
return patt
end
local function condlist(conds, after)
return vim.iter(conds):fold(P(false), function(acc, cond)
return acc + cond * after
end)
end
local function mul(acc, m)
return acc * m
end
local function star(stars, after)
return (-after * (P(1) - pathsep)) ^ #stars * after
end
local function dstar(after)
return (-after * P(1)) ^ 0 * after
end
local p = P({
'Pattern',
Pattern = V('Elem') ^ -1 * V('End'),
Elem = Cf(
(V('DStar') + V('Star') + V('Ques') + V('Class') + V('CondList') + V('Literal'))
* (V('Elem') + V('End')),
mul
),
DStar = (B(pathsep) + -B(P(1)))
* P('**')
* (pathsep * (V('Elem') + V('End')) + V('End'))
/ dstar,
Star = C(P('*') ^ 1) * (V('Elem') + V('End')) / star,
Ques = P('?') * Cc(P(1) - pathsep),
Class = P('[')
* C(P('!') ^ -1)
* Ct(Ct(C(P(1)) * P('-') * C(P(1) - P(']'))) ^ 1 * P(']'))
/ class,
CondList = P('{') * Ct(V('Cond') * (P(',') * V('Cond')) ^ 0) * P('}') * V('Pattern') / condlist,
-- TODO: '*' inside a {} condition is interpreted literally but should probably have the same
-- wildcard semantics it usually has.
-- Fixing this is non-trivial because '*' should match non-greedily up to "the rest of the
-- pattern" which in all other cases is the entire succeeding part of the pattern, but at the end of a {}
-- condition means "everything after the {}" where several other options separated by ',' may
-- exist in between that should not be matched by '*'.
Cond = Cf((V('Ques') + V('Class') + V('Literal') - S(',}')) ^ 1, mul) + Cc(P(0)),
Literal = P(1) / P,
End = P(-1) * Cc(P(-1)),
})
local lpeg_pattern = p:match(pattern) --[[@as vim.lpeg.Pattern?]]
assert(lpeg_pattern, 'Invalid glob')
return lpeg_pattern
end
return M