fix(glob): handle overlapping {} condition elements #29236

This change fixes an issue where glob patterns like `{a,ab}` would not
match `ab` because the first option `a` matches, then the end of the
string is expected but `b` is found, and LPeg does not backtrack to try
the next option `ab` which would match. The fix here is to also append
the rest of the pattern to the generated LPeg pattern for each option.
This changes a glob `{a,ab}` from being parsed as

    ("a" or "ab") "end of string"

to

    ("a" "end of string" or "ab" "end of string")

Here, matching against `ab` would try the first option, fail to match,
then proceed to the next option, and match.

The sacrifice this change makes is dropping support for nested `{}`
conditions, which VSCode doesn't seem to support or test AFAICT.

Fixes #28931

Co-authored-by: Sergey Slipchenko <faergeek@gmail.com>
This commit is contained in:
Jon Huhn 2024-06-10 06:23:03 -05:00 committed by GitHub
parent 743c5808b6
commit 4bd86120d4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 18 additions and 6 deletions

View File

@ -29,8 +29,10 @@ function M.to_lpeg(pattern)
return patt
end
local function add(acc, a)
return acc + a
local function condlist(conds, after)
return vim.iter(conds):fold(P(false), function(acc, cond)
return acc + cond * after
end)
end
local function mul(acc, m)
@ -63,15 +65,14 @@ function M.to_lpeg(pattern)
* C(P('!') ^ -1)
* Ct(Ct(C(P(1)) * P('-') * C(P(1) - P(']'))) ^ 1 * P(']'))
/ class,
CondList = P('{') * Cf(V('Cond') * (P(',') * V('Cond')) ^ 0, add) * P('}'),
CondList = P('{') * Ct(V('Cond') * (P(',') * V('Cond')) ^ 0) * P('}') * V('Pattern') / condlist,
-- TODO: '*' inside a {} condition is interpreted literally but should probably have the same
-- wildcard semantics it usually has.
-- Fixing this is non-trivial because '*' should match non-greedily up to "the rest of the
-- pattern" which in all other cases is the entire succeeding part of the pattern, but at the end of a {}
-- condition means "everything after the {}" where several other options separated by ',' may
-- exist in between that should not be matched by '*'.
Cond = Cf((V('Ques') + V('Class') + V('CondList') + (V('Literal') - S(',}'))) ^ 1, mul)
+ Cc(P(0)),
Cond = Cf((V('Ques') + V('Class') + V('Literal') - S(',}')) ^ 1, mul) + Cc(P(0)),
Literal = P(1) / P,
End = P(-1) * Cc(P(-1)),
})

View File

@ -161,7 +161,7 @@ describe('glob', function()
eq(false, match('{ab,cd}', 'a'))
eq(true, match('{ab,cd}', 'cd'))
eq(true, match('{a,b,c}', 'c'))
eq(true, match('{a,{b,c}}', 'c'))
eq(false, match('{a,{b,c}}', 'c')) -- {} cannot nest
end)
it('should match [] groups', function()
@ -223,6 +223,17 @@ describe('glob', function()
eq(true, match('{[0-9],[a-z]}', '0'))
eq(true, match('{[0-9],[a-z]}', 'a'))
eq(false, match('{[0-9],[a-z]}', 'A'))
-- glob is from willRename filter in typescript-language-server
-- https://github.com/typescript-language-server/typescript-language-server/blob/b224b878652438bcdd639137a6b1d1a6630129e4/src/lsp-server.ts#L266
eq(true, match('**/*.{ts,js,jsx,tsx,mjs,mts,cjs,cts}', 'test.js'))
eq(true, match('**/*.{ts,js,jsx,tsx,mjs,mts,cjs,cts}', 'test.ts'))
eq(true, match('**/*.{ts,js,jsx,tsx,mjs,mts,cjs,cts}', 'test.mts'))
eq(true, match('**/*.{ts,js,jsx,tsx,mjs,mts,cjs,cts}', 'test.mjs'))
eq(true, match('**/*.{ts,js,jsx,tsx,mjs,mts,cjs,cts}', 'test.cjs'))
eq(true, match('**/*.{ts,js,jsx,tsx,mjs,mts,cjs,cts}', 'test.cts'))
eq(true, match('**/*.{ts,js,jsx,tsx,mjs,mts,cjs,cts}', 'test.jsx'))
eq(true, match('**/*.{ts,js,jsx,tsx,mjs,mts,cjs,cts}', 'test.tsx'))
end)
end)
end)