local lpeg = vim.lpeg
local P, S, V, R, B = lpeg.P, lpeg.S, lpeg.V, lpeg.R, lpeg.B
local C, Cc, Ct, Cf = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cf

local M = {}

-- Pattern for the path separator. It is used both while parsing the glob itself and inside the
-- generated patterns (where `*` and `?` must not cross it).
local pathsep = P('/')

--- Parses a raw glob into an |lua-lpeg| pattern.
---
--- This uses glob semantics from LSP 3.17.0: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
---
--- Glob patterns can have the following syntax:
--- - `*` to match one or more characters in a path segment
--- - `?` to match on one character in a path segment
--- - `**` to match any number of path segments, including none
--- - `{}` to group conditions (e.g. `*.{ts,js}` matches TypeScript and JavaScript files)
--- - `[]` to declare a range of characters to match in a path segment (e.g., `example.[0-9]` to match on `example.0`, `example.1`, …)
--- - `[!...]` to negate a range of characters to match in a path segment (e.g., `example.[!0-9]` to match on `example.a`, `example.b`, but not `example.0`)
---
--- Note: `*` inside a `{}` condition is currently treated as a literal character.
---
--- Raises an error (`Invalid glob`) if the pattern cannot be parsed.
---
---@param pattern string The raw glob pattern
---@return vim.lpeg.Pattern pattern An |lua-lpeg| representation of the pattern
function M.to_lpeg(pattern)
  -- Builds the pattern for a `[...]` class.
  -- `inv` is the captured '!' (or '' when absent); `ranges` is a list of { from, to } pairs.
  local function class(inv, ranges)
    -- table.concat turns each { from, to } pair into the two-character string expected by lpeg.R.
    local patt = R(unpack(vim.tbl_map(table.concat, ranges)))
    if inv == '!' then
      patt = P(1) - patt
    end
    return patt
  end

  -- Fold function for alternatives (`{a,b}`): ordered choice.
  local function add(acc, a)
    return acc + a
  end

  -- Fold function for sequences: concatenation.
  local function mul(acc, m)
    return acc * m
  end

  -- Builds the pattern for a run of `*`.
  -- `stars` is the matched run of '*' characters, `after` the pattern for the rest of the glob.
  -- Consumes at least `#stars` non-separator characters, never stepping over a position where
  -- `after` already matches, and then requires `after`.
  local function star(stars, after)
    return (-after * (P(1) - pathsep)) ^ #stars * after
  end

  -- Builds the pattern for `**`: like `star`, but may consume zero characters and may cross
  -- path separators.
  local function dstar(after)
    return (-after * P(1)) ^ 0 * after
  end

  -- Grammar over the glob string. Every rule produces an lpeg pattern as its capture; the
  -- captures are combined with `mul`/`add` into the final pattern.
  local p = P({
    'Pattern',
    Pattern = V('Elem') ^ -1 * V('End'),
    Elem = Cf(
      (V('DStar') + V('Star') + V('Ques') + V('Class') + V('CondList') + V('Literal'))
        * (V('Elem') + V('End')),
      mul
    ),
    -- `**` is only recognized at the start of the glob or right after a path separator, and
    -- must be followed by a path separator or the end of the glob.
    DStar = (B(pathsep) + -B(P(1)))
      * P('**')
      * (pathsep * (V('Elem') + V('End')) + V('End'))
      / dstar,
    Star = C(P('*') ^ 1) * (V('Elem') + V('End')) / star,
    Ques = P('?') * Cc(P(1) - pathsep),
    -- Only `from-to` ranges are accepted inside `[]`; at least one range is required.
    Class = P('[')
      * C(P('!') ^ -1)
      * Ct(Ct(C(P(1)) * P('-') * C(P(1) - P(']'))) ^ 1 * P(']'))
      / class,
    CondList = P('{') * Cf(V('Cond') * (P(',') * V('Cond')) ^ 0, add) * P('}'),
    -- TODO: '*' inside a {} condition is interpreted literally but should probably have the same
    -- wildcard semantics it usually has.
    -- Fixing this is non-trivial because '*' should match non-greedily up to "the rest of the
    -- pattern" which in all other cases is the entire succeeding part of the pattern, but at the end of a {}
    -- condition means "everything after the {}" where several other options separated by ',' may
    -- exist in between that should not be matched by '*'.
    Cond = Cf((V('Ques') + V('Class') + V('CondList') + (V('Literal') - S(',}'))) ^ 1, mul)
      -- An empty condition (e.g. `{,x}`) yields a pattern matching the empty string.
      + Cc(P(0)),
    -- Any other single character matches itself.
    Literal = P(1) / P,
    -- End of the glob: the generated pattern must also be at the end of its input.
    End = P(-1) * Cc(P(-1)),
  })

  local lpeg_pattern = p:match(pattern) --[[@as vim.lpeg.Pattern?]]
  assert(lpeg_pattern, 'Invalid glob')
  return lpeg_pattern
end
return M