From 8e5c48b08dad54706500e353c58ffb91f2684dd3 Mon Sep 17 00:00:00 2001 From: Famiu Haque Date: Wed, 17 Apr 2024 01:13:44 +0600 Subject: [PATCH] feat(lua): vim.fs.normalize() resolves ".", ".." #28203 Problem: `vim.fs.normalize` does not resolve `.` and `..` components. This makes no sense as the entire point of normalization is to remove redundancy from the path. The path normalization functions in several other languages (Java, Python, C++, etc.) also resolve `.` and `..` components. Reference: - Python: https://docs.python.org/3/library/os.path.html#os.path.normpath - Java: https://docs.oracle.com/javase/8/docs/api/java/nio/file/Path.html#normalize-- - C++: https://en.cppreference.com/w/cpp/filesystem/path/lexically_normal Solution: Resolve "." and ".." in `vim.fs.normalize`. Before: "~/foo/bar/../baz/./" => "~/foo/bar/../baz/." After: "~/foo/bar/../baz/./" => "~/foo/baz" --- runtime/doc/lua.txt | 36 ++++-- runtime/lua/vim/fs.lua | 193 +++++++++++++++++++++++++++----- test/functional/lua/fs_spec.lua | 118 ++++++++++++++++--- 3 files changed, 292 insertions(+), 55 deletions(-) diff --git a/runtime/doc/lua.txt b/runtime/doc/lua.txt index 2c0307a409..fdc50082d3 100644 --- a/runtime/doc/lua.txt +++ b/runtime/doc/lua.txt @@ -2964,26 +2964,40 @@ vim.fs.joinpath({...}) *vim.fs.joinpath()* vim.fs.normalize({path}, {opts}) *vim.fs.normalize()* Normalize a path to a standard format. A tilde (~) character at the beginning of the path is expanded to the user's home directory and - environment variables are also expanded. + environment variables are also expanded. "." and ".." components are also + resolved, except when the path is relative and trying to resolve it would + result in an absolute path. + • "." as the only part in a relative path: + • "." => "." + • "././" => "." + • ".." when it leads outside the current directory + • "foo/../../bar" => "../bar" + • "../../foo" => "../../foo" + • ".." in the root directory returns the root directory. 
+ • "/../../" => "/" On Windows, backslash (\) characters are converted to forward slashes (/). Examples: >lua - vim.fs.normalize('C:\\\\Users\\\\jdoe') - -- On Windows: 'C:/Users/jdoe' - - vim.fs.normalize('~/src/neovim') - -- '/home/jdoe/src/neovim' - - vim.fs.normalize('$XDG_CONFIG_HOME/nvim/init.vim') - -- '/Users/jdoe/.config/nvim/init.vim' + [[C:\Users\jdoe]] => "C:/Users/jdoe" + "~/src/neovim" => "/home/jdoe/src/neovim" + "$XDG_CONFIG_HOME/nvim/init.vim" => "/Users/jdoe/.config/nvim/init.vim" + "~/src/nvim/api/../tui/./tui.c" => "/home/jdoe/src/nvim/tui/tui.c" + "./foo/bar" => "foo/bar" + "foo/../../../bar" => "../../bar" + "/home/jdoe/../../../bar" => "/bar" + "C:foo/../../baz" => "C:../baz" + "C:/foo/../../baz" => "C:/baz" + [[\\?\UNC\server\share\foo\..\..\..\bar]] => "//?/UNC/server/share/bar" < Parameters: ~ • {path} (`string`) Path to normalize • {opts} (`table?`) A table with the following fields: - • {expand_env} (`boolean`, default: `true`) Expand environment - variables. + • {expand_env}? (`boolean`, default: `true`) Expand + environment variables. + • {win}? (`boolean`, default: `true` in Windows, `false` + otherwise) Path is a Windows path. Return: ~ (`string`) Normalized path diff --git a/runtime/lua/vim/fs.lua b/runtime/lua/vim/fs.lua index ad0d914ea2..65ad58c720 100644 --- a/runtime/lua/vim/fs.lua +++ b/runtime/lua/vim/fs.lua @@ -334,30 +334,147 @@ function M.find(names, opts) return matches end +--- Split a Windows path into a prefix and a body, such that the body can be processed like a POSIX +--- path. The path must use forward slashes as path separator. +--- +--- Does not check if the path is a valid Windows path. Invalid paths will give invalid results. 
+--- +--- Examples: +--- - `//./C:/foo/bar` -> `//./C:`, `/foo/bar` +--- - `//?/UNC/server/share/foo/bar` -> `//?/UNC/server/share`, `/foo/bar` +--- - `//./system07/C$/foo/bar` -> `//./system07`, `/C$/foo/bar` +--- - `C:/foo/bar` -> `C:`, `/foo/bar` +--- - `C:foo/bar` -> `C:`, `foo/bar` +--- +--- @param path string Path to split. +--- @return string, string, boolean : prefix, body, whether path is valid. +local function split_windows_path(path) + local prefix = '' + + --- Match pattern. If there is a match, move the matched pattern from the path to the prefix. + --- Returns the matched pattern. + --- + --- @param pattern string Pattern to match. + --- @return string|nil Matched pattern + local function match_to_prefix(pattern) + local match = path:match(pattern) + + if match then + prefix = prefix .. match --[[ @as string ]] + path = path:sub(#match + 1) + end + + return match + end + + local function process_unc_path() + return match_to_prefix('[^/]+/+[^/]+/+') + end + + if match_to_prefix('^//[?.]/') then + -- Device paths + local device = match_to_prefix('[^/]+/+') + + -- Return early if device pattern doesn't match, or if device is UNC and it's not a valid path + if not device or (device:match('^UNC/+$') and not process_unc_path()) then + return prefix, path, false + end + elseif match_to_prefix('^//') then + -- Process UNC path, return early if it's invalid + if not process_unc_path() then + return prefix, path, false + end + elseif path:match('^%w:') then + -- Drive paths + prefix, path = path:sub(1, 2), path:sub(3) + end + + -- If there are slashes at the end of the prefix, move them to the start of the body. This is to + -- ensure that the body is treated as an absolute path. For paths like C:foo/bar, there are no + -- slashes at the end of the prefix, so it will be treated as a relative path, as it should be. + local trailing_slash = prefix:match('/+$') + + if trailing_slash then + prefix = prefix:sub(1, -1 - #trailing_slash) + path = trailing_slash .. 
path --[[ @as string ]] + end + + return prefix, path, true +end + +--- Resolve `.` and `..` components in a POSIX-style path. This also removes extraneous slashes. +--- `..` is not resolved if the path is relative and resolving it requires the path to be absolute. +--- If a relative path resolves to the current directory, an empty string is returned. +--- +--- @see M.normalize() +--- @param path string Path to resolve. +--- @return string Resolved path. +local function path_resolve_dot(path) + local is_path_absolute = vim.startswith(path, '/') + -- Split the path into components and process them + local path_components = vim.split(path, '/') + local new_path_components = {} + + for _, component in ipairs(path_components) do + if component == '.' or component == '' then -- luacheck: ignore 542 + -- Skip `.` components and empty components + elseif component == '..' then + if #new_path_components > 0 and new_path_components[#new_path_components] ~= '..' then + -- For `..`, remove the last component if we're still inside the current directory, except + -- when the last component is `..` itself + table.remove(new_path_components) + elseif is_path_absolute then -- luacheck: ignore 542 + -- Reached the root directory in absolute path, do nothing + else + -- Reached current directory in relative path, add `..` to the path + table.insert(new_path_components, component) + end + else + table.insert(new_path_components, component) + end + end + + return (is_path_absolute and '/' or '') .. table.concat(new_path_components, '/') +end + --- @class vim.fs.normalize.Opts --- @inlinedoc --- --- Expand environment variables. --- (default: `true`) ---- @field expand_env boolean +--- @field expand_env? boolean +--- +--- Path is a Windows path. +--- (default: `true` in Windows, `false` otherwise) +--- @field win? boolean ---- Normalize a path to a standard format. 
A tilde (~) character at the ---- beginning of the path is expanded to the user's home directory and ---- environment variables are also expanded. +--- Normalize a path to a standard format. A tilde (~) character at the beginning of the path is +--- expanded to the user's home directory and environment variables are also expanded. "." and ".." +--- components are also resolved, except when the path is relative and trying to resolve it would +--- result in an absolute path. +--- - "." as the only part in a relative path: +--- - "." => "." +--- - "././" => "." +--- - ".." when it leads outside the current directory +--- - "foo/../../bar" => "../bar" +--- - "../../foo" => "../../foo" +--- - ".." in the root directory returns the root directory. +--- - "/../../" => "/" --- --- On Windows, backslash (\) characters are converted to forward slashes (/). --- --- Examples: ---- --- ```lua ---- vim.fs.normalize('C:\\\\Users\\\\jdoe') ---- -- On Windows: 'C:/Users/jdoe' ---- ---- vim.fs.normalize('~/src/neovim') ---- -- '/home/jdoe/src/neovim' ---- ---- vim.fs.normalize('$XDG_CONFIG_HOME/nvim/init.vim') ---- -- '/Users/jdoe/.config/nvim/init.vim' +--- [[C:\Users\jdoe]] => "C:/Users/jdoe" +--- "~/src/neovim" => "/home/jdoe/src/neovim" +--- "$XDG_CONFIG_HOME/nvim/init.vim" => "/Users/jdoe/.config/nvim/init.vim" +--- "~/src/nvim/api/../tui/./tui.c" => "/home/jdoe/src/nvim/tui/tui.c" +--- "./foo/bar" => "foo/bar" +--- "foo/../../../bar" => "../../bar" +--- "/home/jdoe/../../../bar" => "/bar" +--- "C:foo/../../baz" => "C:../baz" +--- "C:/foo/../../baz" => "C:/baz" +--- [[\\?\UNC\server\share\foo\..\..\..\bar]] => "//?/UNC/server/share/bar" --- ``` --- ---@param path (string) Path to normalize @@ -369,12 +486,21 @@ function M.normalize(path, opts) vim.validate({ path = { path, { 'string' } }, expand_env = { opts.expand_env, { 'boolean' }, true }, + win = { opts.win, { 'boolean' }, true }, }) + local win = opts.win == nil and iswin or not not opts.win + local os_sep_local = win and 
'\\' or '/' + + -- Empty path is already normalized + if path == '' then + return '' + end + -- Expand ~ to users home directory if vim.startswith(path, '~') then local home = vim.uv.os_homedir() or '~' - if home:sub(-1) == os_sep then + if home:sub(-1) == os_sep_local then home = home:sub(1, -2) end path = home .. path:sub(2) @@ -386,24 +512,35 @@ function M.normalize(path, opts) end -- Convert path separator to `/` - path = path:gsub(os_sep, '/') + path = path:gsub(os_sep_local, '/') - -- Don't modify leading double slash as those have implementation-defined behavior according to - -- POSIX. They are also valid UNC paths. Three or more leading slashes are however collapsed to - -- a single slash. - if vim.startswith(path, '//') and not vim.startswith(path, '///') then - path = '/' .. path:gsub('/+', '/') - else - path = path:gsub('/+', '/') + -- Check for double slashes at the start of the path because they have special meaning + local double_slash = vim.startswith(path, '//') and not vim.startswith(path, '///') + local prefix = '' + + if win then + local is_valid --- @type boolean + -- Split Windows paths into prefix and body to make processing easier + prefix, path, is_valid = split_windows_path(path) + + -- If path is not valid, return it as-is + if not is_valid then + return prefix .. path + end + + -- Remove extraneous slashes from the prefix + prefix = prefix:gsub('/+', '/') end - -- Ensure last slash is not truncated from root drive on Windows - if iswin and path:match('^%w:/$') then - return path - end + -- Resolve `.` and `..` components and remove extraneous slashes from path, then recombine prefix + -- and path. Preserve leading double slashes as they indicate UNC paths and DOS device paths in + -- Windows and have implementation-defined behavior in POSIX. + path = (double_slash and '/' or '') .. prefix .. 
path_resolve_dot(path) - -- Remove trailing slashes - path = path:gsub('(.)/$', '%1') + -- Change empty path to `.` + if path == '' then + path = '.' + end return path end diff --git a/test/functional/lua/fs_spec.lua b/test/functional/lua/fs_spec.lua index d3bd19e35b..01d352ef14 100644 --- a/test/functional/lua/fs_spec.lua +++ b/test/functional/lua/fs_spec.lua @@ -308,26 +308,112 @@ describe('vim.fs', function() ) end) - it('works with UNC paths', function() - eq('//foo', vim.fs.normalize('//foo')) -- UNC path - eq('//foo/bar', vim.fs.normalize('//foo//bar////')) -- UNC path - eq('/foo', vim.fs.normalize('///foo')) -- Not a UNC path - eq('/', vim.fs.normalize('//')) -- Not a UNC path - eq('/', vim.fs.normalize('///')) -- Not a UNC path - eq('/foo/bar', vim.fs.normalize('/foo//bar////')) -- Not a UNC path + -- Opts required for testing posix paths and win paths + local posix_opts = is_os('win') and { win = false } or {} + local win_opts = is_os('win') and {} or { win = true } + + it('preserves leading double slashes in POSIX paths', function() + eq('//foo', vim.fs.normalize('//foo', posix_opts)) + eq('//foo/bar', vim.fs.normalize('//foo//bar////', posix_opts)) + eq('/foo', vim.fs.normalize('///foo', posix_opts)) + eq('//', vim.fs.normalize('//', posix_opts)) + eq('/', vim.fs.normalize('///', posix_opts)) + eq('/foo/bar', vim.fs.normalize('/foo//bar////', posix_opts)) end) - if is_os('win') then - it('Last slash is not truncated from root drive', function() - eq('C:/', vim.fs.normalize('C:/')) + it('allows backslashes on unix-based os', function() + eq('/home/user/hello\\world', vim.fs.normalize('/home/user/hello\\world', posix_opts)) + end) + + it('preserves / after drive letters', function() + eq('C:/', vim.fs.normalize([[C:\]], win_opts)) + end) + + it('works with UNC and DOS device paths', function() + eq('//server/share/foo/bar', vim.fs.normalize([[\\server\\share\\\foo\bar\\\]], win_opts)) + eq('//system07/C$/', vim.fs.normalize([[\\system07\C$\\\\]], 
win_opts)) + eq('//./C:/foo/bar', vim.fs.normalize([[\\.\\C:\foo\\\\bar]], win_opts)) + eq('//?/C:/foo/bar', vim.fs.normalize([[\\?\C:\\\foo\bar\\\\]], win_opts)) + eq( + '//?/UNC/server/share/foo/bar', + vim.fs.normalize([[\\?\UNC\server\\\share\\\\foo\\\bar]], win_opts) + ) + eq('//./BootPartition/foo/bar', vim.fs.normalize([[\\.\BootPartition\\foo\bar]], win_opts)) + eq( + '//./Volume{12345678-1234-1234-1234-1234567890AB}/foo/bar', + vim.fs.normalize([[\\.\Volume{12345678-1234-1234-1234-1234567890AB}\\\foo\bar\\]], win_opts) + ) + end) + + it('handles invalid UNC and DOS device paths', function() + eq('//server/share', vim.fs.normalize([[\\server\share]], win_opts)) + eq('//server/', vim.fs.normalize([[\\server\]], win_opts)) + eq('//./UNC/server/share', vim.fs.normalize([[\\.\UNC\server\share]], win_opts)) + eq('//?/UNC/server/', vim.fs.normalize([[\\?\UNC\server\]], win_opts)) + eq('//?/UNC/server/..', vim.fs.normalize([[\\?\UNC\server\..]], win_opts)) + eq('//./', vim.fs.normalize([[\\.\]], win_opts)) + eq('//./foo', vim.fs.normalize([[\\.\foo]], win_opts)) + eq('//./BootPartition', vim.fs.normalize([[\\.\BootPartition]], win_opts)) + end) + + it('converts backward slashes', function() + eq('C:/Users/jdoe', vim.fs.normalize([[C:\Users\jdoe]], win_opts)) + end) + + describe('. and .. 
component resolving', function() + it('works', function() + -- Windows paths + eq('C:/Users', vim.fs.normalize([[C:\Users\jdoe\Downloads\.\..\..\]], win_opts)) + eq('C:/Users/jdoe', vim.fs.normalize([[C:\Users\jdoe\Downloads\.\..\.\.\]], win_opts)) + eq('C:/', vim.fs.normalize('C:/Users/jdoe/Downloads/./../../../', win_opts)) + eq('C:foo', vim.fs.normalize([[C:foo\bar\.\..\.]], win_opts)) + -- POSIX paths + eq('/home', vim.fs.normalize('/home/jdoe/Downloads/./../..', posix_opts)) + eq('/home/jdoe', vim.fs.normalize('/home/jdoe/Downloads/./../././', posix_opts)) + eq('/', vim.fs.normalize('/home/jdoe/Downloads/./../../../', posix_opts)) + -- OS-agnostic relative paths + eq('foo/bar/baz', vim.fs.normalize('foo/bar/foobar/../baz/./')) + eq('foo/bar', vim.fs.normalize('foo/bar/foobar/../baz/./../../bar/./.')) end) - it('converts backward slashes', function() - eq('C:/Users/jdoe', vim.fs.normalize('C:\\Users\\jdoe')) + + it('works when relative path reaches current directory', function() + eq('C:', vim.fs.normalize('C:foo/bar/../../.', win_opts)) + + eq('.', vim.fs.normalize('.')) + eq('.', vim.fs.normalize('././././')) + eq('.', vim.fs.normalize('foo/bar/../../.')) end) - else - it('allows backslashes on unix-based os', function() - eq('/home/user/hello\\world', vim.fs.normalize('/home/user/hello\\world')) + + it('works when relative path goes outside current directory', function() + eq('../../foo/bar', vim.fs.normalize('../../foo/bar')) + eq('../foo', vim.fs.normalize('foo/bar/../../../foo')) + + eq('C:../foo', vim.fs.normalize('C:../foo', win_opts)) + eq('C:../../foo/bar', vim.fs.normalize('C:foo/../../../foo/bar', win_opts)) end) - end + + it('.. 
in root directory resolves to itself', function() + eq('C:/', vim.fs.normalize('C:/../../', win_opts)) + eq('C:/foo', vim.fs.normalize('C:/foo/../../foo', win_opts)) + + eq('//server/share/', vim.fs.normalize([[\\server\share\..\..]], win_opts)) + eq('//server/share/foo', vim.fs.normalize([[\\server\\share\foo\..\..\foo]], win_opts)) + + eq('//./C:/', vim.fs.normalize([[\\.\C:\..\..]], win_opts)) + eq('//?/C:/foo', vim.fs.normalize([[\\?\C:\..\..\foo]], win_opts)) + + eq('//./UNC/server/share/', vim.fs.normalize([[\\.\UNC\\server\share\..\..\]], win_opts)) + eq( + '//?/UNC/server/share/foo', + vim.fs.normalize([[\\?\UNC\server\\share\..\..\foo]], win_opts) + ) + + eq('//?/BootPartition/', vim.fs.normalize([[\\?\BootPartition\..\..]], win_opts)) + eq('//./BootPartition/foo', vim.fs.normalize([[\\.\BootPartition\..\..\foo]], win_opts)) + + eq('/', vim.fs.normalize('/../../', posix_opts)) + eq('/foo', vim.fs.normalize('/foo/../../foo', posix_opts)) + end) + end) end) end)