fix(ignore): ensure normalization of patterns and paths match (fixes #9597) (#9717)

In ignores, normalize the input when parsing it.
When scanning, normalize earlier such that the path is already
normalized when checking ignores. This requires splitting normalization
of the string from normalization of the file, as we don't want to
attempt the latter if the file is ignored.

Closes #9598

---------

Co-authored-by: Jakob Borg <jakob@kastelo.net>
This commit is contained in:
Simon Frei 2024-09-28 17:16:44 +02:00 committed by GitHub
parent 3c476542d2
commit 605fd6d726
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 59 additions and 35 deletions

View File

@ -18,7 +18,9 @@ import (
"time"
"github.com/gobwas/glob"
"golang.org/x/text/unicode/norm"
"github.com/syncthing/syncthing/lib/build"
"github.com/syncthing/syncthing/lib/fs"
"github.com/syncthing/syncthing/lib/ignore/ignoreresult"
"github.com/syncthing/syncthing/lib/osutil"
@ -212,6 +214,11 @@ func (m *Matcher) parseLocked(r io.Reader, file string) error {
}
// Match matches the patterns plus temporary and internal files.
//
// The "file" parameter must be in the OS' native unicode format (NFD on macos,
// NFC everywhere else). This is always the case in real usage in syncthing, as
// we ensure native unicode normalisation on all entry points (scanning and from
// protocol) - so no need to normalize when calling this, except e.g. in tests.
func (m *Matcher) Match(file string) (result ignoreresult.R) {
switch {
case fs.IsTemporary(file):
@ -387,6 +394,10 @@ func loadParseIncludeFile(filesystem fs.Filesystem, file string, cd ChangeDetect
}
func parseLine(line string) ([]Pattern, error) {
// We use native normalization internally, thus the patterns must match
// that to avoid false negative matches.
line = nativeUnicodeNorm(line)
pattern := Pattern{
result: ignoreresult.Ignored,
}
@ -464,6 +475,13 @@ func parseLine(line string) ([]Pattern, error) {
return patterns, nil
}
func nativeUnicodeNorm(s string) string {
if build.IsDarwin || build.IsIOS {
return norm.NFD.String(s)
}
return norm.NFC.String(s)
}
func parseIgnoreFile(fs fs.Filesystem, fd io.Reader, currentFile string, cd ChangeDetector, linesSeen map[string]struct{}) ([]string, []Pattern, error) {
var patterns []Pattern

View File

@ -805,7 +805,10 @@ func TestIssue3174(t *testing.T) {
t.Fatal(err)
}
if !pats.Match("åäö").IsIgnored() {
// The pattern above is normalized when parsing, and in order for this
// string to match the pattern, it needs to use the same normalization. And
// Go always uses NFC regardless of OS, while we use NFD on macos.
if !pats.Match(nativeUnicodeNorm("åäö")).IsIgnored() {
t.Error("Should match")
}
}

View File

@ -293,6 +293,11 @@ func (w *walker) walkAndHashFiles(ctx context.Context, toHashChan chan<- protoco
return skip
}
// Just in case the filesystem doesn't produce the normalization the OS
// uses, and we use internally.
nonNormPath := path
path = normalizePath(path)
if m := w.Matcher.Match(path); m.IsIgnored() {
l.Debugln(w, "ignored (patterns):", path)
// Only descend if matcher says so and the current file is not a symlink.
@ -319,9 +324,23 @@ func (w *walker) walkAndHashFiles(ctx context.Context, toHashChan chan<- protoco
return nil
}
if path != nonNormPath {
if !w.AutoNormalize {
// We're not authorized to do anything about it, so complain and skip.
handleError(ctx, "normalizing path", nonNormPath, errUTF8Normalization, finishedChan)
return skip
}
path, err = w.applyNormalization(nonNormPath, path, info)
if err != nil {
handleError(ctx, "normalizing path", nonNormPath, err, finishedChan)
return skip
}
}
if ignoredParent == "" {
// parent isn't ignored, nothing special
return w.handleItem(ctx, path, info, toHashChan, finishedChan, skip)
return w.handleItem(ctx, path, info, toHashChan, finishedChan)
}
// Part of current path below the ignored (potential) parent
@ -330,7 +349,7 @@ func (w *walker) walkAndHashFiles(ctx context.Context, toHashChan chan<- protoco
// ignored path isn't actually a parent of the current path
if rel == path {
ignoredParent = ""
return w.handleItem(ctx, path, info, toHashChan, finishedChan, skip)
return w.handleItem(ctx, path, info, toHashChan, finishedChan)
}
// The previously ignored parent directories of the current, not
@ -345,7 +364,7 @@ func (w *walker) walkAndHashFiles(ctx context.Context, toHashChan chan<- protoco
handleError(ctx, "scan", ignoredParent, err, finishedChan)
return skip
}
if err = w.handleItem(ctx, ignoredParent, info, toHashChan, finishedChan, skip); err != nil {
if err = w.handleItem(ctx, ignoredParent, info, toHashChan, finishedChan); err != nil {
return err
}
}
@ -355,14 +374,7 @@ func (w *walker) walkAndHashFiles(ctx context.Context, toHashChan chan<- protoco
}
}
func (w *walker) handleItem(ctx context.Context, path string, info fs.FileInfo, toHashChan chan<- protocol.FileInfo, finishedChan chan<- ScanResult, skip error) error {
oldPath := path
path, err := w.normalizePath(path, info)
if err != nil {
handleError(ctx, "normalizing path", oldPath, err, finishedChan)
return skip
}
func (w *walker) handleItem(ctx context.Context, path string, info fs.FileInfo, toHashChan chan<- protocol.FileInfo, finishedChan chan<- ScanResult) error {
switch {
case info.IsSymlink():
if err := w.walkSymlink(ctx, path, info, finishedChan); err != nil {
@ -375,13 +387,13 @@ func (w *walker) handleItem(ctx context.Context, path string, info fs.FileInfo,
return nil
case info.IsDir():
err = w.walkDir(ctx, path, info, finishedChan)
return w.walkDir(ctx, path, info, finishedChan)
case info.IsRegular():
err = w.walkRegular(ctx, path, info, toHashChan)
return w.walkRegular(ctx, path, info, toHashChan)
}
return err
return fmt.Errorf("bug: file info for %v is neither symlink, dir nor regular", path)
}
func (w *walker) walkRegular(ctx context.Context, relPath string, info fs.FileInfo, toHashChan chan<- protocol.FileInfo) error {
@ -550,30 +562,21 @@ func (w *walker) walkSymlink(ctx context.Context, relPath string, info fs.FileIn
return nil
}
// normalizePath returns the normalized relative path (possibly after fixing
// it on disk), or skip is true.
func (w *walker) normalizePath(path string, info fs.FileInfo) (normPath string, err error) {
func normalizePath(path string) string {
if build.IsDarwin || build.IsIOS {
// Mac OS X file names should always be NFD normalized.
normPath = norm.NFD.String(path)
} else {
// Every other OS in the known universe uses NFC or just plain
// doesn't bother to define an encoding. In our case *we* do care,
// so we enforce NFC regardless.
normPath = norm.NFC.String(path)
}
if path == normPath {
// The file name is already normalized: nothing to do
return path, nil
}
if !w.AutoNormalize {
// We're not authorized to do anything about it, so complain and skip.
return "", errUTF8Normalization
return norm.NFD.String(path)
}
// Every other OS in the known universe uses NFC or just plain
// doesn't bother to define an encoding. In our case *we* do care,
// so we enforce NFC regardless.
return norm.NFC.String(path)
}
// applyNormalization fixes the normalization of the file on disk, i.e. ensures
// the file at path ends up named normPath. It shouldn't but may happen that the
// file ends up with a different name, in which case that one should be scanned.
func (w *walker) applyNormalization(path, normPath string, info fs.FileInfo) (string, error) {
// We will attempt to normalize it.
normInfo, err := w.Filesystem.Lstat(normPath)
if fs.IsNotExist(err) {