syncthing/lib/scanner/blockqueue.go

147 lines
3.5 KiB
Go
Raw Normal View History

2014-11-16 13:13:20 -07:00
// Copyright (C) 2014 The Syncthing Authors.
2014-09-29 12:43:32 -07:00
//
2015-03-07 13:36:35 -07:00
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.
2014-07-30 11:10:46 -07:00
package scanner
import (
"context"
"errors"
2014-07-30 11:10:46 -07:00
"github.com/syncthing/syncthing/lib/fs"
2015-09-22 10:38:46 -07:00
"github.com/syncthing/syncthing/lib/protocol"
2015-08-06 02:29:25 -07:00
"github.com/syncthing/syncthing/lib/sync"
2014-07-30 11:10:46 -07:00
)
// HashFile hashes the files and returns a list of blocks representing the file.
func HashFile(ctx context.Context, fs fs.Filesystem, path string, blockSize int, counter Counter, useWeakHashes bool) ([]protocol.BlockInfo, error) {
fd, err := fs.Open(path)
2014-10-03 15:15:54 -07:00
if err != nil {
Implement facility based logger, debugging via REST API This implements a new debug/trace infrastructure based on a slightly hacked up logger. Instead of the traditional "if debug { ... }" I've rewritten the logger to have no-op Debugln and Debugf, unless debugging has been enabled for a given "facility". The "facility" is just a string, typically a package name. This will be slightly slower than before; but not that much as it's mostly a function call that returns immediately. For the cases where it matters (the Debugln takes a hex.Dump() of something for example, and it's not in a very occasional "if err != nil" branch) there is an l.ShouldDebug(facility) that is fast enough to be used like the old "if debug". The point of all this is that we can now toggle debugging for the various packages on and off at runtime. There's a new method /rest/system/debug that can be POSTed a set of facilities to enable and disable debug for, or GET from to get a list of facilities with descriptions and their current debug status. Similarly a /rest/system/log?since=... can grab the latest log entries, up to 250 of them (hardcoded constant in main.go) plus the initial few. Not implemented in this commit (but planned) is a simple debug GUI available on /debug that shows the current log in an easily pasteable format and has checkboxes to enable the various debug facilities. The debug instructions to a user then becomes "visit this URL, check these boxes, reproduce your problem, copy and paste the log". The actual log viewer on the hypothetical /debug URL can poll regularly for new log entries and this bypass the 250 line limit. The existing STTRACE=foo variable is still obeyed and just sets the start state of the system.
2015-10-03 08:25:21 -07:00
l.Debugln("open:", err)
return nil, err
2014-10-03 15:15:54 -07:00
}
2015-08-26 15:49:06 -07:00
defer fd.Close()
2014-07-30 11:10:46 -07:00
// Get the size and modtime of the file before we start hashing it.
fi, err := fd.Stat()
if err != nil {
l.Debugln("stat before:", err)
return nil, err
}
size := fi.Size()
modTime := fi.ModTime()
// Hash the file. This may take a while for large files.
blocks, err := Blocks(ctx, fd, blockSize, size, counter, useWeakHashes)
if err != nil {
l.Debugln("blocks:", err)
return nil, err
}
// Recheck the size and modtime again. If they differ, the file changed
// while we were reading it and our hash results are invalid.
fi, err = fd.Stat()
if err != nil {
l.Debugln("stat after:", err)
return nil, err
}
if size != fi.Size() || !modTime.Equal(fi.ModTime()) {
return nil, errors.New("file changed during hashing")
2014-10-03 15:15:54 -07:00
}
2015-08-26 15:49:06 -07:00
return blocks, nil
2014-10-03 15:15:54 -07:00
}
2014-07-30 11:10:46 -07:00
// The parallel hasher reads FileInfo structures from the inbox, hashes the
// file to populate the Blocks element and sends it to the outbox. A number of
// workers are used in parallel. The outbox will become closed when the inbox
// is closed and all items handled.
type parallelHasher struct {
fs fs.Filesystem
blockSize int
workers int
outbox chan<- protocol.FileInfo
inbox <-chan protocol.FileInfo
counter Counter
done chan<- struct{}
useWeakHashes bool
wg sync.WaitGroup
}
func newParallelHasher(ctx context.Context, fs fs.Filesystem, blockSize, workers int, outbox chan<- protocol.FileInfo, inbox <-chan protocol.FileInfo, counter Counter, done chan<- struct{}, useWeakHashes bool) {
ph := &parallelHasher{
fs: fs,
blockSize: blockSize,
workers: workers,
outbox: outbox,
inbox: inbox,
counter: counter,
done: done,
useWeakHashes: useWeakHashes,
wg: sync.NewWaitGroup(),
}
for i := 0; i < workers; i++ {
ph.wg.Add(1)
go ph.hashFiles(ctx)
}
go ph.closeWhenDone()
}
func (ph *parallelHasher) hashFiles(ctx context.Context) {
defer ph.wg.Done()
2015-11-13 07:00:32 -07:00
for {
select {
case f, ok := <-ph.inbox:
2015-11-13 07:00:32 -07:00
if !ok {
return
}
2014-07-30 11:10:46 -07:00
2015-11-13 07:00:32 -07:00
if f.IsDirectory() || f.IsDeleted() {
panic("Bug. Asked to hash a directory or a deleted file.")
}
2014-07-30 11:10:46 -07:00
blocks, err := HashFile(ctx, ph.fs, f.Name, ph.blockSize, ph.counter, ph.useWeakHashes)
2015-11-13 07:00:32 -07:00
if err != nil {
l.Debugln("hash error:", f.Name, err)
continue
}
f.Blocks = blocks
// The size we saw when initially deciding to hash the file
// might not have been the size it actually had when we hashed
// it. Update the size from the block list.
f.Size = 0
for _, b := range blocks {
f.Size += int64(b.Size)
}
2015-11-13 07:00:32 -07:00
select {
case ph.outbox <- f:
case <-ctx.Done():
2015-11-13 07:00:32 -07:00
return
}
case <-ctx.Done():
2015-11-13 07:00:32 -07:00
return
}
2014-07-30 11:10:46 -07:00
}
}
func (ph *parallelHasher) closeWhenDone() {
ph.wg.Wait()
if ph.done != nil {
close(ph.done)
}
close(ph.outbox)
}