2016-12-14 16:30:29 -07:00
|
|
|
// Copyright (C) 2016 The Syncthing Authors.
|
|
|
|
//
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
|
|
|
// You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
|
|
|
|
package weakhash
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
|
|
|
"io"
|
|
|
|
"os"
|
2017-01-04 14:04:13 -07:00
|
|
|
|
|
|
|
"github.com/chmduquesne/rollinghash/adler32"
|
2016-12-14 16:30:29 -07:00
|
|
|
)
|
|
|
|
|
|
|
|
const (
	// Size is the length in bytes of a weak hash, matching the uint32 hashes
	// used throughout this package.
	// NOTE(review): meaning inferred from the value and the hash type;
	// confirm against callers.
	Size = 4

	// maxWeakhashFinderHits caps the number of offsets tracked for any
	// single weak hash.
	maxWeakhashFinderHits = 10
)
|
|
|
|
|
2017-02-06 03:27:11 -07:00
|
|
|
// Enabled is a package-level switch that defaults to true. Nothing in this
// file reads it; presumably callers consult it to decide whether weak
// hashing should be used - confirm against the call sites.
var Enabled = true
|
|
|
|
|
2016-12-14 16:30:29 -07:00
|
|
|
// Find finds all the blocks of the given size within io.Reader that matches
|
|
|
|
// the hashes provided, and returns a hash -> slice of offsets within reader
|
|
|
|
// map, that produces the same weak hash.
|
|
|
|
func Find(ir io.Reader, hashesToFind []uint32, size int) (map[uint32][]int64, error) {
|
2017-02-06 03:27:11 -07:00
|
|
|
if ir == nil || len(hashesToFind) == 0 {
|
2016-12-14 16:30:29 -07:00
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
r := bufio.NewReader(ir)
|
2017-01-04 14:04:13 -07:00
|
|
|
hf := adler32.New()
|
2016-12-14 16:30:29 -07:00
|
|
|
|
|
|
|
n, err := io.CopyN(hf, r, int64(size))
|
|
|
|
if err == io.EOF {
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if n != int64(size) {
|
|
|
|
return nil, io.ErrShortBuffer
|
|
|
|
}
|
|
|
|
|
|
|
|
offsets := make(map[uint32][]int64)
|
|
|
|
for _, hashToFind := range hashesToFind {
|
2017-02-06 03:27:11 -07:00
|
|
|
offsets[hashToFind] = make([]int64, 0, maxWeakhashFinderHits)
|
2016-12-14 16:30:29 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
var i int64
|
|
|
|
var hash uint32
|
|
|
|
for {
|
|
|
|
hash = hf.Sum32()
|
2017-01-24 01:26:45 -07:00
|
|
|
if existing, ok := offsets[hash]; ok && len(existing) < maxWeakhashFinderHits {
|
2016-12-14 16:30:29 -07:00
|
|
|
offsets[hash] = append(existing, i)
|
|
|
|
}
|
|
|
|
i++
|
|
|
|
|
|
|
|
bt, err := r.ReadByte()
|
|
|
|
if err == io.EOF {
|
|
|
|
break
|
|
|
|
} else if err != nil {
|
|
|
|
return offsets, err
|
|
|
|
}
|
2017-01-04 14:04:13 -07:00
|
|
|
hf.Roll(bt)
|
2016-12-14 16:30:29 -07:00
|
|
|
}
|
|
|
|
return offsets, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewFinder(path string, size int, hashesToFind []uint32) (*Finder, error) {
|
|
|
|
file, err := os.Open(path)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
offsets, err := Find(file, hashesToFind, size)
|
|
|
|
if err != nil {
|
|
|
|
file.Close()
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return &Finder{
|
|
|
|
file: file,
|
|
|
|
size: size,
|
|
|
|
offsets: offsets,
|
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Finder couples an open file with the offsets at which a set of weak
// hashes was found in it. It is created by NewFinder and must be released
// with Close.
type Finder struct {
	// file is the open handle that Iterate reads matching blocks from.
	file *os.File
	// size is the block size in bytes; Iterate only accepts buffers of
	// exactly this length.
	size int
	// offsets maps a weak hash to the file offsets where a block with that
	// hash starts, as returned by Find.
	offsets map[uint32][]int64
}
|
|
|
|
|
|
|
|
// Iterate iterates all available blocks that matches the provided hash, reads
|
|
|
|
// them into buf, and calls the iterator function. The iterator function should
|
|
|
|
// return wether it wishes to continue interating.
|
|
|
|
func (h *Finder) Iterate(hash uint32, buf []byte, iterFunc func(int64) bool) (bool, error) {
|
|
|
|
if h == nil || hash == 0 || len(buf) != h.size {
|
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, offset := range h.offsets[hash] {
|
|
|
|
_, err := h.file.ReadAt(buf, offset)
|
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
if !iterFunc(offset) {
|
|
|
|
return true, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Close releases any resource associated with the finder
|
|
|
|
func (h *Finder) Close() {
|
|
|
|
if h != nil {
|
|
|
|
h.file.Close()
|
|
|
|
}
|
|
|
|
}
|