2021-10-14 09:39:21 -07:00
|
|
|
package aghnet
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"io/fs"
|
|
|
|
"net"
|
|
|
|
"path"
|
|
|
|
"strings"
|
|
|
|
"sync"
|
|
|
|
|
|
|
|
"github.com/AdguardTeam/AdGuardHome/internal/aghos"
|
|
|
|
"github.com/AdguardTeam/golibs/errors"
|
|
|
|
"github.com/AdguardTeam/golibs/log"
|
|
|
|
"github.com/AdguardTeam/golibs/netutil"
|
|
|
|
"github.com/AdguardTeam/golibs/stringutil"
|
|
|
|
"github.com/AdguardTeam/urlfilter"
|
|
|
|
"github.com/AdguardTeam/urlfilter/filterlist"
|
2021-11-16 06:16:38 -07:00
|
|
|
"github.com/AdguardTeam/urlfilter/rules"
|
2021-10-14 09:39:21 -07:00
|
|
|
"github.com/miekg/dns"
|
|
|
|
)
|
|
|
|
|
|
|
|
// DefaultHostsPaths returns the slice of paths default for the operating system
|
|
|
|
// to files and directories which are containing the hosts database. The result
|
2021-11-16 06:16:38 -07:00
|
|
|
// is intended to be used within fs.FS so the initial slash is omitted.
|
2021-10-14 09:39:21 -07:00
|
|
|
func DefaultHostsPaths() (paths []string) {
|
|
|
|
return defaultHostsPaths()
|
|
|
|
}
|
|
|
|
|
|
|
|
// hostsContainerPref is the prefix that HostsContainer's methods put in front
// of their log messages and wrapped errors.
const hostsContainerPref = "hosts container"
|
|
|
|
|
|
|
|
// HostsContainer stores the relevant hosts database provided by the OS and
|
|
|
|
// processes both A/AAAA and PTR DNS requests for those.
|
|
|
|
type HostsContainer struct {
|
|
|
|
// engLock protects rulesStrg and engine.
|
|
|
|
engLock *sync.RWMutex
|
|
|
|
|
|
|
|
// rulesStrg stores the rules obtained from the hosts' file.
|
|
|
|
rulesStrg *filterlist.RuleStorage
|
|
|
|
// engine serves rulesStrg.
|
|
|
|
engine *urlfilter.DNSEngine
|
|
|
|
|
2021-11-17 07:21:10 -07:00
|
|
|
// done is the channel to sign closing the container.
|
|
|
|
done chan struct{}
|
|
|
|
|
2021-11-16 06:16:38 -07:00
|
|
|
// updates is the channel for receiving updated hosts.
|
2021-10-14 09:39:21 -07:00
|
|
|
updates chan *netutil.IPMap
|
2021-11-16 06:16:38 -07:00
|
|
|
// last is the set of hosts that was cached within last detected change.
|
|
|
|
last *netutil.IPMap
|
2021-10-14 09:39:21 -07:00
|
|
|
|
|
|
|
// fsys is the working file system to read hosts files from.
|
|
|
|
fsys fs.FS
|
|
|
|
|
|
|
|
// w tracks the changes in specified files and directories.
|
|
|
|
w aghos.FSWatcher
|
|
|
|
// patterns stores specified paths in the fs.Glob-compatible form.
|
|
|
|
patterns []string
|
|
|
|
}
|
|
|
|
|
2021-11-17 07:21:10 -07:00
|
|
|
// ErrNoHostsPaths is returned when there are no valid paths to watch passed to
|
|
|
|
// the HostsContainer.
|
|
|
|
const ErrNoHostsPaths errors.Error = "no valid paths to hosts files provided"
|
2021-10-14 09:39:21 -07:00
|
|
|
|
|
|
|
// NewHostsContainer creates a container of hosts, that watches the paths with
|
2021-11-17 07:21:10 -07:00
|
|
|
// w. paths shouldn't be empty and each of paths should locate either a file or
|
2021-10-14 09:39:21 -07:00
|
|
|
// a directory in fsys. fsys and w must be non-nil.
|
|
|
|
func NewHostsContainer(
|
|
|
|
fsys fs.FS,
|
|
|
|
w aghos.FSWatcher,
|
|
|
|
paths ...string,
|
|
|
|
) (hc *HostsContainer, err error) {
|
|
|
|
defer func() { err = errors.Annotate(err, "%s: %w", hostsContainerPref) }()
|
|
|
|
|
|
|
|
if len(paths) == 0 {
|
2021-11-17 07:21:10 -07:00
|
|
|
return nil, ErrNoHostsPaths
|
2021-10-14 09:39:21 -07:00
|
|
|
}
|
|
|
|
|
2021-11-17 07:21:10 -07:00
|
|
|
var patterns []string
|
|
|
|
patterns, err = pathsToPatterns(fsys, paths)
|
2021-10-14 09:39:21 -07:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2021-11-17 07:21:10 -07:00
|
|
|
} else if len(patterns) == 0 {
|
|
|
|
return nil, ErrNoHostsPaths
|
2021-10-14 09:39:21 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
hc = &HostsContainer{
|
|
|
|
engLock: &sync.RWMutex{},
|
2021-11-17 07:21:10 -07:00
|
|
|
done: make(chan struct{}, 1),
|
2021-10-14 09:39:21 -07:00
|
|
|
updates: make(chan *netutil.IPMap, 1),
|
|
|
|
fsys: fsys,
|
|
|
|
w: w,
|
|
|
|
patterns: patterns,
|
|
|
|
}
|
|
|
|
|
|
|
|
log.Debug("%s: starting", hostsContainerPref)
|
|
|
|
|
|
|
|
// Load initially.
|
|
|
|
if err = hc.refresh(); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, p := range paths {
|
2021-11-17 07:21:10 -07:00
|
|
|
if err = w.Add(p); err != nil {
|
|
|
|
if !errors.Is(err, fs.ErrNotExist) {
|
|
|
|
return nil, fmt.Errorf("adding path: %w", err)
|
|
|
|
}
|
2021-10-14 09:39:21 -07:00
|
|
|
|
2021-11-17 07:21:10 -07:00
|
|
|
log.Debug("%s: file %q expected to exist but doesn't", hostsContainerPref, p)
|
2021-10-14 09:39:21 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
go hc.handleEvents()
|
|
|
|
|
|
|
|
return hc, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// MatchRequest is the request processing method to resolve hostnames and
|
2021-11-11 06:19:33 -07:00
|
|
|
// addresses from the operating system's hosts files. res is nil for any
|
|
|
|
// request having not an A/AAAA or PTR type. It's safe for concurrent use.
|
2021-10-14 09:39:21 -07:00
|
|
|
func (hc *HostsContainer) MatchRequest(
|
|
|
|
req urlfilter.DNSRequest,
|
2021-11-11 06:19:33 -07:00
|
|
|
) (res *urlfilter.DNSResult, ok bool) {
|
2021-10-14 09:39:21 -07:00
|
|
|
switch req.DNSType {
|
|
|
|
case dns.TypeA, dns.TypeAAAA, dns.TypePTR:
|
|
|
|
log.Debug("%s: handling the request", hostsContainerPref)
|
|
|
|
default:
|
2021-11-11 06:19:33 -07:00
|
|
|
return nil, false
|
2021-10-14 09:39:21 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
hc.engLock.RLock()
|
|
|
|
defer hc.engLock.RUnlock()
|
|
|
|
|
2021-11-17 07:21:10 -07:00
|
|
|
return hc.engine.MatchRequest(req)
|
2021-10-14 09:39:21 -07:00
|
|
|
}
|
|
|
|
|
2021-11-17 07:21:10 -07:00
|
|
|
// Close implements the io.Closer interface for *HostsContainer. Close must
|
|
|
|
// only be called once. The returned err is always nil.
|
2021-10-14 09:39:21 -07:00
|
|
|
func (hc *HostsContainer) Close() (err error) {
|
2021-11-16 06:16:38 -07:00
|
|
|
log.Debug("%s: closing", hostsContainerPref)
|
2021-10-14 09:39:21 -07:00
|
|
|
|
2021-11-17 07:21:10 -07:00
|
|
|
close(hc.done)
|
|
|
|
|
|
|
|
return nil
|
2021-10-14 09:39:21 -07:00
|
|
|
}
|
|
|
|
|
2021-11-16 06:16:38 -07:00
|
|
|
// Upd returns the channel into which the updates are sent. The receivable
|
|
|
|
// map's values are guaranteed to be of type of *stringutil.Set.
|
2021-10-14 09:39:21 -07:00
|
|
|
func (hc *HostsContainer) Upd() (updates <-chan *netutil.IPMap) {
|
|
|
|
return hc.updates
|
|
|
|
}
|
|
|
|
|
|
|
|
// pathsToPatterns turns paths into patterns suitable for fs.Glob. A path to a
// directory becomes a pattern matching every entry directly inside of it, a
// path to a file is used as is, and paths that don't exist in fsys are
// skipped. Any other error from inspecting a path aborts the conversion.
func pathsToPatterns(fsys fs.FS, paths []string) (patterns []string, err error) {
	for idx, name := range paths {
		info, statErr := fs.Stat(fsys, name)
		switch {
		case statErr == nil:
			// Go on.
		case errors.Is(statErr, fs.ErrNotExist):
			continue
		default:
			// The filename isn't put here since fs.Stat has already added it.
			return nil, fmt.Errorf("path at index %d: %w", idx, statErr)
		}

		pattern := name
		if info.IsDir() {
			pattern = path.Join(name, "*")
		}

		patterns = append(patterns, pattern)
	}

	return patterns, nil
}
|
|
|
|
|
|
|
|
// handleEvents concurrently handles the events. It closes the update channel
|
|
|
|
// of HostsContainer when finishes. Used to be called within a goroutine.
|
|
|
|
func (hc *HostsContainer) handleEvents() {
|
|
|
|
defer log.OnPanic(fmt.Sprintf("%s: handling events", hostsContainerPref))
|
|
|
|
|
|
|
|
defer close(hc.updates)
|
|
|
|
|
2021-11-17 07:21:10 -07:00
|
|
|
ok, eventsCh := true, hc.w.Events()
|
|
|
|
for ok {
|
|
|
|
select {
|
|
|
|
case _, ok = <-eventsCh:
|
|
|
|
if !ok {
|
|
|
|
log.Debug("%s: watcher closed the events channel", hostsContainerPref)
|
|
|
|
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := hc.refresh(); err != nil {
|
|
|
|
log.Error("%s: %s", hostsContainerPref, err)
|
|
|
|
}
|
|
|
|
case _, ok = <-hc.done:
|
|
|
|
// Go on.
|
2021-10-14 09:39:21 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// hostsParser is a helper type to parse rules from the operating system's hosts
|
|
|
|
// file.
|
|
|
|
type hostsParser struct {
|
|
|
|
// rules builds the resulting rules list content.
|
|
|
|
rules *strings.Builder
|
|
|
|
|
|
|
|
// table stores only the unique IP-hostname pairs. It's also sent to the
|
|
|
|
// updates channel afterwards.
|
|
|
|
table *netutil.IPMap
|
|
|
|
}
|
|
|
|
|
2021-11-16 06:16:38 -07:00
|
|
|
func (hc *HostsContainer) newHostsParser() (hp *hostsParser) {
|
|
|
|
return &hostsParser{
|
|
|
|
rules: &strings.Builder{},
|
|
|
|
table: netutil.NewIPMap(hc.last.Len()),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// parseFile is a aghos.FileWalker for parsing the files with hosts syntax. It
|
|
|
|
// never signs to stop walking and never returns any additional patterns.
|
2021-10-14 09:39:21 -07:00
|
|
|
//
|
|
|
|
// See man hosts(5).
|
2021-11-16 06:16:38 -07:00
|
|
|
func (hp *hostsParser) parseFile(
|
2021-10-14 09:39:21 -07:00
|
|
|
r io.Reader,
|
|
|
|
) (patterns []string, cont bool, err error) {
|
|
|
|
s := bufio.NewScanner(r)
|
|
|
|
for s.Scan() {
|
|
|
|
ip, hosts := hp.parseLine(s.Text())
|
|
|
|
if ip == nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, host := range hosts {
|
|
|
|
hp.addPair(ip, host)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil, true, s.Err()
|
|
|
|
}
|
|
|
|
|
|
|
|
// parseLine parses the line having the hosts syntax ignoring invalid ones.
|
2021-11-16 06:16:38 -07:00
|
|
|
func (hp *hostsParser) parseLine(line string) (ip net.IP, hosts []string) {
|
2021-10-14 09:39:21 -07:00
|
|
|
line = strings.TrimSpace(line)
|
|
|
|
fields := strings.Fields(line)
|
|
|
|
if len(fields) < 2 {
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
if ip = net.ParseIP(fields[0]); ip == nil {
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
loop:
|
|
|
|
for _, f := range fields[1:] {
|
|
|
|
switch hashIdx := strings.IndexByte(f, '#'); hashIdx {
|
|
|
|
case 0:
|
|
|
|
// The rest of the fields are a part of the comment so skip
|
|
|
|
// immediately.
|
|
|
|
break loop
|
|
|
|
case -1:
|
|
|
|
hosts = append(hosts, f)
|
|
|
|
default:
|
|
|
|
// Only a part of the field is a comment.
|
|
|
|
hosts = append(hosts, f[:hashIdx])
|
|
|
|
|
|
|
|
break loop
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ip, hosts
|
|
|
|
}
|
|
|
|
|
|
|
|
// add returns true if the pair of ip and host wasn't added to the hp before.
|
2021-11-16 06:16:38 -07:00
|
|
|
func (hp *hostsParser) add(ip net.IP, host string) (added bool) {
|
2021-10-14 09:39:21 -07:00
|
|
|
v, ok := hp.table.Get(ip)
|
2021-11-16 06:16:38 -07:00
|
|
|
hosts, _ := v.(*stringutil.Set)
|
|
|
|
switch {
|
|
|
|
case ok && hosts.Has(host):
|
2021-10-14 09:39:21 -07:00
|
|
|
return false
|
2021-11-16 06:16:38 -07:00
|
|
|
case hosts == nil:
|
|
|
|
hosts = stringutil.NewSet(host)
|
|
|
|
hp.table.Set(ip, hosts)
|
|
|
|
default:
|
|
|
|
hosts.Add(host)
|
2021-10-14 09:39:21 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
// addPair puts the pair of ip and host to the rules builder if needed.
|
2021-11-16 06:16:38 -07:00
|
|
|
func (hp *hostsParser) addPair(ip net.IP, host string) {
|
2021-10-14 09:39:21 -07:00
|
|
|
arpa, err := netutil.IPToReversedAddr(ip)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
if !hp.add(ip, host) {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
qtype := "AAAA"
|
|
|
|
if ip.To4() != nil {
|
|
|
|
// Assume the validation of the IP address is performed already.
|
|
|
|
qtype = "A"
|
|
|
|
}
|
|
|
|
|
2021-11-16 06:16:38 -07:00
|
|
|
const (
|
|
|
|
nl = "\n"
|
|
|
|
sc = ";"
|
|
|
|
|
|
|
|
rewriteSuccess = "$dnsrewrite=NOERROR" + sc
|
|
|
|
rewriteSuccessPTR = rewriteSuccess + "PTR" + sc
|
|
|
|
)
|
|
|
|
|
|
|
|
ipStr := ip.String()
|
|
|
|
fqdn := dns.Fqdn(host)
|
|
|
|
|
|
|
|
for _, ruleData := range [...][]string{{
|
|
|
|
// A/AAAA.
|
|
|
|
rules.MaskStartURL,
|
2021-10-14 09:39:21 -07:00
|
|
|
host,
|
2021-11-16 06:16:38 -07:00
|
|
|
rules.MaskSeparator,
|
|
|
|
rewriteSuccess,
|
2021-10-14 09:39:21 -07:00
|
|
|
qtype,
|
2021-11-16 06:16:38 -07:00
|
|
|
sc,
|
|
|
|
ipStr,
|
|
|
|
nl,
|
|
|
|
}, {
|
|
|
|
// PTR.
|
|
|
|
rules.MaskStartURL,
|
2021-10-14 09:39:21 -07:00
|
|
|
arpa,
|
2021-11-16 06:16:38 -07:00
|
|
|
rules.MaskSeparator,
|
|
|
|
rewriteSuccessPTR,
|
|
|
|
fqdn,
|
|
|
|
nl,
|
|
|
|
}} {
|
|
|
|
stringutil.WriteToBuilder(hp.rules, ruleData...)
|
|
|
|
}
|
2021-10-14 09:39:21 -07:00
|
|
|
|
|
|
|
log.Debug("%s: added ip-host pair %q/%q", hostsContainerPref, ip, host)
|
|
|
|
}
|
|
|
|
|
2021-11-16 06:16:38 -07:00
|
|
|
// equalSet returns true if the internal hosts table just parsed equals target.
|
|
|
|
func (hp *hostsParser) equalSet(target *netutil.IPMap) (ok bool) {
|
2021-11-22 07:22:59 -07:00
|
|
|
if target == nil {
|
|
|
|
// hp.table shouldn't appear nil since it's initialized on each refresh.
|
|
|
|
return target == hp.table
|
|
|
|
}
|
|
|
|
|
2021-11-16 06:16:38 -07:00
|
|
|
if hp.table.Len() != target.Len() {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
hp.table.Range(func(ip net.IP, val interface{}) (cont bool) {
|
|
|
|
v, hasIP := target.Get(ip)
|
|
|
|
// ok is set to true if the target doesn't contain ip or if the
|
|
|
|
// appropriate hosts set isn't equal to the checked one, i.e. the maps
|
|
|
|
// have at least one disperancy.
|
|
|
|
ok = !hasIP || !v.(*stringutil.Set).Equal(val.(*stringutil.Set))
|
|
|
|
|
|
|
|
// Continue only if maps has no discrepancies.
|
|
|
|
return !ok
|
|
|
|
})
|
|
|
|
|
|
|
|
// Return true if every value from the IP map has no disperancies with the
|
|
|
|
// appropriate one from the target.
|
|
|
|
return !ok
|
|
|
|
}
|
|
|
|
|
2021-10-14 09:39:21 -07:00
|
|
|
// sendUpd tries to send the parsed data to the ch.
|
2021-11-16 06:16:38 -07:00
|
|
|
func (hp *hostsParser) sendUpd(ch chan *netutil.IPMap) {
|
2021-10-14 09:39:21 -07:00
|
|
|
log.Debug("%s: sending upd", hostsContainerPref)
|
2021-11-16 06:16:38 -07:00
|
|
|
|
|
|
|
upd := hp.table
|
2021-10-14 09:39:21 -07:00
|
|
|
select {
|
2021-11-16 06:16:38 -07:00
|
|
|
case ch <- upd:
|
2021-10-14 09:39:21 -07:00
|
|
|
// Updates are delivered. Go on.
|
2021-11-16 06:16:38 -07:00
|
|
|
case <-ch:
|
|
|
|
ch <- upd
|
|
|
|
log.Debug("%s: replaced the last update", hostsContainerPref)
|
|
|
|
case ch <- upd:
|
|
|
|
// The previous update was just read and the next one pushed. Go on.
|
2021-10-14 09:39:21 -07:00
|
|
|
default:
|
2021-11-16 06:16:38 -07:00
|
|
|
log.Debug("%s: the channel is broken", hostsContainerPref)
|
2021-10-14 09:39:21 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// newStrg creates a new rules storage from parsed data.
|
2021-11-16 06:16:38 -07:00
|
|
|
func (hp *hostsParser) newStrg() (s *filterlist.RuleStorage, err error) {
|
2021-10-14 09:39:21 -07:00
|
|
|
return filterlist.NewRuleStorage([]filterlist.RuleList{&filterlist.StringRuleList{
|
2021-11-16 06:16:38 -07:00
|
|
|
ID: -1,
|
2021-10-14 09:39:21 -07:00
|
|
|
RulesText: hp.rules.String(),
|
|
|
|
IgnoreCosmetic: true,
|
|
|
|
}})
|
|
|
|
}
|
|
|
|
|
2021-11-16 06:16:38 -07:00
|
|
|
// refresh gets the data from specified files and propagates the updates if
|
|
|
|
// needed.
|
2021-11-17 07:21:10 -07:00
|
|
|
//
|
|
|
|
// TODO(e.burkov): Accept a parameter to specify the files to refresh.
|
2021-10-14 09:39:21 -07:00
|
|
|
func (hc *HostsContainer) refresh() (err error) {
|
|
|
|
log.Debug("%s: refreshing", hostsContainerPref)
|
|
|
|
|
2021-11-16 06:16:38 -07:00
|
|
|
hp := hc.newHostsParser()
|
|
|
|
if _, err = aghos.FileWalker(hp.parseFile).Walk(hc.fsys, hc.patterns...); err != nil {
|
|
|
|
return fmt.Errorf("refreshing : %w", err)
|
2021-10-14 09:39:21 -07:00
|
|
|
}
|
|
|
|
|
2021-11-16 06:16:38 -07:00
|
|
|
if hp.equalSet(hc.last) {
|
|
|
|
log.Debug("%s: no updates detected", hostsContainerPref)
|
2021-10-14 09:39:21 -07:00
|
|
|
|
2021-11-16 06:16:38 -07:00
|
|
|
return nil
|
|
|
|
}
|
2021-10-14 09:39:21 -07:00
|
|
|
defer hp.sendUpd(hc.updates)
|
|
|
|
|
2021-11-16 06:16:38 -07:00
|
|
|
hc.last = hp.table.ShallowClone()
|
|
|
|
|
2021-10-14 09:39:21 -07:00
|
|
|
var rulesStrg *filterlist.RuleStorage
|
|
|
|
if rulesStrg, err = hp.newStrg(); err != nil {
|
|
|
|
return fmt.Errorf("initializing rules storage: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
hc.resetEng(rulesStrg)
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (hc *HostsContainer) resetEng(rulesStrg *filterlist.RuleStorage) {
|
|
|
|
hc.engLock.Lock()
|
|
|
|
defer hc.engLock.Unlock()
|
|
|
|
|
|
|
|
hc.rulesStrg = rulesStrg
|
|
|
|
hc.engine = urlfilter.NewDNSEngine(hc.rulesStrg)
|
|
|
|
}
|