2019-06-10 01:33:19 -07:00
|
|
|
package home
|
2018-11-28 10:14:54 -07:00
|
|
|
|
|
|
|
import (
|
2020-03-17 05:00:40 -07:00
|
|
|
"bufio"
|
2018-11-28 10:14:54 -07:00
|
|
|
"fmt"
|
2019-03-15 06:49:10 -07:00
|
|
|
"hash/crc32"
|
2020-03-17 05:00:40 -07:00
|
|
|
"io"
|
2020-11-20 07:32:41 -07:00
|
|
|
"net/http"
|
2018-11-28 10:14:54 -07:00
|
|
|
"os"
|
|
|
|
"path/filepath"
|
|
|
|
"regexp"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
2019-10-10 07:12:32 -07:00
|
|
|
"sync"
|
2020-02-26 09:58:25 -07:00
|
|
|
"sync/atomic"
|
2018-11-28 10:14:54 -07:00
|
|
|
"time"
|
|
|
|
|
2021-05-21 06:15:47 -07:00
|
|
|
"github.com/AdguardTeam/AdGuardHome/internal/filtering"
|
2021-05-24 07:28:11 -07:00
|
|
|
"github.com/AdguardTeam/golibs/errors"
|
2019-02-25 06:44:22 -07:00
|
|
|
"github.com/AdguardTeam/golibs/log"
|
2018-11-28 10:14:54 -07:00
|
|
|
)
|
|
|
|
|
2020-11-05 05:20:57 -07:00
|
|
|
// nextFilterID is the ID that assignUniqueFilterID hands out next.  It is
// initialized from the current Unix time when the package is initialized.
var nextFilterID = time.Now().Unix() // semi-stable way to generate a unique ID
|
2020-03-17 05:00:40 -07:00
|
|
|
|
|
|
|
// Filtering is the module object.  It holds the state needed to refresh the
// filter lists and to parse their contents.
type Filtering struct {
	// refreshStatus tells whether a refresh is running: 0 means none, 1 means
	// in progress.
	refreshStatus uint32

	// refreshLock is held for the duration of a single refresh run.
	refreshLock sync.Mutex

	// filterTitleRegexp matches the "! Title: ..." line of a filter list.  It
	// is compiled in Init.
	filterTitleRegexp *regexp.Regexp
}
|
2018-11-28 10:14:54 -07:00
|
|
|
|
2020-03-17 05:00:40 -07:00
|
|
|
// Init - initialize the module
|
|
|
|
func (f *Filtering) Init() {
|
|
|
|
f.filterTitleRegexp = regexp.MustCompile(`^! Title: +(.*)$`)
|
2020-11-05 05:20:57 -07:00
|
|
|
_ = os.MkdirAll(filepath.Join(Context.getDataDir(), filterDir), 0o755)
|
2020-03-17 05:00:40 -07:00
|
|
|
f.loadFilters(config.Filters)
|
|
|
|
f.loadFilters(config.WhitelistFilters)
|
2019-09-04 04:12:00 -07:00
|
|
|
deduplicateFilters()
|
|
|
|
updateUniqueFilterID(config.Filters)
|
2020-02-26 09:58:25 -07:00
|
|
|
updateUniqueFilterID(config.WhitelistFilters)
|
2019-10-17 04:33:38 -07:00
|
|
|
}
|
|
|
|
|
2020-03-17 05:00:40 -07:00
|
|
|
// Start - start the module
|
|
|
|
func (f *Filtering) Start() {
|
|
|
|
f.RegisterFilteringHandlers()
|
|
|
|
|
2019-10-17 04:33:38 -07:00
|
|
|
// Here we should start updating filters,
|
|
|
|
// but currently we can't wake up the periodic task to do so.
|
|
|
|
// So for now we just start this periodic task from here.
|
2020-03-17 05:00:40 -07:00
|
|
|
go f.periodicallyRefreshFilters()
|
|
|
|
}
|
|
|
|
|
|
|
|
// Close - close the module
|
|
|
|
func (f *Filtering) Close() {
|
2019-09-04 04:12:00 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
func defaultFilters() []filter {
|
|
|
|
return []filter{
|
2021-05-21 06:15:47 -07:00
|
|
|
{Filter: filtering.Filter{ID: 1}, Enabled: true, URL: "https://adguardteam.github.io/AdGuardSDNSFilter/Filters/filter.txt", Name: "AdGuard DNS filter"},
|
|
|
|
{Filter: filtering.Filter{ID: 2}, Enabled: false, URL: "https://adaway.org/hosts.txt", Name: "AdAway Default Blocklist"},
|
|
|
|
{Filter: filtering.Filter{ID: 4}, Enabled: false, URL: "https://www.malwaredomainlist.com/hostslist/hosts.txt", Name: "MalwareDomainList.com Hosts List"},
|
2019-09-04 04:12:00 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-11-28 10:14:54 -07:00
|
|
|
// field ordering is important -- yaml fields will mirror ordering from here
|
|
|
|
type filter struct {
|
2019-09-04 04:12:00 -07:00
|
|
|
Enabled bool
|
2020-03-05 04:37:43 -07:00
|
|
|
URL string // URL or a file path
|
2019-09-04 04:12:00 -07:00
|
|
|
Name string `yaml:"name"`
|
|
|
|
RulesCount int `yaml:"-"`
|
|
|
|
LastUpdated time.Time `yaml:"-"`
|
2019-03-15 06:49:10 -07:00
|
|
|
checksum uint32 // checksum of the file data
|
2020-02-26 09:58:25 -07:00
|
|
|
white bool
|
2018-11-28 10:14:54 -07:00
|
|
|
|
2021-05-21 06:15:47 -07:00
|
|
|
filtering.Filter `yaml:",inline"`
|
2018-11-28 10:14:54 -07:00
|
|
|
}
|
|
|
|
|
2019-11-06 05:56:29 -07:00
|
|
|
// Status flags returned by filterSetProperties.  They are bit flags and may be
// combined with bitwise OR.
const (
	statusFound          = 1 << iota // 1: a filter with the given URL was found
	statusEnabledChanged             // 2: the enabled state was changed
	statusURLChanged                 // 4: the URL was changed
	statusURLExists                  // 8: the new URL is already used by a filter
	statusUpdateRequired             // 0x10: the filter data must be updated
)
|
|
|
|
|
|
|
|
// Update properties for a filter specified by its URL
|
|
|
|
// Return status* flags.
|
2020-03-17 05:00:40 -07:00
|
|
|
func (f *Filtering) filterSetProperties(url string, newf filter, whitelist bool) int {
|
2019-11-06 05:56:29 -07:00
|
|
|
r := 0
|
2019-03-18 04:12:04 -07:00
|
|
|
config.Lock()
|
2019-11-06 05:56:29 -07:00
|
|
|
defer config.Unlock()
|
|
|
|
|
2020-02-26 09:58:25 -07:00
|
|
|
filters := &config.Filters
|
|
|
|
if whitelist {
|
|
|
|
filters = &config.WhitelistFilters
|
|
|
|
}
|
|
|
|
|
|
|
|
for i := range *filters {
|
2020-03-17 05:00:40 -07:00
|
|
|
filt := &(*filters)[i]
|
|
|
|
if filt.URL != url {
|
2019-11-06 05:56:29 -07:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
log.Debug("filter: set properties: %s: {%s %s %v}",
|
2020-03-17 05:00:40 -07:00
|
|
|
filt.URL, newf.Name, newf.URL, newf.Enabled)
|
|
|
|
filt.Name = newf.Name
|
2019-11-06 05:56:29 -07:00
|
|
|
|
2020-03-17 05:00:40 -07:00
|
|
|
if filt.URL != newf.URL {
|
2020-03-11 09:17:46 -07:00
|
|
|
r |= statusURLChanged | statusUpdateRequired
|
2019-11-06 05:56:29 -07:00
|
|
|
if filterExistsNoLock(newf.URL) {
|
|
|
|
return statusURLExists
|
|
|
|
}
|
2020-03-17 05:00:40 -07:00
|
|
|
filt.URL = newf.URL
|
|
|
|
filt.unload()
|
|
|
|
filt.LastUpdated = time.Time{}
|
|
|
|
filt.checksum = 0
|
|
|
|
filt.RulesCount = 0
|
2019-11-06 05:56:29 -07:00
|
|
|
}
|
|
|
|
|
2020-03-17 05:00:40 -07:00
|
|
|
if filt.Enabled != newf.Enabled {
|
2019-11-06 05:56:29 -07:00
|
|
|
r |= statusEnabledChanged
|
2020-03-17 05:00:40 -07:00
|
|
|
filt.Enabled = newf.Enabled
|
|
|
|
if filt.Enabled {
|
2019-11-06 05:56:29 -07:00
|
|
|
if (r & statusURLChanged) == 0 {
|
2020-03-17 05:00:40 -07:00
|
|
|
e := f.load(filt)
|
2019-11-06 05:56:29 -07:00
|
|
|
if e != nil {
|
|
|
|
// This isn't a fatal error,
|
|
|
|
// because it may occur when someone removes the file from disk.
|
2020-03-17 05:00:40 -07:00
|
|
|
filt.LastUpdated = time.Time{}
|
|
|
|
filt.checksum = 0
|
|
|
|
filt.RulesCount = 0
|
2020-03-11 09:17:46 -07:00
|
|
|
r |= statusUpdateRequired
|
2019-11-06 05:56:29 -07:00
|
|
|
}
|
2019-03-18 04:12:04 -07:00
|
|
|
}
|
|
|
|
} else {
|
2020-03-17 05:00:40 -07:00
|
|
|
filt.unload()
|
2019-03-18 04:12:04 -07:00
|
|
|
}
|
|
|
|
}
|
2019-11-06 05:56:29 -07:00
|
|
|
|
|
|
|
return r | statusFound
|
2019-03-18 04:12:04 -07:00
|
|
|
}
|
2019-11-06 05:56:29 -07:00
|
|
|
return 0
|
2019-03-18 04:12:04 -07:00
|
|
|
}
|
|
|
|
|
2019-03-18 04:41:38 -07:00
|
|
|
// Return TRUE if a filter with this URL exists
|
|
|
|
func filterExists(url string) bool {
|
|
|
|
config.RLock()
|
2019-11-06 05:56:29 -07:00
|
|
|
r := filterExistsNoLock(url)
|
|
|
|
config.RUnlock()
|
|
|
|
return r
|
|
|
|
}
|
|
|
|
|
|
|
|
func filterExistsNoLock(url string) bool {
|
2020-02-26 09:58:25 -07:00
|
|
|
for _, f := range config.Filters {
|
|
|
|
if f.URL == url {
|
|
|
|
return true
|
2019-03-18 04:41:38 -07:00
|
|
|
}
|
|
|
|
}
|
2020-02-26 09:58:25 -07:00
|
|
|
for _, f := range config.WhitelistFilters {
|
|
|
|
if f.URL == url {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false
|
2019-03-18 04:41:38 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// Add a filter
|
|
|
|
// Return FALSE if a filter with this URL exists
|
|
|
|
func filterAdd(f filter) bool {
|
|
|
|
config.Lock()
|
2020-02-26 09:58:25 -07:00
|
|
|
defer config.Unlock()
|
2019-03-18 04:41:38 -07:00
|
|
|
|
|
|
|
// Check for duplicates
|
2020-02-26 09:58:25 -07:00
|
|
|
if filterExistsNoLock(f.URL) {
|
|
|
|
return false
|
2019-03-18 04:41:38 -07:00
|
|
|
}
|
|
|
|
|
2020-02-26 09:58:25 -07:00
|
|
|
if f.white {
|
|
|
|
config.WhitelistFilters = append(config.WhitelistFilters, f)
|
|
|
|
} else {
|
|
|
|
config.Filters = append(config.Filters, f)
|
|
|
|
}
|
2019-03-18 04:41:38 -07:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
2019-03-15 09:41:45 -07:00
|
|
|
// Load filters from the disk
|
|
|
|
// And if any filter has zero ID, assign a new one
|
2020-03-17 05:00:40 -07:00
|
|
|
func (f *Filtering) loadFilters(array []filter) {
|
2020-02-26 09:58:25 -07:00
|
|
|
for i := range array {
|
|
|
|
filter := &array[i] // otherwise we're operating on a copy
|
2019-03-15 09:41:45 -07:00
|
|
|
if filter.ID == 0 {
|
|
|
|
filter.ID = assignUniqueFilterID()
|
|
|
|
}
|
2019-03-18 02:52:34 -07:00
|
|
|
|
|
|
|
if !filter.Enabled {
|
|
|
|
// No need to load a filter that is not enabled
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2020-03-17 05:00:40 -07:00
|
|
|
err := f.load(filter)
|
2019-03-15 09:41:45 -07:00
|
|
|
if err != nil {
|
2019-09-04 04:12:00 -07:00
|
|
|
log.Error("Couldn't load filter %d contents due to %s", filter.ID, err)
|
2019-03-15 09:41:45 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-11-28 10:14:54 -07:00
|
|
|
func deduplicateFilters() {
|
|
|
|
// Deduplicate filters
|
|
|
|
i := 0 // output index, used for deletion later
|
|
|
|
urls := map[string]bool{}
|
|
|
|
for _, filter := range config.Filters {
|
|
|
|
if _, ok := urls[filter.URL]; !ok {
|
|
|
|
// we didn't see it before, keep it
|
|
|
|
urls[filter.URL] = true // remember the URL
|
|
|
|
config.Filters[i] = filter
|
|
|
|
i++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// all entries we want to keep are at front, delete the rest
|
|
|
|
config.Filters = config.Filters[:i]
|
|
|
|
}
|
|
|
|
|
|
|
|
// Set the next filter ID to max(filter.ID) + 1
|
|
|
|
func updateUniqueFilterID(filters []filter) {
|
|
|
|
for _, filter := range filters {
|
|
|
|
if nextFilterID < filter.ID {
|
|
|
|
nextFilterID = filter.ID + 1
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func assignUniqueFilterID() int64 {
|
|
|
|
value := nextFilterID
|
2019-01-24 10:11:01 -07:00
|
|
|
nextFilterID++
|
2018-11-28 10:14:54 -07:00
|
|
|
return value
|
|
|
|
}
|
|
|
|
|
|
|
|
// Sets up a timer that will be checking for filters updates periodically
|
2020-03-17 05:00:40 -07:00
|
|
|
func (f *Filtering) periodicallyRefreshFilters() {
|
2020-01-15 08:41:27 -07:00
|
|
|
const maxInterval = 1 * 60 * 60
|
|
|
|
intval := 5 // use a dynamically increasing time interval
|
2019-09-04 04:12:00 -07:00
|
|
|
for {
|
2020-01-15 08:41:27 -07:00
|
|
|
isNetworkErr := false
|
2020-03-17 05:00:40 -07:00
|
|
|
if config.DNS.FiltersUpdateIntervalHours != 0 && atomic.CompareAndSwapUint32(&f.refreshStatus, 0, 1) {
|
|
|
|
f.refreshLock.Lock()
|
2020-12-07 06:04:53 -07:00
|
|
|
_, isNetworkErr = f.refreshFiltersIfNecessary(filterRefreshBlocklists | filterRefreshAllowlists)
|
2020-03-17 05:00:40 -07:00
|
|
|
f.refreshLock.Unlock()
|
|
|
|
f.refreshStatus = 0
|
2020-01-28 04:07:11 -07:00
|
|
|
if !isNetworkErr {
|
2020-01-15 08:41:27 -07:00
|
|
|
intval = maxInterval
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if isNetworkErr {
|
|
|
|
intval *= 2
|
|
|
|
if intval > maxInterval {
|
|
|
|
intval = maxInterval
|
|
|
|
}
|
2019-10-09 09:51:26 -07:00
|
|
|
}
|
2020-01-15 08:41:27 -07:00
|
|
|
|
|
|
|
time.Sleep(time.Duration(intval) * time.Second)
|
2018-11-28 10:14:54 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-10-10 07:12:32 -07:00
|
|
|
// Refresh filters
|
2020-12-07 06:04:53 -07:00
|
|
|
// flags: filterRefresh*
|
2020-02-28 02:40:16 -07:00
|
|
|
// important:
|
|
|
|
// TRUE: ignore the fact that we're currently updating the filters
|
2020-03-17 05:00:40 -07:00
|
|
|
func (f *Filtering) refreshFilters(flags int, important bool) (int, error) {
|
|
|
|
set := atomic.CompareAndSwapUint32(&f.refreshStatus, 0, 1)
|
2020-02-28 02:40:16 -07:00
|
|
|
if !important && !set {
|
2020-04-05 08:34:43 -07:00
|
|
|
return 0, fmt.Errorf("filters update procedure is already running")
|
2019-10-10 07:12:32 -07:00
|
|
|
}
|
|
|
|
|
2020-03-17 05:00:40 -07:00
|
|
|
f.refreshLock.Lock()
|
|
|
|
nUpdated, _ := f.refreshFiltersIfNecessary(flags)
|
|
|
|
f.refreshLock.Unlock()
|
|
|
|
f.refreshStatus = 0
|
2019-10-10 07:12:32 -07:00
|
|
|
return nUpdated, nil
|
2019-10-09 09:51:26 -07:00
|
|
|
}
|
|
|
|
|
2020-03-17 05:00:40 -07:00
|
|
|
func (f *Filtering) refreshFiltersArray(filters *[]filter, force bool) (int, []filter, []bool, bool) {
|
2019-03-18 07:23:02 -07:00
|
|
|
var updateFilters []filter
|
2019-07-16 02:55:18 -07:00
|
|
|
var updateFlags []bool // 'true' if filter data has changed
|
2018-11-28 10:14:54 -07:00
|
|
|
|
2019-09-04 04:12:00 -07:00
|
|
|
now := time.Now()
|
2019-03-18 07:23:02 -07:00
|
|
|
config.RLock()
|
2020-02-28 02:40:16 -07:00
|
|
|
for i := range *filters {
|
|
|
|
f := &(*filters)[i] // otherwise we will be operating on a copy
|
2018-11-28 10:14:54 -07:00
|
|
|
|
2019-03-18 07:23:02 -07:00
|
|
|
if !f.Enabled {
|
2019-03-18 02:52:34 -07:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2019-09-04 04:12:00 -07:00
|
|
|
expireTime := f.LastUpdated.Unix() + int64(config.DNS.FiltersUpdateIntervalHours)*60*60
|
|
|
|
if !force && expireTime > now.Unix() {
|
2019-03-15 06:09:43 -07:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2019-03-18 07:23:02 -07:00
|
|
|
var uf filter
|
|
|
|
uf.ID = f.ID
|
|
|
|
uf.URL = f.URL
|
2019-03-26 09:04:50 -07:00
|
|
|
uf.Name = f.Name
|
2019-03-18 07:23:02 -07:00
|
|
|
uf.checksum = f.checksum
|
|
|
|
updateFilters = append(updateFilters, uf)
|
|
|
|
}
|
|
|
|
config.RUnlock()
|
|
|
|
|
2020-01-28 04:07:11 -07:00
|
|
|
if len(updateFilters) == 0 {
|
2020-02-28 02:40:16 -07:00
|
|
|
return 0, nil, nil, false
|
2020-01-28 04:07:11 -07:00
|
|
|
}
|
|
|
|
|
2020-01-15 08:41:27 -07:00
|
|
|
nfail := 0
|
2019-03-18 07:23:02 -07:00
|
|
|
for i := range updateFilters {
|
|
|
|
uf := &updateFilters[i]
|
2020-03-17 05:00:40 -07:00
|
|
|
updated, err := f.update(uf)
|
2019-07-16 05:29:36 -07:00
|
|
|
updateFlags = append(updateFlags, updated)
|
2018-11-28 10:14:54 -07:00
|
|
|
if err != nil {
|
2020-01-15 08:41:27 -07:00
|
|
|
nfail++
|
2019-03-18 07:23:02 -07:00
|
|
|
log.Printf("Failed to update filter %s: %s\n", uf.URL, err)
|
2018-11-28 10:14:54 -07:00
|
|
|
continue
|
|
|
|
}
|
2019-07-16 04:32:58 -07:00
|
|
|
}
|
2019-07-16 02:55:18 -07:00
|
|
|
|
2020-01-15 08:41:27 -07:00
|
|
|
if nfail == len(updateFilters) {
|
2020-02-28 02:40:16 -07:00
|
|
|
return 0, nil, nil, true
|
2020-01-15 08:41:27 -07:00
|
|
|
}
|
|
|
|
|
2019-10-09 09:51:26 -07:00
|
|
|
updateCount := 0
|
2019-07-16 02:55:18 -07:00
|
|
|
for i := range updateFilters {
|
|
|
|
uf := &updateFilters[i]
|
|
|
|
updated := updateFlags[i]
|
2019-03-18 07:23:02 -07:00
|
|
|
|
|
|
|
config.Lock()
|
2020-02-28 02:40:16 -07:00
|
|
|
for k := range *filters {
|
|
|
|
f := &(*filters)[k]
|
2019-03-18 07:23:02 -07:00
|
|
|
if f.ID != uf.ID || f.URL != uf.URL {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
f.LastUpdated = uf.LastUpdated
|
|
|
|
if !updated {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
log.Info("Updated filter #%d. Rules: %d -> %d",
|
|
|
|
f.ID, f.RulesCount, uf.RulesCount)
|
|
|
|
f.Name = uf.Name
|
|
|
|
f.RulesCount = uf.RulesCount
|
|
|
|
f.checksum = uf.checksum
|
|
|
|
updateCount++
|
2018-11-28 10:14:54 -07:00
|
|
|
}
|
2019-03-18 07:23:02 -07:00
|
|
|
config.Unlock()
|
2018-11-28 10:14:54 -07:00
|
|
|
}
|
|
|
|
|
2020-02-28 02:40:16 -07:00
|
|
|
return updateCount, updateFilters, updateFlags, false
|
|
|
|
}
|
|
|
|
|
|
|
|
// Flags accepted by refreshFilters and refreshFiltersIfNecessary.  They are
// bit flags and may be combined with bitwise OR.
const (
	filterRefreshForce      = 1 << iota // 1: ignore last file modification date
	filterRefreshAllowlists             // 2: update allow-lists
	filterRefreshBlocklists             // 4: update block-lists
)
|
|
|
|
|
|
|
|
// Checks filters updates if necessary
|
|
|
|
// If force is true, it ignores the filter.LastUpdated field value
|
2020-12-07 06:04:53 -07:00
|
|
|
// flags: filterRefresh*
|
2020-02-28 02:40:16 -07:00
|
|
|
//
|
|
|
|
// Algorithm:
|
|
|
|
// . Get the list of filters to be updated
|
|
|
|
// . For each filter run the download and checksum check operation
|
2020-03-17 05:00:40 -07:00
|
|
|
// . Store downloaded data in a temporary file inside data/filters directory
|
2020-02-28 02:40:16 -07:00
|
|
|
// . For each filter:
|
|
|
|
// . If filter data hasn't changed, just set new update time on file
|
|
|
|
// . If filter data has changed:
|
2020-03-17 05:00:40 -07:00
|
|
|
// . rename the temporary file (<temp> -> 1.txt)
|
|
|
|
// Note that this method works only on UNIX.
|
2021-05-21 06:15:47 -07:00
|
|
|
// On Windows we don't pass files to filtering - we pass the whole data.
|
|
|
|
// . Pass new filters to filtering object - it analyzes new data while the old filters are still active
|
|
|
|
// . filtering activates new filters
|
2020-02-28 02:40:16 -07:00
|
|
|
//
|
|
|
|
// Return the number of updated filters
|
|
|
|
// Return TRUE - there was a network error and nothing could be updated
|
2020-03-17 05:00:40 -07:00
|
|
|
func (f *Filtering) refreshFiltersIfNecessary(flags int) (int, bool) {
|
2020-02-28 02:40:16 -07:00
|
|
|
log.Debug("Filters: updating...")
|
|
|
|
|
|
|
|
updateCount := 0
|
|
|
|
var updateFilters []filter
|
|
|
|
var updateFlags []bool
|
|
|
|
netError := false
|
|
|
|
netErrorW := false
|
|
|
|
force := false
|
2020-12-07 06:04:53 -07:00
|
|
|
if (flags & filterRefreshForce) != 0 {
|
2020-02-28 02:40:16 -07:00
|
|
|
force = true
|
|
|
|
}
|
2020-12-07 06:04:53 -07:00
|
|
|
if (flags & filterRefreshBlocklists) != 0 {
|
2020-03-17 05:00:40 -07:00
|
|
|
updateCount, updateFilters, updateFlags, netError = f.refreshFiltersArray(&config.Filters, force)
|
2020-02-28 02:40:16 -07:00
|
|
|
}
|
2020-12-07 06:04:53 -07:00
|
|
|
if (flags & filterRefreshAllowlists) != 0 {
|
2020-02-28 02:40:16 -07:00
|
|
|
updateCountW := 0
|
|
|
|
var updateFiltersW []filter
|
|
|
|
var updateFlagsW []bool
|
2020-03-17 05:00:40 -07:00
|
|
|
updateCountW, updateFiltersW, updateFlagsW, netErrorW = f.refreshFiltersArray(&config.WhitelistFilters, force)
|
2020-02-28 02:40:16 -07:00
|
|
|
updateCount += updateCountW
|
|
|
|
updateFilters = append(updateFilters, updateFiltersW...)
|
|
|
|
updateFlags = append(updateFlags, updateFlagsW...)
|
|
|
|
}
|
|
|
|
if netError && netErrorW {
|
|
|
|
return 0, true
|
|
|
|
}
|
|
|
|
|
2019-10-09 09:51:26 -07:00
|
|
|
if updateCount != 0 {
|
|
|
|
enableFilters(false)
|
|
|
|
|
|
|
|
for i := range updateFilters {
|
|
|
|
uf := &updateFilters[i]
|
|
|
|
updated := updateFlags[i]
|
|
|
|
if !updated {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
_ = os.Remove(uf.Path() + ".old")
|
2019-01-24 10:11:01 -07:00
|
|
|
}
|
2018-11-28 10:14:54 -07:00
|
|
|
}
|
2019-10-09 09:51:26 -07:00
|
|
|
|
|
|
|
log.Debug("Filters: update finished")
|
2020-01-15 08:41:27 -07:00
|
|
|
return updateCount, false
|
2018-11-28 10:14:54 -07:00
|
|
|
}
|
|
|
|
|
2019-09-04 09:37:27 -07:00
|
|
|
// isPrintableText reports whether the first len bytes of data are printable
// text.  Bytes from the space character up, except DEL (0x7f), are accepted,
// which lets UTF-8 encoded text through, as are the CR, LF and TAB
// characters.
//
// If len is greater than the length of data, only the available bytes are
// checked.  A zero or negative len always yields true.
func isPrintableText(data []byte, len int) bool {
	// Ranging over data, rather than indexing up to len, keeps the loop within
	// the slice even when the caller passes too large a len.
	for i, c := range data {
		if i >= len {
			break
		}

		if (c >= ' ' && c != 0x7f) || c == '\n' || c == '\r' || c == '\t' {
			continue
		}

		return false
	}

	return true
}
|
|
|
|
|
2018-11-28 10:14:54 -07:00
|
|
|
// A helper function that parses filter contents and returns a number of rules and a filter name (if there's any)
|
2020-03-17 05:00:40 -07:00
|
|
|
func (f *Filtering) parseFilterContents(file io.Reader) (int, uint32, string) {
|
2018-11-28 10:14:54 -07:00
|
|
|
rulesCount := 0
|
|
|
|
name := ""
|
|
|
|
seenTitle := false
|
2020-03-17 05:00:40 -07:00
|
|
|
r := bufio.NewReader(file)
|
|
|
|
checksum := uint32(0)
|
|
|
|
|
|
|
|
for {
|
|
|
|
line, err := r.ReadString('\n')
|
|
|
|
checksum = crc32.Update(checksum, crc32.IEEETable, []byte(line))
|
|
|
|
|
|
|
|
line = strings.TrimSpace(line)
|
2019-03-15 06:02:48 -07:00
|
|
|
if len(line) == 0 {
|
2020-05-18 00:53:28 -07:00
|
|
|
//
|
|
|
|
} else if line[0] == '!' {
|
2020-03-17 05:00:40 -07:00
|
|
|
m := f.filterTitleRegexp.FindAllStringSubmatch(line, -1)
|
2019-03-15 06:02:48 -07:00
|
|
|
if len(m) > 0 && len(m[0]) >= 2 && !seenTitle {
|
2018-11-28 10:14:54 -07:00
|
|
|
name = m[0][1]
|
|
|
|
seenTitle = true
|
|
|
|
}
|
2020-05-18 00:53:28 -07:00
|
|
|
|
2020-04-30 07:19:10 -07:00
|
|
|
} else if line[0] == '#' {
|
2020-05-18 00:53:28 -07:00
|
|
|
//
|
2019-03-15 06:02:48 -07:00
|
|
|
} else {
|
2018-11-28 10:14:54 -07:00
|
|
|
rulesCount++
|
|
|
|
}
|
2020-05-18 00:53:28 -07:00
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
break
|
|
|
|
}
|
2018-11-28 10:14:54 -07:00
|
|
|
}
|
|
|
|
|
2020-03-17 05:00:40 -07:00
|
|
|
return rulesCount, checksum, name
|
|
|
|
}
|
|
|
|
|
|
|
|
// Perform upgrade on a filter and update LastUpdated value
|
|
|
|
func (f *Filtering) update(filter *filter) (bool, error) {
|
|
|
|
b, err := f.updateIntl(filter)
|
|
|
|
filter.LastUpdated = time.Now()
|
|
|
|
if !b {
|
|
|
|
e := os.Chtimes(filter.Path(), filter.LastUpdated, filter.LastUpdated)
|
|
|
|
if e != nil {
|
|
|
|
log.Error("os.Chtimes(): %v", e)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return b, err
|
2018-11-28 10:14:54 -07:00
|
|
|
}
|
|
|
|
|
2020-11-20 07:32:41 -07:00
|
|
|
func (f *Filtering) read(reader io.Reader, tmpFile *os.File, filter *filter) (int, error) {
|
2020-03-17 05:00:40 -07:00
|
|
|
htmlTest := true
|
|
|
|
firstChunk := make([]byte, 4*1024)
|
|
|
|
firstChunkLen := 0
|
|
|
|
buf := make([]byte, 64*1024)
|
|
|
|
total := 0
|
|
|
|
for {
|
2020-03-05 04:37:43 -07:00
|
|
|
n, err := reader.Read(buf)
|
2020-03-17 05:00:40 -07:00
|
|
|
total += n
|
|
|
|
|
|
|
|
if htmlTest {
|
2020-11-06 02:15:08 -07:00
|
|
|
num := len(firstChunk) - firstChunkLen
|
|
|
|
if n < num {
|
|
|
|
num = n
|
|
|
|
}
|
2020-03-17 05:00:40 -07:00
|
|
|
copied := copy(firstChunk[firstChunkLen:], buf[:num])
|
|
|
|
firstChunkLen += copied
|
|
|
|
|
|
|
|
if firstChunkLen == len(firstChunk) || err == io.EOF {
|
2020-04-07 13:29:03 -07:00
|
|
|
if !isPrintableText(firstChunk, firstChunkLen) {
|
2020-11-20 07:32:41 -07:00
|
|
|
return total, fmt.Errorf("data contains non-printable characters")
|
2020-03-17 05:00:40 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
s := strings.ToLower(string(firstChunk))
|
2020-11-06 02:15:08 -07:00
|
|
|
if strings.Contains(s, "<html") || strings.Contains(s, "<!doctype") {
|
2020-11-20 07:32:41 -07:00
|
|
|
return total, fmt.Errorf("data is HTML, not plain text")
|
2020-03-17 05:00:40 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
htmlTest = false
|
|
|
|
firstChunk = nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-04-07 03:02:28 -07:00
|
|
|
_, err2 := tmpFile.Write(buf[:n])
|
2020-03-17 05:00:40 -07:00
|
|
|
if err2 != nil {
|
2020-11-20 07:32:41 -07:00
|
|
|
return total, err2
|
2020-03-17 05:00:40 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
if err == io.EOF {
|
2020-11-20 07:32:41 -07:00
|
|
|
return total, nil
|
2020-03-17 05:00:40 -07:00
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
log.Printf("Couldn't fetch filter contents from URL %s, skipping: %s", filter.URL, err)
|
2020-11-20 07:32:41 -07:00
|
|
|
return total, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// updateIntl returns true if filter update performed successfully.
|
|
|
|
func (f *Filtering) updateIntl(filter *filter) (updated bool, err error) {
|
|
|
|
updated = false
|
|
|
|
log.Tracef("Downloading update for filter %d from %s", filter.ID, filter.URL)
|
|
|
|
|
2021-05-21 04:55:42 -07:00
|
|
|
tmpFile, err := os.CreateTemp(filepath.Join(Context.getDataDir(), filterDir), "")
|
2020-11-20 07:32:41 -07:00
|
|
|
if err != nil {
|
|
|
|
return updated, err
|
|
|
|
}
|
|
|
|
defer func() {
|
2021-03-12 04:32:08 -07:00
|
|
|
var derr error
|
2020-11-20 07:32:41 -07:00
|
|
|
if tmpFile != nil {
|
2021-03-12 04:32:08 -07:00
|
|
|
if derr = tmpFile.Close(); derr != nil {
|
|
|
|
log.Printf("Couldn't close temporary file: %s", derr)
|
2020-11-20 07:32:41 -07:00
|
|
|
}
|
2021-03-12 04:32:08 -07:00
|
|
|
|
2020-11-20 07:32:41 -07:00
|
|
|
tmpFileName := tmpFile.Name()
|
2021-03-12 04:32:08 -07:00
|
|
|
if derr = os.Remove(tmpFileName); derr != nil {
|
|
|
|
log.Printf("Couldn't delete temporary file %s: %s", tmpFileName, derr)
|
2020-11-20 07:32:41 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
var reader io.Reader
|
|
|
|
if filepath.IsAbs(filter.URL) {
|
2021-03-12 04:32:08 -07:00
|
|
|
var f io.ReadCloser
|
|
|
|
f, err = os.Open(filter.URL)
|
2020-11-20 07:32:41 -07:00
|
|
|
if err != nil {
|
|
|
|
return updated, fmt.Errorf("open file: %w", err)
|
2020-03-17 05:00:40 -07:00
|
|
|
}
|
2021-05-24 07:28:11 -07:00
|
|
|
defer func() { err = errors.WithDeferred(err, f.Close()) }()
|
2021-03-12 04:32:08 -07:00
|
|
|
|
2020-11-20 07:32:41 -07:00
|
|
|
reader = f
|
|
|
|
} else {
|
2021-03-12 04:32:08 -07:00
|
|
|
var resp *http.Response
|
|
|
|
resp, err = Context.client.Get(filter.URL)
|
2020-11-20 07:32:41 -07:00
|
|
|
if err != nil {
|
|
|
|
log.Printf("Couldn't request filter from URL %s, skipping: %s", filter.URL, err)
|
2021-05-24 07:28:11 -07:00
|
|
|
|
2020-11-20 07:32:41 -07:00
|
|
|
return updated, err
|
|
|
|
}
|
2021-05-24 07:28:11 -07:00
|
|
|
defer func() { err = errors.WithDeferred(err, resp.Body.Close()) }()
|
2020-11-20 07:32:41 -07:00
|
|
|
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
|
|
log.Printf("Got status code %d from URL %s, skipping", resp.StatusCode, filter.URL)
|
|
|
|
return updated, fmt.Errorf("got status code != 200: %d", resp.StatusCode)
|
|
|
|
}
|
|
|
|
reader = resp.Body
|
|
|
|
}
|
|
|
|
|
|
|
|
total, err := f.read(reader, tmpFile, filter)
|
|
|
|
if err != nil {
|
|
|
|
return updated, err
|
2018-11-28 10:14:54 -07:00
|
|
|
}
|
|
|
|
|
2020-03-17 05:00:40 -07:00
|
|
|
// Extract filter name and count number of rules
|
2020-04-07 03:02:28 -07:00
|
|
|
_, _ = tmpFile.Seek(0, io.SeekStart)
|
|
|
|
rulesCount, checksum, filterName := f.parseFilterContents(tmpFile)
|
2018-11-28 10:14:54 -07:00
|
|
|
// Check if the filter has been really changed
|
2019-03-15 06:49:10 -07:00
|
|
|
if filter.checksum == checksum {
|
2019-02-12 09:22:17 -07:00
|
|
|
log.Tracef("Filter #%d at URL %s hasn't changed, not updating it", filter.ID, filter.URL)
|
2020-11-20 07:32:41 -07:00
|
|
|
return updated, nil
|
2018-11-28 10:14:54 -07:00
|
|
|
}
|
|
|
|
|
2020-03-17 05:00:40 -07:00
|
|
|
log.Printf("Filter %d has been updated: %d bytes, %d rules",
|
|
|
|
filter.ID, total, rulesCount)
|
2020-05-25 07:13:13 -07:00
|
|
|
if len(filter.Name) == 0 {
|
2019-03-15 06:49:10 -07:00
|
|
|
filter.Name = filterName
|
|
|
|
}
|
2018-11-28 10:14:54 -07:00
|
|
|
filter.RulesCount = rulesCount
|
2019-03-15 06:49:10 -07:00
|
|
|
filter.checksum = checksum
|
2018-11-28 10:14:54 -07:00
|
|
|
filterFilePath := filter.Path()
|
|
|
|
log.Printf("Saving filter %d contents to: %s", filter.ID, filterFilePath)
|
2020-04-07 03:02:28 -07:00
|
|
|
|
|
|
|
// Closing the file before renaming it is necessary on Windows
|
|
|
|
_ = tmpFile.Close()
|
|
|
|
err = os.Rename(tmpFile.Name(), filterFilePath)
|
2020-03-17 05:00:40 -07:00
|
|
|
if err != nil {
|
2020-11-20 07:32:41 -07:00
|
|
|
return updated, err
|
2019-10-21 09:49:56 -07:00
|
|
|
}
|
2020-04-07 03:02:28 -07:00
|
|
|
tmpFile = nil
|
2020-11-20 07:32:41 -07:00
|
|
|
updated = true
|
2020-03-17 05:00:40 -07:00
|
|
|
|
2020-11-20 07:32:41 -07:00
|
|
|
return updated, nil
|
2019-10-09 09:51:26 -07:00
|
|
|
}
|
|
|
|
|
2018-11-28 10:14:54 -07:00
|
|
|
// loads filter contents from the file in dataDir
|
2021-05-14 09:41:45 -07:00
|
|
|
func (f *Filtering) load(filter *filter) (err error) {
|
2018-11-28 10:14:54 -07:00
|
|
|
filterFilePath := filter.Path()
|
|
|
|
|
2021-05-14 09:41:45 -07:00
|
|
|
log.Tracef("filtering: loading filter %d contents to: %s", filter.ID, filterFilePath)
|
2018-11-28 10:14:54 -07:00
|
|
|
|
2020-03-17 05:00:40 -07:00
|
|
|
file, err := os.Open(filterFilePath)
|
2021-05-14 09:41:45 -07:00
|
|
|
if errors.Is(err, os.ErrNotExist) {
|
|
|
|
// Do nothing, file doesn't exist.
|
|
|
|
return nil
|
|
|
|
} else if err != nil {
|
|
|
|
return fmt.Errorf("opening filter file: %w", err)
|
2018-11-28 10:14:54 -07:00
|
|
|
}
|
2021-05-24 07:28:11 -07:00
|
|
|
defer func() { err = errors.WithDeferred(err, file.Close()) }()
|
2018-11-28 10:14:54 -07:00
|
|
|
|
2021-05-14 09:41:45 -07:00
|
|
|
st, err := file.Stat()
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("getting filter file stat: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
log.Tracef("filtering: File %s, id %d, length %d", filterFilePath, filter.ID, st.Size())
|
|
|
|
|
2020-03-17 05:00:40 -07:00
|
|
|
rulesCount, checksum, _ := f.parseFilterContents(file)
|
2018-11-28 10:14:54 -07:00
|
|
|
|
|
|
|
filter.RulesCount = rulesCount
|
2020-03-17 05:00:40 -07:00
|
|
|
filter.checksum = checksum
|
2021-05-14 09:41:45 -07:00
|
|
|
filter.LastUpdated = st.ModTime()
|
2018-11-28 10:14:54 -07:00
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2019-03-18 04:12:04 -07:00
|
|
|
// Clear filter rules
|
|
|
|
func (filter *filter) unload() {
|
|
|
|
filter.RulesCount = 0
|
2020-03-17 05:00:40 -07:00
|
|
|
filter.checksum = 0
|
2019-03-18 04:12:04 -07:00
|
|
|
}
|
|
|
|
|
2018-11-28 10:14:54 -07:00
|
|
|
// Path to the filter contents
|
|
|
|
func (filter *filter) Path() string {
|
2020-02-13 08:42:07 -07:00
|
|
|
return filepath.Join(Context.getDataDir(), filterDir, strconv.FormatInt(filter.ID, 10)+".txt")
|
2018-11-28 10:14:54 -07:00
|
|
|
}
|
2019-02-10 11:44:16 -07:00
|
|
|
|
2019-10-09 09:51:26 -07:00
|
|
|
func enableFilters(async bool) {
|
2021-05-24 04:48:42 -07:00
|
|
|
config.RLock()
|
|
|
|
defer config.RUnlock()
|
|
|
|
|
|
|
|
enableFiltersLocked(async)
|
|
|
|
}
|
|
|
|
|
|
|
|
func enableFiltersLocked(async bool) {
|
2021-05-21 06:15:47 -07:00
|
|
|
var whiteFilters []filtering.Filter
|
|
|
|
filters := []filtering.Filter{{
|
2021-05-12 10:04:50 -07:00
|
|
|
Data: []byte(strings.Join(config.UserRules, "\n")),
|
|
|
|
}}
|
2019-10-09 09:51:26 -07:00
|
|
|
|
2021-05-12 10:04:50 -07:00
|
|
|
for _, filter := range config.Filters {
|
|
|
|
if !filter.Enabled {
|
|
|
|
continue
|
2020-02-26 09:58:25 -07:00
|
|
|
}
|
2019-10-09 09:51:26 -07:00
|
|
|
|
2021-05-21 06:15:47 -07:00
|
|
|
filters = append(filters, filtering.Filter{
|
2021-05-12 10:04:50 -07:00
|
|
|
ID: filter.ID,
|
|
|
|
FilePath: filter.Path(),
|
|
|
|
})
|
|
|
|
}
|
|
|
|
for _, filter := range config.WhitelistFilters {
|
|
|
|
if !filter.Enabled {
|
|
|
|
continue
|
2020-02-26 09:58:25 -07:00
|
|
|
}
|
2021-03-12 04:32:08 -07:00
|
|
|
|
2021-05-21 06:15:47 -07:00
|
|
|
whiteFilters = append(whiteFilters, filtering.Filter{
|
2021-05-12 10:04:50 -07:00
|
|
|
ID: filter.ID,
|
|
|
|
FilePath: filter.Path(),
|
|
|
|
})
|
2019-10-09 09:51:26 -07:00
|
|
|
}
|
|
|
|
|
2021-05-12 10:04:50 -07:00
|
|
|
if err := Context.dnsFilter.SetFilters(filters, whiteFilters, async); err != nil {
|
|
|
|
log.Debug("enabling filters: %s", err)
|
|
|
|
}
|
2021-05-24 04:48:42 -07:00
|
|
|
|
|
|
|
Context.dnsFilter.SetEnabled(config.DNS.FilteringEnabled)
|
2019-10-09 09:51:26 -07:00
|
|
|
}
|