AdGuardHome/internal/filtering/filter.go

package filtering

import (
	"fmt"
	"io"
	"net/http"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"github.com/AdguardTeam/AdGuardHome/internal/aghalg"
	"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
	"github.com/AdguardTeam/golibs/errors"
	"github.com/AdguardTeam/golibs/log"
	"github.com/AdguardTeam/golibs/stringutil"
	"golang.org/x/exp/slices"
)

// filterDir is the subdirectory of a data directory to store downloaded
// filters.
const filterDir = "filters"

// nextFilterID is a way to seed a unique ID generation.
//
// TODO(e.burkov): Use more deterministic approach.
var nextFilterID = time.Now().Unix()

// FilterYAML represents a filter list in the configuration file.
//
// TODO(e.burkov): Investigate if the field ordering is important.
type FilterYAML struct {
	Enabled     bool
	URL         string // URL or a file path
	Name        string    `yaml:"name"`
	RulesCount  int       `yaml:"-"`
	LastUpdated time.Time `yaml:"-"`
	checksum    uint32 // checksum of the file data
	white       bool

	Filter `yaml:",inline"`
}

// unload clears the filter's loaded rule data.
func (filter *FilterYAML) unload() {
	filter.RulesCount = 0
	filter.checksum = 0
}

// Path returns the path to the filter contents file within dataDir.
func (filter *FilterYAML) Path(dataDir string) string {
	return filepath.Join(dataDir, filterDir, strconv.FormatInt(filter.ID, 10)+".txt")
}
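
// A minimal worked example of the path derivation above; the data directory
// is hypothetical, and the separator is shown for a Unix-like system:
//
//	flt := FilterYAML{Filter: Filter{ID: 1}}
//	_ = flt.Path("/opt/adguardhome/data")
//	// "/opt/adguardhome/data/filters/1.txt"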

const (
	// errFilterNotExist is returned from [filterSetProperties] when there are
	// no lists with the desired URL to update.
	//
	// TODO(e.burkov): Use wherever the same error is needed.
	errFilterNotExist errors.Error = "url doesn't exist"

	// errFilterExists is returned from [filterSetProperties] when there is
	// another filter having the same URL as the one updated.
	//
	// TODO(e.burkov): Use wherever the same error is needed.
	errFilterExists errors.Error = "url already exists"
)

// filterSetProperties searches for the particular filter list by url and sets
// the values of newList to it, updating afterwards if needed. It returns true
// if the update was performed and the filtering engine restart is required.
func (d *DNSFilter) filterSetProperties(
	listURL string,
	newList FilterYAML,
	isAllowlist bool,
) (shouldRestart bool, err error) {
	d.filtersMu.Lock()
	defer d.filtersMu.Unlock()

	filters := d.Filters
	if isAllowlist {
		filters = d.WhitelistFilters
	}

	i := slices.IndexFunc(filters, func(filt FilterYAML) bool { return filt.URL == listURL })
	if i == -1 {
		return false, errFilterNotExist
	}

	filt := &filters[i]
	log.Debug(
		"filtering: set name to %q, url to %s, enabled to %t for filter %s",
		newList.Name,
		newList.URL,
		newList.Enabled,
		filt.URL,
	)

	defer func(oldURL, oldName string, oldEnabled bool, oldUpdated time.Time, oldRulesCount int) {
		if err != nil {
			filt.URL = oldURL
			filt.Name = oldName
			filt.Enabled = oldEnabled
			filt.LastUpdated = oldUpdated
			filt.RulesCount = oldRulesCount
		}
	}(filt.URL, filt.Name, filt.Enabled, filt.LastUpdated, filt.RulesCount)

	filt.Name = newList.Name

	if filt.URL != newList.URL {
		if d.filterExistsLocked(newList.URL) {
			return false, errFilterExists
		}

		shouldRestart = true

		filt.URL = newList.URL
		filt.LastUpdated = time.Time{}
		filt.unload()
	}

	if filt.Enabled != newList.Enabled {
		filt.Enabled = newList.Enabled
		shouldRestart = true
	}

	if filt.Enabled {
		if shouldRestart {
			// Download the filter contents.
			shouldRestart, err = d.update(filt)
		}
	} else {
		// TODO(e.burkov): The validation of the contents of the new URL is
		// currently skipped if the rule list is disabled. This makes it
		// possible to set a bad rules source, but the validation should still
		// kick in when the filter is enabled. Consider changing this behavior
		// to be stricter.
		filt.unload()
	}

	return shouldRestart, err
}
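
// A hedged sketch of how a caller might drive filterSetProperties; the
// variable names and the follow-up restart step are illustrative, not the
// actual HTTP handler logic:
//
//	restart, err := d.filterSetProperties(oldURL, FilterYAML{
//		Enabled: true,
//		URL:     newURL,
//		Name:    "Example list",
//	}, false)
//	if err != nil {
//		return err // errFilterNotExist or errFilterExists, among others.
//	}
//	if restart {
//		d.EnableFilters(true)
//	}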

// filterExists returns true if a filter with the same url exists in d. It's
// safe for concurrent use.
func (d *DNSFilter) filterExists(url string) (ok bool) {
	d.filtersMu.RLock()
	defer d.filtersMu.RUnlock()

	r := d.filterExistsLocked(url)

	return r
}

// filterExistsLocked returns true if d contains the filter with the same url.
// d.filtersMu is expected to be locked.
func (d *DNSFilter) filterExistsLocked(url string) (ok bool) {
	for _, f := range d.Filters {
		if f.URL == url {
			return true
		}
	}

	for _, f := range d.WhitelistFilters {
		if f.URL == url {
			return true
		}
	}

	return false
}

// filterAdd adds flt to the list of filters. It returns errFilterExists if a
// filter with the same URL already exists.
func (d *DNSFilter) filterAdd(flt FilterYAML) (err error) {
	// Defer annotating to unlock sooner.
	defer func() { err = errors.Annotate(err, "adding filter: %w") }()

	d.filtersMu.Lock()
	defer d.filtersMu.Unlock()

	// Check for duplicates.
	if d.filterExistsLocked(flt.URL) {
		return errFilterExists
	}

	if flt.white {
		d.WhitelistFilters = append(d.WhitelistFilters, flt)
	} else {
		d.Filters = append(d.Filters, flt)
	}

	return nil
}

// loadFilters loads filters from the disk. If any filter has a zero ID, it
// assigns a new unique one.
func (d *DNSFilter) loadFilters(array []FilterYAML) {
	for i := range array {
		filter := &array[i] // otherwise we're operating on a copy
		if filter.ID == 0 {
			filter.ID = assignUniqueFilterID()
		}

		if !filter.Enabled {
			// No need to load a filter that is not enabled.
			continue
		}

		err := d.load(filter)
		if err != nil {
			log.Error("filtering: loading filter %d: %s", filter.ID, err)
		}
	}
}

// deduplicateFilters removes filters with duplicate URLs from filters in
// place, keeping the first occurrence of each URL, and returns the shortened
// slice.
func deduplicateFilters(filters []FilterYAML) (deduplicated []FilterYAML) {
	urls := stringutil.NewSet()
	lastIdx := 0

	for _, filter := range filters {
		if !urls.Has(filter.URL) {
			urls.Add(filter.URL)
			filters[lastIdx] = filter
			lastIdx++
		}
	}

	return filters[:lastIdx]
}
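
// For example, given three lists where the first and third share a URL, the
// first occurrence wins (URLs are illustrative):
//
//	flts := deduplicateFilters([]FilterYAML{
//		{URL: "https://a.example/list.txt"},
//		{URL: "https://b.example/list.txt"},
//		{URL: "https://a.example/list.txt"},
//	})
//	// len(flts) == 2, in the original order.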

// updateUniqueFilterID sets nextFilterID to one past the largest filter ID in
// filters, if that is greater than its current value.
func updateUniqueFilterID(filters []FilterYAML) {
	for _, filter := range filters {
		if nextFilterID < filter.ID {
			nextFilterID = filter.ID + 1
		}
	}
}

// assignUniqueFilterID returns the next unique filter ID and advances the
// counter.
//
// TODO(e.burkov): Improve this inexhaustible source of races.
func assignUniqueFilterID() int64 {
	value := nextFilterID
	nextFilterID++

	return value
}
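
// A small worked example of the ID bookkeeping above (the guarantees are only
// per-process and, as the TODO notes, not race-free):
//
//	updateUniqueFilterID(d.Filters) // only ever raises nextFilterID
//	a := assignUniqueFilterID()
//	b := assignUniqueFilterID()
//	// b == a+1, and neither collides with any ID already seen by
//	// updateUniqueFilterID.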

// periodicallyRefreshFilters periodically checks for filter updates. It
// retries with an exponentially growing interval after network errors and
// settles at maxInterval once a refresh succeeds.
func (d *DNSFilter) periodicallyRefreshFilters() {
	// maxInterval is the maximum delay between refresh attempts, in seconds.
	const maxInterval = 1 * 60 * 60

	intval := 5 // use a dynamically increasing time interval
	for {
		isNetErr, ok := false, false
		if d.FiltersUpdateIntervalHours != 0 {
			_, isNetErr, ok = d.tryRefreshFilters(true, true, false)
			if ok && !isNetErr {
				intval = maxInterval
			}
		}

		if isNetErr {
			intval *= 2
			if intval > maxInterval {
				intval = maxInterval
			}
		}

		time.Sleep(time.Duration(intval) * time.Second)
	}
}
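
// Worked example of the schedule above, assuming every attempt fails with a
// network error: intval starts at 5 s and is doubled before each sleep, so
// the loop waits 10, 20, 40, ..., 2560, then 3600 s between attempts. A
// single successful refresh jumps the interval straight to maxInterval (one
// hour).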

// tryRefreshFilters is like [refreshFilters], but backs down if the update is
// already going on.
//
// TODO(e.burkov): Get rid of the concurrency pattern which requires the
// [sync.Mutex.TryLock].
func (d *DNSFilter) tryRefreshFilters(block, allow, force bool) (updated int, isNetworkErr, ok bool) {
	if ok = d.refreshLock.TryLock(); !ok {
		return 0, false, false
	}
	defer d.refreshLock.Unlock()

	updated, isNetworkErr = d.refreshFiltersIntl(block, allow, force)

	return updated, isNetworkErr, ok
}
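
// A hedged sketch of forcing a refresh of both block- and allowlists from a
// caller; the handling shown is illustrative:
//
//	updated, isNetErr, ok := d.tryRefreshFilters(true, true, true)
//	switch {
//	case !ok:
//		// Another refresh is already running; skip this one.
//	case isNetErr:
//		// Nothing could be downloaded; retry later.
//	default:
//		log.Info("refreshed %d filter lists", updated)
//	}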

// listsToUpdate returns the slice of filter lists that could be updated.
func (d *DNSFilter) listsToUpdate(filters *[]FilterYAML, force bool) (toUpd []FilterYAML) {
	now := time.Now()

	d.filtersMu.RLock()
	defer d.filtersMu.RUnlock()

	for i := range *filters {
		flt := &(*filters)[i] // otherwise we will be operating on a copy

		if !flt.Enabled {
			continue
		}

		if !force {
			exp := flt.LastUpdated.Add(time.Duration(d.FiltersUpdateIntervalHours) * time.Hour)
			if now.Before(exp) {
				continue
			}
		}

		toUpd = append(toUpd, FilterYAML{
			Filter: Filter{
				ID: flt.ID,
			},
			URL:      flt.URL,
			Name:     flt.Name,
			checksum: flt.checksum,
		})
	}

	return toUpd
}
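
// Worked example of the expiration check above: with
// FiltersUpdateIntervalHours set to 24 and a list last updated at 2024-01-01
// 00:00 UTC, the list is skipped until 2024-01-02 00:00 UTC unless force is
// true (the dates are illustrative).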

// refreshFiltersArray checks and updates the lists from filters that are due
// for an update. It returns the number of updated lists, the metadata and
// per-list update flags of the processed lists, and true if every attempted
// update failed.
func (d *DNSFilter) refreshFiltersArray(filters *[]FilterYAML, force bool) (int, []FilterYAML, []bool, bool) {
	var updateFlags []bool // 'true' if filter data has changed

	updateFilters := d.listsToUpdate(filters, force)
	if len(updateFilters) == 0 {
		return 0, nil, nil, false
	}

	nfail := 0
	for i := range updateFilters {
		uf := &updateFilters[i]
		updated, err := d.update(uf)
		updateFlags = append(updateFlags, updated)
		if err != nil {
			nfail++
			log.Info("filtering: updating filter from url %q: %s\n", uf.URL, err)

			continue
		}
	}

	if nfail == len(updateFilters) {
		return 0, nil, nil, true
	}

	updateCount := 0

	d.filtersMu.Lock()
	defer d.filtersMu.Unlock()

	for i := range updateFilters {
		uf := &updateFilters[i]
		updated := updateFlags[i]

		for k := range *filters {
			f := &(*filters)[k]
			if f.ID != uf.ID || f.URL != uf.URL {
				continue
			}

			f.LastUpdated = uf.LastUpdated
			if !updated {
				continue
			}

			log.Info(
				"filtering: updated filter %d; rule count: %d (was %d)",
				f.ID,
				uf.RulesCount,
				f.RulesCount,
			)

			f.Name = uf.Name
			f.RulesCount = uf.RulesCount
			f.checksum = uf.checksum
			updateCount++
		}
	}

	return updateCount, updateFilters, updateFlags, false
}

// refreshFiltersIntl checks filters and updates them if necessary. If force
// is true, it ignores the filter.LastUpdated field value.
//
// Algorithm:
//
//  1. Get the list of filters to be updated. For each filter, run the
//     download and checksum check operation. Store the downloaded data in a
//     temporary file inside the data/filters directory.
//
//  2. For each filter, if the filter data hasn't changed, just set a new
//     update time on the file. Otherwise, rename the temporary file
//     (<temp> -> 1.txt). Note that this method works only on Unix systems.
//     On Windows, don't pass files to filtering, pass the whole data.
//
// refreshFiltersIntl returns the number of updated filters. It also returns
// true if there was a network error and nothing could be updated.
//
// TODO(a.garipov, e.burkov): What the hell?
func (d *DNSFilter) refreshFiltersIntl(block, allow, force bool) (int, bool) {
	updNum := 0
	log.Debug("filtering: starting updating")
	defer func() { log.Debug("filtering: finished updating, %d updated", updNum) }()

	var lists []FilterYAML
	var toUpd []bool
	isNetErr := false

	if block {
		updNum, lists, toUpd, isNetErr = d.refreshFiltersArray(&d.Filters, force)
	}
	if allow {
		updNumAl, listsAl, toUpdAl, isNetErrAl := d.refreshFiltersArray(&d.WhitelistFilters, force)

		updNum += updNumAl
		lists = append(lists, listsAl...)
		toUpd = append(toUpd, toUpdAl...)
		isNetErr = isNetErr || isNetErrAl
	}
	if isNetErr {
		return 0, true
	}

	if updNum != 0 {
		d.EnableFilters(false)

		for i := range lists {
			uf := &lists[i]
			updated := toUpd[i]
			if !updated {
				continue
			}

			p := uf.Path(d.DataDir)
			err := os.Remove(p + ".old")
			if err != nil {
				log.Debug("filtering: removing old filter file %q: %s", p, err)
			}
		}
	}

	return updNum, false
}

// update refreshes the filter's content and the access and modification
// times of its file.
func (d *DNSFilter) update(filter *FilterYAML) (b bool, err error) {
	b, err = d.updateIntl(filter)
	filter.LastUpdated = time.Now()
	if !b {
		chErr := os.Chtimes(
			filter.Path(d.DataDir),
			filter.LastUpdated,
			filter.LastUpdated,
		)
		if chErr != nil {
			log.Error("filtering: os.Chtimes(): %s", chErr)
		}
	}

	return b, err
}

// finalizeUpdate closes and gets rid of the temporary file with the filter's
// content according to updated. On success, it also saves the new values of
// flt's name, rule count, and checksum.
func (d *DNSFilter) finalizeUpdate(
	file *os.File,
	flt *FilterYAML,
	updated bool,
	res *rulelist.ParseResult,
) (err error) {
	tmpFileName := file.Name()

	// Close the file before renaming it because it's required on Windows.
	//
	// See https://github.com/adguardTeam/adGuardHome/issues/1553.
	err = file.Close()
	if err != nil {
		return fmt.Errorf("closing temporary file: %w", err)
	}

	if !updated {
		log.Debug("filtering: filter %d from url %q has no changes, skipping", flt.ID, flt.URL)

		return os.Remove(tmpFileName)
	}

	fltPath := flt.Path(d.DataDir)

	log.Info("filtering: saving contents of filter %d into %q", flt.ID, fltPath)

	// Don't use renameio or maybe packages, since those will require loading
	// the whole filter content to the memory on Windows.
	err = os.Rename(tmpFileName, fltPath)
	if err != nil {
		return errors.WithDeferred(err, os.Remove(tmpFileName))
	}

	flt.Name = aghalg.Coalesce(flt.Name, res.Title)
	flt.checksum, flt.RulesCount = res.Checksum, res.RulesCount

	return nil
}

// updateIntl updates flt, rewriting its actual file. It returns true if the
// actual update has been performed.
func (d *DNSFilter) updateIntl(flt *FilterYAML) (ok bool, err error) {
	log.Debug("filtering: downloading update for filter %d from %q", flt.ID, flt.URL)

	var res *rulelist.ParseResult

	var tmpFile *os.File
	tmpFile, err = os.CreateTemp(filepath.Join(d.DataDir, filterDir), "")
	if err != nil {
		return false, err
	}
	defer func() {
		finErr := d.finalizeUpdate(tmpFile, flt, ok, res)
		if ok && finErr == nil {
			log.Info(
				"filtering: updated filter %d: %d bytes, %d rules",
				flt.ID,
				res.BytesWritten,
				res.RulesCount,
			)

			return
		}

		err = errors.WithDeferred(err, finErr)
	}()

	// Change the default 0o600 permission to something more acceptable by end
	// users.
	//
	// See https://github.com/AdguardTeam/AdGuardHome/issues/3198.
	if err = tmpFile.Chmod(0o644); err != nil {
		return false, fmt.Errorf("changing file mode: %w", err)
	}

	var r io.Reader
	if !filepath.IsAbs(flt.URL) {
		var resp *http.Response
		resp, err = d.HTTPClient.Get(flt.URL)
		if err != nil {
			log.Info("filtering: requesting filter from %q: %s, skipping", flt.URL, err)

			return false, err
		}
		defer func() { err = errors.WithDeferred(err, resp.Body.Close()) }()

		if resp.StatusCode != http.StatusOK {
			log.Info("filtering: got status code %d from %q, skipping", resp.StatusCode, flt.URL)

			return false, fmt.Errorf("got status code %d, want %d", resp.StatusCode, http.StatusOK)
		}

		r = resp.Body
	} else {
		var f *os.File
		f, err = os.Open(flt.URL)
		if err != nil {
			return false, fmt.Errorf("open file: %w", err)
		}
		defer func() { err = errors.WithDeferred(err, f.Close()) }()

		r = f
	}

	bufPtr := d.bufPool.Get().(*[]byte)
	defer d.bufPool.Put(bufPtr)

	p := rulelist.NewParser()
	res, err = p.Parse(tmpFile, r, *bufPtr)

	return res.Checksum != flt.checksum && err == nil, err
}
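
// Note on the return value above: when the downloaded content hashes to the
// same checksum as the current list, updateIntl reports false, finalizeUpdate
// simply removes the temporary file, and update bumps the existing file's
// timestamps via os.Chtimes instead, so that load can later restore a fresh
// LastUpdated from the file's mtime.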

// load loads filter contents from the file in dataDir.
func (d *DNSFilter) load(flt *FilterYAML) (err error) {
	fileName := flt.Path(d.DataDir)

	log.Debug("filtering: loading filter %d from %q", flt.ID, fileName)

	file, err := os.Open(fileName)
	if errors.Is(err, os.ErrNotExist) {
		// Do nothing, file doesn't exist.
		return nil
	} else if err != nil {
		return fmt.Errorf("opening filter file: %w", err)
	}
	defer func() { err = errors.WithDeferred(err, file.Close()) }()

	st, err := file.Stat()
	if err != nil {
		return fmt.Errorf("getting filter file stat: %w", err)
	}

	log.Debug("filtering: file %q, id %d, length %d", fileName, flt.ID, st.Size())

	bufPtr := d.bufPool.Get().(*[]byte)
	defer d.bufPool.Put(bufPtr)

	p := rulelist.NewParser()
	res, err := p.Parse(io.Discard, file, *bufPtr)
	if err != nil {
		return fmt.Errorf("parsing filter file: %w", err)
	}

	flt.RulesCount, flt.checksum, flt.LastUpdated = res.RulesCount, res.Checksum, st.ModTime()

	return nil
}

// EnableFilters passes the current filter lists to the filtering engine.
func (d *DNSFilter) EnableFilters(async bool) {
	d.filtersMu.RLock()
	defer d.filtersMu.RUnlock()

	d.enableFiltersLocked(async)
}

// enableFiltersLocked assembles the block and allow filter lists, including
// the custom user rules, and passes them to the filtering engine.
// d.filtersMu is expected to be locked.
func (d *DNSFilter) enableFiltersLocked(async bool) {
	filters := make([]Filter, 1, len(d.Filters)+len(d.WhitelistFilters)+1)
	filters[0] = Filter{
		ID:   CustomListID,
		Data: []byte(strings.Join(d.UserRules, "\n")),
	}

	for _, filter := range d.Filters {
		if !filter.Enabled {
			continue
		}

		filters = append(filters, Filter{
			ID:       filter.ID,
			FilePath: filter.Path(d.DataDir),
		})
	}

	var allowFilters []Filter
	for _, filter := range d.WhitelistFilters {
		if !filter.Enabled {
			continue
		}

		allowFilters = append(allowFilters, Filter{
			ID:       filter.ID,
			FilePath: filter.Path(d.DataDir),
		})
	}

	err := d.setFilters(filters, allowFilters, async)
	if err != nil {
		log.Error("filtering: enabling filters: %s", err)
	}

	d.SetEnabled(d.FilteringEnabled)
}
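
// For illustration, with user rules ["||ads.example^"] and one enabled
// blocklist with ID 1 (values hypothetical), the slice passed to setFilters
// looks like:
//
//	[]Filter{
//		{ID: CustomListID, Data: []byte("||ads.example^")},
//		{ID: 1, FilePath: "<dataDir>/filters/1.txt"},
//	}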