Pull request 1921: 6003-relax-scan-limit
Updates #6003.

Squashed commit of the following:

commit 1cc42303c29edc621802fc182ccb5701e412f099
Author: Ainar Garipov <A.Garipov@AdGuard.COM>
Date:   Thu Jul 13 13:47:41 2023 +0300

    all: fix chlog

commit e835084c7aac6384ea7b0886e6b3b1d614438baa
Author: Ainar Garipov <A.Garipov@AdGuard.COM>
Date:   Thu Jul 13 13:40:45 2023 +0300

    rulelist: imp longer line handling
This commit is contained in:
parent
de63eeabfa
commit
f22d893845
|
@ -23,11 +23,18 @@ See also the [v0.107.35 GitHub milestone][ms-v0.107.35].
|
|||
NOTE: Add new changes BELOW THIS COMMENT.
|
||||
-->
|
||||
|
||||
### Fixed
|
||||
|
||||
- `bufio.Scanner: token too long` errors when trying to add filtering-rule lists
|
||||
with lines over 1024 bytes long ([#6003]).
|
||||
|
||||
### Removed
|
||||
|
||||
- Default exposure of the non-standard ports 784 and 8853 for DNS-over-QUIC in
|
||||
the `Dockerfile`.
|
||||
|
||||
[#6003]: https://github.com/AdguardTeam/AdGuardHome/issues/6003
|
||||
|
||||
<!--
|
||||
NOTE: Add new changes ABOVE THIS COMMENT.
|
||||
-->
|
||||
|
|
|
@ -943,7 +943,7 @@ func New(c *Config, blockFilters []Filter) (d *DNSFilter, err error) {
|
|||
d = &DNSFilter{
|
||||
bufPool: &sync.Pool{
|
||||
New: func() (buf any) {
|
||||
bufVal := make([]byte, rulelist.MaxRuleLen)
|
||||
bufVal := make([]byte, rulelist.DefaultRuleBufSize)
|
||||
|
||||
return &bufVal
|
||||
},
|
||||
|
|
|
@ -7,6 +7,7 @@ import (
|
|||
"hash/crc32"
|
||||
"io"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/AdguardTeam/golibs/errors"
|
||||
)
|
||||
|
@ -48,19 +49,29 @@ type ParseResult struct {
|
|||
// nil.
|
||||
func (p *Parser) Parse(dst io.Writer, src io.Reader, buf []byte) (r *ParseResult, err error) {
|
||||
s := bufio.NewScanner(src)
|
||||
s.Buffer(buf, MaxRuleLen)
|
||||
|
||||
lineIdx := 0
|
||||
// Don't use [DefaultRuleBufSize] as the maximum size, since some
|
||||
// filtering-rule lists compressed by e.g. HostlistsCompiler can have very
|
||||
// large lines. The buffer optimization still works for the more common
|
||||
// case of reasonably-sized lines.
|
||||
//
|
||||
// See https://github.com/AdguardTeam/AdGuardHome/issues/6003.
|
||||
s.Buffer(buf, bufio.MaxScanTokenSize)
|
||||
|
||||
// Use a one-based index for lines and columns, since these errors end up in
|
||||
// the frontend, and users are more familiar with one-based line and column
|
||||
// indexes.
|
||||
lineNum := 1
|
||||
for s.Scan() {
|
||||
var n int
|
||||
n, err = p.processLine(dst, s.Bytes(), lineIdx)
|
||||
n, err = p.processLine(dst, s.Bytes(), lineNum)
|
||||
p.written += n
|
||||
if err != nil {
|
||||
// Don't wrap the error, because it's informative enough as is.
|
||||
return p.result(), err
|
||||
}
|
||||
|
||||
lineIdx++
|
||||
lineNum++
|
||||
}
|
||||
|
||||
r = p.result()
|
||||
|
@ -81,7 +92,7 @@ func (p *Parser) result() (r *ParseResult) {
|
|||
|
||||
// processLine processes a single line. It may write to dst, and if it does, n
|
||||
// is the number of bytes written.
|
||||
func (p *Parser) processLine(dst io.Writer, line []byte, lineIdx int) (n int, err error) {
|
||||
func (p *Parser) processLine(dst io.Writer, line []byte, lineNum int) (n int, err error) {
|
||||
trimmed := bytes.TrimSpace(line)
|
||||
if p.written == 0 && isHTMLLine(trimmed) {
|
||||
return 0, ErrHTML
|
||||
|
@ -94,10 +105,13 @@ func (p *Parser) processLine(dst io.Writer, line []byte, lineIdx int) (n int, er
|
|||
badIdx, isRule = p.parseLineTitle(trimmed)
|
||||
}
|
||||
if badIdx != -1 {
|
||||
badRune, _ := utf8.DecodeRune(trimmed[badIdx:])
|
||||
|
||||
return 0, fmt.Errorf(
|
||||
"line at index %d: character at index %d: non-printable character",
|
||||
lineIdx,
|
||||
badIdx+bytes.Index(line, trimmed),
|
||||
"line %d: character %d: non-printable character %q",
|
||||
lineNum,
|
||||
badIdx+bytes.Index(line, trimmed)+1,
|
||||
badRune,
|
||||
)
|
||||
}
|
||||
|
||||
|
|
|
@ -17,6 +17,9 @@ import (
|
|||
func TestParser_Parse(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
longRule := strings.Repeat("a", rulelist.DefaultRuleBufSize+1) + "\n"
|
||||
tooLongRule := strings.Repeat("a", bufio.MaxScanTokenSize+1) + "\n"
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
in string
|
||||
|
@ -80,20 +83,28 @@ func TestParser_Parse(t *testing.T) {
|
|||
testRuleTextBlocked +
|
||||
">>>\x7F<<<",
|
||||
wantDst: testRuleTextBlocked,
|
||||
wantErrMsg: "line at index 2: " +
|
||||
"character at index 3: " +
|
||||
"non-printable character",
|
||||
wantErrMsg: "line 3: " +
|
||||
"character 4: " +
|
||||
"non-printable character '\\x7f'",
|
||||
wantTitle: "Test Title",
|
||||
wantRulesNum: 1,
|
||||
wantWritten: len(testRuleTextBlocked),
|
||||
}, {
|
||||
name: "too_long",
|
||||
in: strings.Repeat("a", rulelist.MaxRuleLen+1),
|
||||
in: tooLongRule,
|
||||
wantDst: "",
|
||||
wantErrMsg: "scanning filter contents: " + bufio.ErrTooLong.Error(),
|
||||
wantErrMsg: "scanning filter contents: bufio.Scanner: token too long",
|
||||
wantTitle: "",
|
||||
wantRulesNum: 0,
|
||||
wantWritten: 0,
|
||||
}, {
|
||||
name: "longer_than_default",
|
||||
in: longRule,
|
||||
wantDst: longRule,
|
||||
wantErrMsg: "",
|
||||
wantTitle: "",
|
||||
wantRulesNum: 1,
|
||||
wantWritten: len(longRule),
|
||||
}, {
|
||||
name: "bad_tab_and_comment",
|
||||
in: testRuleTextBadTab,
|
||||
|
@ -118,7 +129,7 @@ func TestParser_Parse(t *testing.T) {
|
|||
t.Parallel()
|
||||
|
||||
dst := &bytes.Buffer{}
|
||||
buf := make([]byte, rulelist.MaxRuleLen)
|
||||
buf := make([]byte, rulelist.DefaultRuleBufSize)
|
||||
|
||||
p := rulelist.NewParser()
|
||||
r, err := p.Parse(dst, strings.NewReader(tc.in), buf)
|
||||
|
@ -145,7 +156,7 @@ func TestParser_Parse_writeError(t *testing.T) {
|
|||
return 1, errors.Error("test error")
|
||||
},
|
||||
}
|
||||
buf := make([]byte, rulelist.MaxRuleLen)
|
||||
buf := make([]byte, rulelist.DefaultRuleBufSize)
|
||||
|
||||
p := rulelist.NewParser()
|
||||
r, err := p.Parse(dst, strings.NewReader(testRuleTextBlocked), buf)
|
||||
|
@ -165,7 +176,7 @@ func TestParser_Parse_checksums(t *testing.T) {
|
|||
"# Another comment.\n"
|
||||
)
|
||||
|
||||
buf := make([]byte, rulelist.MaxRuleLen)
|
||||
buf := make([]byte, rulelist.DefaultRuleBufSize)
|
||||
|
||||
p := rulelist.NewParser()
|
||||
r, err := p.Parse(&bytes.Buffer{}, strings.NewReader(withoutComments), buf)
|
||||
|
@ -192,7 +203,7 @@ var (
|
|||
func BenchmarkParser_Parse(b *testing.B) {
|
||||
dst := &bytes.Buffer{}
|
||||
src := strings.NewReader(strings.Repeat(testRuleTextBlocked, 1000))
|
||||
buf := make([]byte, rulelist.MaxRuleLen)
|
||||
buf := make([]byte, rulelist.DefaultRuleBufSize)
|
||||
p := rulelist.NewParser()
|
||||
|
||||
b.ReportAllocs()
|
||||
|
|
|
@ -4,8 +4,6 @@
|
|||
// TODO(a.garipov): Expand.
|
||||
package rulelist
|
||||
|
||||
// MaxRuleLen is the maximum length of a line with a filtering rule, in bytes.
|
||||
//
|
||||
// TODO(a.garipov): Consider changing this to a rune length, like AdGuardDNS
|
||||
// does.
|
||||
const MaxRuleLen = 1024
|
||||
// DefaultRuleBufSize is the default length of a buffer used to read a line with
|
||||
// a filtering rule, in bytes.
|
||||
const DefaultRuleBufSize = 1024
|
||||
|
|
Loading…
Reference in New Issue