Pull request 1921: 6003-relax-scan-limit
Updates #6003. Squashed commit of the following: commit 1cc42303c29edc621802fc182ccb5701e412f099 Author: Ainar Garipov <A.Garipov@AdGuard.COM> Date: Thu Jul 13 13:47:41 2023 +0300 all: fix chlog commit e835084c7aac6384ea7b0886e6b3b1d614438baa Author: Ainar Garipov <A.Garipov@AdGuard.COM> Date: Thu Jul 13 13:40:45 2023 +0300 rulelist: imp longer line handling
This commit is contained in:
parent
de63eeabfa
commit
f22d893845
|
@ -23,11 +23,18 @@ See also the [v0.107.35 GitHub milestone][ms-v0.107.35].
|
||||||
NOTE: Add new changes BELOW THIS COMMENT.
|
NOTE: Add new changes BELOW THIS COMMENT.
|
||||||
-->
|
-->
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
|
||||||
|
- `bufio.Scanner: token too long` errors when trying to add filtering-rule lists
|
||||||
|
with lines over 1024 bytes long ([#6003]).
|
||||||
|
|
||||||
### Removed
|
### Removed
|
||||||
|
|
||||||
- Default exposure of the non-standard ports 784 and 8853 for DNS-over-QUIC in
|
- Default exposure of the non-standard ports 784 and 8853 for DNS-over-QUIC in
|
||||||
the `Dockerfile`.
|
the `Dockerfile`.
|
||||||
|
|
||||||
|
[#6003]: https://github.com/AdguardTeam/AdGuardHome/issues/6003
|
||||||
|
|
||||||
<!--
|
<!--
|
||||||
NOTE: Add new changes ABOVE THIS COMMENT.
|
NOTE: Add new changes ABOVE THIS COMMENT.
|
||||||
-->
|
-->
|
||||||
|
|
|
@ -943,7 +943,7 @@ func New(c *Config, blockFilters []Filter) (d *DNSFilter, err error) {
|
||||||
d = &DNSFilter{
|
d = &DNSFilter{
|
||||||
bufPool: &sync.Pool{
|
bufPool: &sync.Pool{
|
||||||
New: func() (buf any) {
|
New: func() (buf any) {
|
||||||
bufVal := make([]byte, rulelist.MaxRuleLen)
|
bufVal := make([]byte, rulelist.DefaultRuleBufSize)
|
||||||
|
|
||||||
return &bufVal
|
return &bufVal
|
||||||
},
|
},
|
||||||
|
|
|
@ -7,6 +7,7 @@ import (
|
||||||
"hash/crc32"
|
"hash/crc32"
|
||||||
"io"
|
"io"
|
||||||
"unicode"
|
"unicode"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
"github.com/AdguardTeam/golibs/errors"
|
"github.com/AdguardTeam/golibs/errors"
|
||||||
)
|
)
|
||||||
|
@ -48,19 +49,29 @@ type ParseResult struct {
|
||||||
// nil.
|
// nil.
|
||||||
func (p *Parser) Parse(dst io.Writer, src io.Reader, buf []byte) (r *ParseResult, err error) {
|
func (p *Parser) Parse(dst io.Writer, src io.Reader, buf []byte) (r *ParseResult, err error) {
|
||||||
s := bufio.NewScanner(src)
|
s := bufio.NewScanner(src)
|
||||||
s.Buffer(buf, MaxRuleLen)
|
|
||||||
|
|
||||||
lineIdx := 0
|
// Don't use [DefaultRuleBufSize] as the maximum size, since some
|
||||||
|
// filtering-rule lists compressed by e.g. HostlistsCompiler can have very
|
||||||
|
// large lines. The buffer optimization still works for the more common
|
||||||
|
// case of reasonably-sized lines.
|
||||||
|
//
|
||||||
|
// See https://github.com/AdguardTeam/AdGuardHome/issues/6003.
|
||||||
|
s.Buffer(buf, bufio.MaxScanTokenSize)
|
||||||
|
|
||||||
|
// Use a one-based index for lines and columns, since these errors end up in
|
||||||
|
// the frontend, and users are more familiar with one-based line and column
|
||||||
|
// indexes.
|
||||||
|
lineNum := 1
|
||||||
for s.Scan() {
|
for s.Scan() {
|
||||||
var n int
|
var n int
|
||||||
n, err = p.processLine(dst, s.Bytes(), lineIdx)
|
n, err = p.processLine(dst, s.Bytes(), lineNum)
|
||||||
p.written += n
|
p.written += n
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// Don't wrap the error, because it's informative enough as is.
|
// Don't wrap the error, because it's informative enough as is.
|
||||||
return p.result(), err
|
return p.result(), err
|
||||||
}
|
}
|
||||||
|
|
||||||
lineIdx++
|
lineNum++
|
||||||
}
|
}
|
||||||
|
|
||||||
r = p.result()
|
r = p.result()
|
||||||
|
@ -81,7 +92,7 @@ func (p *Parser) result() (r *ParseResult) {
|
||||||
|
|
||||||
// processLine processes a single line. It may write to dst, and if it does, n
|
// processLine processes a single line. It may write to dst, and if it does, n
|
||||||
// is the number of bytes written.
|
// is the number of bytes written.
|
||||||
func (p *Parser) processLine(dst io.Writer, line []byte, lineIdx int) (n int, err error) {
|
func (p *Parser) processLine(dst io.Writer, line []byte, lineNum int) (n int, err error) {
|
||||||
trimmed := bytes.TrimSpace(line)
|
trimmed := bytes.TrimSpace(line)
|
||||||
if p.written == 0 && isHTMLLine(trimmed) {
|
if p.written == 0 && isHTMLLine(trimmed) {
|
||||||
return 0, ErrHTML
|
return 0, ErrHTML
|
||||||
|
@ -94,10 +105,13 @@ func (p *Parser) processLine(dst io.Writer, line []byte, lineIdx int) (n int, er
|
||||||
badIdx, isRule = p.parseLineTitle(trimmed)
|
badIdx, isRule = p.parseLineTitle(trimmed)
|
||||||
}
|
}
|
||||||
if badIdx != -1 {
|
if badIdx != -1 {
|
||||||
|
badRune, _ := utf8.DecodeRune(trimmed[badIdx:])
|
||||||
|
|
||||||
return 0, fmt.Errorf(
|
return 0, fmt.Errorf(
|
||||||
"line at index %d: character at index %d: non-printable character",
|
"line %d: character %d: non-printable character %q",
|
||||||
lineIdx,
|
lineNum,
|
||||||
badIdx+bytes.Index(line, trimmed),
|
badIdx+bytes.Index(line, trimmed)+1,
|
||||||
|
badRune,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -17,6 +17,9 @@ import (
|
||||||
func TestParser_Parse(t *testing.T) {
|
func TestParser_Parse(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
|
longRule := strings.Repeat("a", rulelist.DefaultRuleBufSize+1) + "\n"
|
||||||
|
tooLongRule := strings.Repeat("a", bufio.MaxScanTokenSize+1) + "\n"
|
||||||
|
|
||||||
testCases := []struct {
|
testCases := []struct {
|
||||||
name string
|
name string
|
||||||
in string
|
in string
|
||||||
|
@ -80,20 +83,28 @@ func TestParser_Parse(t *testing.T) {
|
||||||
testRuleTextBlocked +
|
testRuleTextBlocked +
|
||||||
">>>\x7F<<<",
|
">>>\x7F<<<",
|
||||||
wantDst: testRuleTextBlocked,
|
wantDst: testRuleTextBlocked,
|
||||||
wantErrMsg: "line at index 2: " +
|
wantErrMsg: "line 3: " +
|
||||||
"character at index 3: " +
|
"character 4: " +
|
||||||
"non-printable character",
|
"non-printable character '\\x7f'",
|
||||||
wantTitle: "Test Title",
|
wantTitle: "Test Title",
|
||||||
wantRulesNum: 1,
|
wantRulesNum: 1,
|
||||||
wantWritten: len(testRuleTextBlocked),
|
wantWritten: len(testRuleTextBlocked),
|
||||||
}, {
|
}, {
|
||||||
name: "too_long",
|
name: "too_long",
|
||||||
in: strings.Repeat("a", rulelist.MaxRuleLen+1),
|
in: tooLongRule,
|
||||||
wantDst: "",
|
wantDst: "",
|
||||||
wantErrMsg: "scanning filter contents: " + bufio.ErrTooLong.Error(),
|
wantErrMsg: "scanning filter contents: bufio.Scanner: token too long",
|
||||||
wantTitle: "",
|
wantTitle: "",
|
||||||
wantRulesNum: 0,
|
wantRulesNum: 0,
|
||||||
wantWritten: 0,
|
wantWritten: 0,
|
||||||
|
}, {
|
||||||
|
name: "longer_than_default",
|
||||||
|
in: longRule,
|
||||||
|
wantDst: longRule,
|
||||||
|
wantErrMsg: "",
|
||||||
|
wantTitle: "",
|
||||||
|
wantRulesNum: 1,
|
||||||
|
wantWritten: len(longRule),
|
||||||
}, {
|
}, {
|
||||||
name: "bad_tab_and_comment",
|
name: "bad_tab_and_comment",
|
||||||
in: testRuleTextBadTab,
|
in: testRuleTextBadTab,
|
||||||
|
@ -118,7 +129,7 @@ func TestParser_Parse(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
dst := &bytes.Buffer{}
|
dst := &bytes.Buffer{}
|
||||||
buf := make([]byte, rulelist.MaxRuleLen)
|
buf := make([]byte, rulelist.DefaultRuleBufSize)
|
||||||
|
|
||||||
p := rulelist.NewParser()
|
p := rulelist.NewParser()
|
||||||
r, err := p.Parse(dst, strings.NewReader(tc.in), buf)
|
r, err := p.Parse(dst, strings.NewReader(tc.in), buf)
|
||||||
|
@ -145,7 +156,7 @@ func TestParser_Parse_writeError(t *testing.T) {
|
||||||
return 1, errors.Error("test error")
|
return 1, errors.Error("test error")
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
buf := make([]byte, rulelist.MaxRuleLen)
|
buf := make([]byte, rulelist.DefaultRuleBufSize)
|
||||||
|
|
||||||
p := rulelist.NewParser()
|
p := rulelist.NewParser()
|
||||||
r, err := p.Parse(dst, strings.NewReader(testRuleTextBlocked), buf)
|
r, err := p.Parse(dst, strings.NewReader(testRuleTextBlocked), buf)
|
||||||
|
@ -165,7 +176,7 @@ func TestParser_Parse_checksums(t *testing.T) {
|
||||||
"# Another comment.\n"
|
"# Another comment.\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
buf := make([]byte, rulelist.MaxRuleLen)
|
buf := make([]byte, rulelist.DefaultRuleBufSize)
|
||||||
|
|
||||||
p := rulelist.NewParser()
|
p := rulelist.NewParser()
|
||||||
r, err := p.Parse(&bytes.Buffer{}, strings.NewReader(withoutComments), buf)
|
r, err := p.Parse(&bytes.Buffer{}, strings.NewReader(withoutComments), buf)
|
||||||
|
@ -192,7 +203,7 @@ var (
|
||||||
func BenchmarkParser_Parse(b *testing.B) {
|
func BenchmarkParser_Parse(b *testing.B) {
|
||||||
dst := &bytes.Buffer{}
|
dst := &bytes.Buffer{}
|
||||||
src := strings.NewReader(strings.Repeat(testRuleTextBlocked, 1000))
|
src := strings.NewReader(strings.Repeat(testRuleTextBlocked, 1000))
|
||||||
buf := make([]byte, rulelist.MaxRuleLen)
|
buf := make([]byte, rulelist.DefaultRuleBufSize)
|
||||||
p := rulelist.NewParser()
|
p := rulelist.NewParser()
|
||||||
|
|
||||||
b.ReportAllocs()
|
b.ReportAllocs()
|
||||||
|
|
|
@ -4,8 +4,6 @@
|
||||||
// TODO(a.garipov): Expand.
|
// TODO(a.garipov): Expand.
|
||||||
package rulelist
|
package rulelist
|
||||||
|
|
||||||
// MaxRuleLen is the maximum length of a line with a filtering rule, in bytes.
|
// DefaultRuleBufSize is the default length of a buffer used to read a line with
|
||||||
//
|
// a filtering rule, in bytes.
|
||||||
// TODO(a.garipov): Consider changing this to a rune length, like AdGuardDNS
|
const DefaultRuleBufSize = 1024
|
||||||
// does.
|
|
||||||
const MaxRuleLen = 1024
|
|
||||||
|
|
Loading…
Reference in New Issue