util/httphdr: add new package for parsing HTTP headers (#9797)

This adds support for parsing Range and Content-Range headers
according to RFC 7230. The package could be extended in the future
to handle other headers.

Updates tailscale/corp#14772

Signed-off-by: Joe Tsai <joetsai@digital-static.net>
This commit is contained in:
Joe Tsai 2023-10-13 15:38:22 -07:00 committed by GitHub
parent af5a586463
commit 9cb6c5bb78
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 293 additions and 0 deletions

197
util/httphdr/httphdr.go Normal file
View File

@ -0,0 +1,197 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
// Package httphdr implements functionality for parsing and formatting
// standard HTTP headers.
package httphdr
import (
"bytes"
"strconv"
"strings"
)
// Range is a range of bytes within some content.
type Range struct {
// Start is the starting offset.
// It is zero if Length is negative; it must not be negative.
Start int64
// Length is the length of the content.
// It is zero if the length extends to the end of the content.
// It is negative if the length is relative to the end (e.g., last 5 bytes).
Length int64
}
// ows is optional whitespace.
const ows = " \t" // per RFC 7230, section 3.2.3
// ParseRange parses a "Range" header per RFC 7233, section 3.
// It only handles "Range" headers where the units is "bytes".
// The "Range" header is usually only specified in GET requests.
func ParseRange(hdr string) (ranges []Range, ok bool) {
// Grammar per RFC 7233, appendix D:
// Range = byte-ranges-specifier | other-ranges-specifier
// byte-ranges-specifier = bytes-unit "=" byte-range-set
// bytes-unit = "bytes"
// byte-range-set =
// *("," OWS)
// (byte-range-spec | suffix-byte-range-spec)
// *(OWS "," [OWS ( byte-range-spec | suffix-byte-range-spec )])
// byte-range-spec = first-byte-pos "-" [last-byte-pos]
// suffix-byte-range-spec = "-" suffix-length
// We do not support other-ranges-specifier.
// All other identifiers are 1*DIGIT.
hdr = strings.Trim(hdr, ows) // per RFC 7230, section 3.2
units, elems, hasUnits := strings.Cut(hdr, "=")
elems = strings.TrimLeft(elems, ","+ows)
for _, elem := range strings.Split(elems, ",") {
elem = strings.Trim(elem, ows) // per RFC 7230, section 7
switch {
case strings.HasPrefix(elem, "-"): // i.e., "-" suffix-length
n, ok := parseNumber(strings.TrimPrefix(elem, "-"))
if !ok {
return ranges, false
}
ranges = append(ranges, Range{0, -n})
case strings.HasSuffix(elem, "-"): // i.e., first-byte-pos "-"
n, ok := parseNumber(strings.TrimSuffix(elem, "-"))
if !ok {
return ranges, false
}
ranges = append(ranges, Range{n, 0})
default: // i.e., first-byte-pos "-" last-byte-pos
prefix, suffix, hasDash := strings.Cut(elem, "-")
n, ok2 := parseNumber(prefix)
m, ok3 := parseNumber(suffix)
if !hasDash || !ok2 || !ok3 || m < n {
return ranges, false
}
ranges = append(ranges, Range{n, m - n + 1})
}
}
return ranges, units == "bytes" && hasUnits && len(ranges) > 0 // must see at least one element per RFC 7233, section 2.1
}
// FormatRange formats a "Range" header per RFC 7233, section 3.
// It only handles "Range" headers where the units is "bytes".
// The "Range" header is usually only specified in GET requests.
func FormatRange(ranges []Range) (hdr string, ok bool) {
b := []byte("bytes=")
for _, r := range ranges {
switch {
case r.Length > 0: // i.e., first-byte-pos "-" last-byte-pos
if r.Start < 0 {
return string(b), false
}
b = strconv.AppendUint(b, uint64(r.Start), 10)
b = append(b, '-')
b = strconv.AppendUint(b, uint64(r.Start+r.Length-1), 10)
b = append(b, ',')
case r.Length == 0: // i.e., first-byte-pos "-"
if r.Start < 0 {
return string(b), false
}
b = strconv.AppendUint(b, uint64(r.Start), 10)
b = append(b, '-')
b = append(b, ',')
case r.Length < 0: // i.e., "-" suffix-length
if r.Start != 0 {
return string(b), false
}
b = append(b, '-')
b = strconv.AppendUint(b, uint64(-r.Length), 10)
b = append(b, ',')
default:
return string(b), false
}
}
return string(bytes.TrimRight(b, ",")), len(ranges) > 0
}
// ParseContentRange parses a "Content-Range" header per RFC 7233, section 4.2.
// It only handles "Content-Range" headers where the units is "bytes".
// The "Content-Range" header is usually only specified in HTTP responses.
//
// If only the completeLength is specified, then start and length are both zero.
//
// Otherwise, the parses the start and length and the optional completeLength,
// which is -1 if unspecified. The start is non-negative and the length is positive.
func ParseContentRange(hdr string) (start, length, completeLength int64, ok bool) {
// Grammar per RFC 7233, appendix D:
// Content-Range = byte-content-range | other-content-range
// byte-content-range = bytes-unit SP (byte-range-resp | unsatisfied-range)
// bytes-unit = "bytes"
// byte-range-resp = byte-range "/" (complete-length | "*")
// unsatisfied-range = "*/" complete-length
// byte-range = first-byte-pos "-" last-byte-pos
// We do not support other-content-range.
// All other identifiers are 1*DIGIT.
hdr = strings.Trim(hdr, ows) // per RFC 7230, section 3.2
suffix, hasUnits := strings.CutPrefix(hdr, "bytes ")
suffix, unsatisfied := strings.CutPrefix(suffix, "*/")
if unsatisfied { // i.e., unsatisfied-range
n, ok := parseNumber(suffix)
if !ok {
return start, length, completeLength, false
}
completeLength = n
} else { // i.e., byte-range "/" (complete-length | "*")
prefix, suffix, hasDash := strings.Cut(suffix, "-")
middle, suffix, hasSlash := strings.Cut(suffix, "/")
n, ok0 := parseNumber(prefix)
m, ok1 := parseNumber(middle)
o, ok2 := parseNumber(suffix)
if suffix == "*" {
o, ok2 = -1, true
}
if !hasDash || !hasSlash || !ok0 || !ok1 || !ok2 || m < n || (o >= 0 && o <= m) {
return start, length, completeLength, false
}
start = n
length = m - n + 1
completeLength = o
}
return start, length, completeLength, hasUnits
}
// FormatContentRange parses a "Content-Range" header per RFC 7233, section 4.2.
// It only handles "Content-Range" headers where the units is "bytes".
// The "Content-Range" header is usually only specified in HTTP responses.
//
// If start and length are non-positive, then it encodes just the completeLength,
// which must be a non-negative value.
//
// Otherwise, it encodes the start and length as a byte-range,
// and optionally emits the complete length if it is non-negative.
// The length must be positive (as RFC 7233 uses inclusive end offsets).
func FormatContentRange(start, length, completeLength int64) (hdr string, ok bool) {
b := []byte("bytes ")
switch {
case start <= 0 && length <= 0 && completeLength >= 0: // i.e., unsatisfied-range
b = append(b, "*/"...)
b = strconv.AppendUint(b, uint64(completeLength), 10)
ok = true
case start >= 0 && length > 0: // i.e., byte-range "/" (complete-length | "*")
b = strconv.AppendUint(b, uint64(start), 10)
b = append(b, '-')
b = strconv.AppendUint(b, uint64(start+length-1), 10)
b = append(b, '/')
if completeLength >= 0 {
b = strconv.AppendUint(b, uint64(completeLength), 10)
ok = completeLength >= start+length && start+length > 0
} else {
b = append(b, '*')
ok = true
}
}
return string(b), ok
}
// parseNumber parses s as an unsigned decimal integer.
// It parses according to the 1*DIGIT grammar, which allows leading zeros.
func parseNumber(s string) (int64, bool) {
suffix := strings.TrimLeft(s, "0123456789")
prefix := s[:len(s)-len(suffix)]
n, err := strconv.ParseInt(prefix, 10, 64)
return n, suffix == "" && err == nil
}

View File

@ -0,0 +1,96 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package httphdr
import (
"testing"
"github.com/google/go-cmp/cmp"
)
func valOk[T any](v T, ok bool) (out struct {
V T
Ok bool
}) {
out.V = v
out.Ok = ok
return out
}
func TestRange(t *testing.T) {
tests := []struct {
in string
want []Range
wantOk bool
roundtrip bool
}{
{"", nil, false, false},
{"1-3", nil, false, false},
{"units=1-3", []Range{{1, 3}}, false, false},
{"bytes=1-3", []Range{{1, 3}}, true, true},
{"bytes=#-3", nil, false, false},
{"bytes=#-", nil, false, false},
{"bytes=13", nil, false, false},
{"bytes=1-#", nil, false, false},
{"bytes=-#", nil, false, false},
{"bytes= , , , ,\t , \t 1-3", []Range{{1, 3}}, true, false},
{"bytes=1-1", []Range{{1, 1}}, true, true},
{"bytes=01-01", []Range{{1, 1}}, true, false},
{"bytes=1-0", nil, false, false},
{"bytes=0-5,2-3", []Range{{0, 6}, {2, 2}}, true, true},
{"bytes=2-3,0-5", []Range{{2, 2}, {0, 6}}, true, true},
{"bytes=0-5,2-,-5", []Range{{0, 6}, {2, 0}, {0, -5}}, true, true},
}
for _, tt := range tests {
got, gotOk := ParseRange(tt.in)
if d := cmp.Diff(valOk(got, gotOk), valOk(tt.want, tt.wantOk)); d != "" {
t.Errorf("ParseRange(%q) mismatch (-got +want):\n%s", tt.in, d)
}
if tt.roundtrip {
got, gotOk := FormatRange(tt.want)
if d := cmp.Diff(valOk(got, gotOk), valOk(tt.in, tt.wantOk)); d != "" {
t.Errorf("FormatRange(%v) mismatch (-got +want):\n%s", tt.want, d)
}
}
}
}
type contentRange struct{ Start, Length, CompleteLength int64 }
func TestContentRange(t *testing.T) {
tests := []struct {
in string
want contentRange
wantOk bool
roundtrip bool
}{
{"", contentRange{}, false, false},
{"bytes 5-6/*", contentRange{5, 2, -1}, true, true},
{"units 5-6/*", contentRange{}, false, false},
{"bytes 5-6/*", contentRange{}, false, false},
{"bytes 5-5/*", contentRange{5, 1, -1}, true, true},
{"bytes 5-4/*", contentRange{}, false, false},
{"bytes 5-5/6", contentRange{5, 1, 6}, true, true},
{"bytes 05-005/0006", contentRange{5, 1, 6}, true, false},
{"bytes 5-5/5", contentRange{}, false, false},
{"bytes #-5/6", contentRange{}, false, false},
{"bytes 5-#/6", contentRange{}, false, false},
{"bytes 5-5/#", contentRange{}, false, false},
}
for _, tt := range tests {
start, length, completeLength, gotOk := ParseContentRange(tt.in)
got := contentRange{start, length, completeLength}
if d := cmp.Diff(valOk(got, gotOk), valOk(tt.want, tt.wantOk)); d != "" {
t.Errorf("ParseContentRange mismatch (-got +want):\n%s", d)
}
if tt.roundtrip {
got, gotOk := FormatContentRange(tt.want.Start, tt.want.Length, tt.want.CompleteLength)
if d := cmp.Diff(valOk(got, gotOk), valOk(tt.in, tt.wantOk)); d != "" {
t.Errorf("FormatContentRange mismatch (-got +want):\n%s", d)
}
}
}
}