packages/deb: add package to extract metadata from .deb files.

Signed-off-by: David Anderson <danderson@tailscale.com>
This commit is contained in:
David Anderson 2021-05-04 20:42:50 -07:00 committed by Dave Anderson
parent 1c6946f971
commit 8236464252
2 changed files with 386 additions and 0 deletions

184
packages/deb/deb.go Normal file
View File

@ -0,0 +1,184 @@
// Copyright (c) 2021 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package deb extracts metadata from Debian packages.
package deb
import (
"archive/tar"
"bufio"
"bytes"
"compress/gzip"
"crypto/md5"
"crypto/sha1"
"crypto/sha256"
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
)
// Info is the Debian package metadata needed to integrate the package
// into a repository.
type Info struct {
// Version is the version of the package, as reported by dpkg.
Version string
// Arch is the Debian CPU architecture the package is for.
Arch string
// Control is the entire contents of the package's control file,
// with leading and trailing whitespace removed.
Control []byte
// MD5 is the MD5 hash of the package file.
MD5 []byte
// SHA1 is the SHA1 hash of the package file.
SHA1 []byte
// SHA256 is the SHA256 hash of the package file.
SHA256 []byte
}
// ReadFile returns Debian package metadata from the .deb file at path.
func ReadFile(path string) (*Info, error) {
f, err := os.Open(path)
if err != nil {
return nil, err
}
return Read(f)
}
// Read returns Debian package metadata from the .deb file in r.
func Read(r io.Reader) (*Info, error) {
b := bufio.NewReader(r)
m5, s1, s256 := md5.New(), sha1.New(), sha256.New()
summers := io.MultiWriter(m5, s1, s256)
r = io.TeeReader(b, summers)
t, err := findControlTar(r)
if err != nil {
return nil, fmt.Errorf("searching for control.tar.gz: %w", err)
}
control, err := findControlFile(t)
if err != nil {
return nil, fmt.Errorf("searching for control file in control.tar.gz: %w", err)
}
arch, version, err := findArchAndVersion(control)
if err != nil {
return nil, fmt.Errorf("extracting version and architecture from control file: %w", err)
}
// Exhaust the remainder of r, so that the summers see the entire file.
if _, err := io.Copy(ioutil.Discard, r); err != nil {
return nil, fmt.Errorf("hashing file: %w", err)
}
return &Info{
Version: version,
Arch: arch,
Control: control,
MD5: m5.Sum(nil),
SHA1: s1.Sum(nil),
SHA256: s256.Sum(nil),
}, nil
}
// findControlTar reads r as an `ar` archive, finds a tarball named
// `control.tar.gz` within, and returns a reader for that file.
func findControlTar(r io.Reader) (tarReader io.Reader, err error) {
var magic [8]byte
if _, err := io.ReadFull(r, magic[:]); err != nil {
return nil, fmt.Errorf("reading ar magic: %w", err)
}
if string(magic[:]) != "!<arch>\n" {
return nil, fmt.Errorf("not an ar file (bad magic %q)", magic)
}
for {
var hdr [60]byte
if _, err := io.ReadFull(r, hdr[:]); err != nil {
return nil, fmt.Errorf("reading file header: %w", err)
}
filename := strings.TrimSpace(string(hdr[:16]))
size, err := strconv.ParseInt(strings.TrimSpace(string(hdr[48:58])), 10, 64)
if err != nil {
return nil, fmt.Errorf("reading size of file %q: %w", filename, err)
}
if filename == "control.tar.gz" {
return io.LimitReader(r, size), nil
}
// files in ar are padded out to 2 bytes.
if size%2 == 1 {
size++
}
if _, err := io.CopyN(ioutil.Discard, r, size); err != nil {
return nil, fmt.Errorf("seeking past file %q: %w", filename, err)
}
}
}
// findControlFile reads r as a tar.gz archive, finds a file named
// `control` within, and returns its contents.
func findControlFile(r io.Reader) (control []byte, err error) {
gz, err := gzip.NewReader(r)
if err != nil {
return nil, fmt.Errorf("decompressing control.tar.gz: %w", err)
}
defer gz.Close()
tr := tar.NewReader(gz)
for {
hdr, err := tr.Next()
if err != nil {
if errors.Is(err, io.EOF) {
return nil, errors.New("EOF while looking for control file in control.tar.gz")
}
return nil, fmt.Errorf("reading tar header: %w", err)
}
if filepath.Clean(hdr.Name) != "control" {
continue
}
// Found control file
break
}
bs, err := ioutil.ReadAll(tr)
if err != nil {
return nil, fmt.Errorf("reading control file: %w", err)
}
return bytes.TrimSpace(bs), nil
}
var (
archKey = []byte("Architecture:")
versionKey = []byte("Version:")
)
// findArchAndVersion extracts the architecture and version strings
// from the given control file.
func findArchAndVersion(control []byte) (arch string, version string, err error) {
b := bytes.NewBuffer(control)
for {
l, err := b.ReadBytes('\n')
if err != nil {
return "", "", err
}
if bytes.HasPrefix(l, archKey) {
arch = string(bytes.TrimSpace(l[len(archKey):]))
} else if bytes.HasPrefix(l, versionKey) {
version = string(bytes.TrimSpace(l[len(versionKey):]))
}
if arch != "" && version != "" {
return arch, version, nil
}
}
}

202
packages/deb/deb_test.go Normal file
View File

@ -0,0 +1,202 @@
// Copyright (c) 2021 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package deb
import (
"bytes"
"crypto/md5"
"crypto/sha1"
"crypto/sha256"
"encoding/hex"
"fmt"
"hash"
"strings"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/goreleaser/nfpm"
_ "github.com/goreleaser/nfpm/deb"
)
func TestDebInfo(t *testing.T) {
tests := []struct {
name string
in []byte
want *Info
wantErr bool
}{
{
name: "simple",
in: mkTestDeb("1.2.3", "amd64"),
want: &Info{
Version: "1.2.3",
Arch: "amd64",
Control: mkControl(
"Package", "tailscale",
"Version", "1.2.3",
"Section", "net",
"Priority", "extra",
"Architecture", "amd64",
"Installed-Size", "0",
"Description", "test package"),
},
},
{
name: "arm64",
in: mkTestDeb("1.2.3", "arm64"),
want: &Info{
Version: "1.2.3",
Arch: "arm64",
Control: mkControl(
"Package", "tailscale",
"Version", "1.2.3",
"Section", "net",
"Priority", "extra",
"Architecture", "arm64",
"Installed-Size", "0",
"Description", "test package"),
},
},
{
name: "unstable",
in: mkTestDeb("1.7.25", "amd64"),
want: &Info{
Version: "1.7.25",
Arch: "amd64",
Control: mkControl(
"Package", "tailscale",
"Version", "1.7.25",
"Section", "net",
"Priority", "extra",
"Architecture", "amd64",
"Installed-Size", "0",
"Description", "test package"),
},
},
// These truncation tests assume the structure of a .deb
// package, which is as follows:
// magic: 8 bytes
// file header: 60 bytes, before each file blob
//
// The first file in a .deb ar is "debian-binary", which is 4
// bytes long and consists of "2.0\n".
// The second file is control.tar.gz, which is what we care
// about introspecting for metadata.
// The final file is data.tar.gz, which we don't care about.
//
// The first file in control.tar.gz is the "control" file we
// want to read for metadata.
{
name: "truncated_ar_magic",
in: mkTestDeb("1.7.25", "amd64")[:4],
wantErr: true,
},
{
name: "truncated_ar_header",
in: mkTestDeb("1.7.25", "amd64")[:30],
wantErr: true,
},
{
name: "missing_control_tgz",
// Truncate right after the "debian-binary" file, which
// makes the file a valid 1-file archive that's missing
// control.tar.gz.
in: mkTestDeb("1.7.25", "amd64")[:72],
wantErr: true,
},
{
name: "truncated_tgz",
in: mkTestDeb("1.7.25", "amd64")[:172],
wantErr: true,
},
}
for _, test := range tests {
// mkTestDeb returns non-deterministic output due to
// timestamps embedded in the package file, so compute the
// wanted hashes on the fly here.
if test.want != nil {
test.want.MD5 = mkHash(test.in, md5.New)
test.want.SHA1 = mkHash(test.in, sha1.New)
test.want.SHA256 = mkHash(test.in, sha256.New)
}
t.Run(test.name, func(t *testing.T) {
b := bytes.NewBuffer(test.in)
got, err := Read(b)
if err != nil {
if test.wantErr {
t.Logf("got expected error: %v", err)
return
}
t.Fatalf("reading deb info: %v", err)
}
if diff := diff(got, test.want); diff != "" {
t.Fatalf("parsed info diff (-got+want):\n%s", diff)
}
})
}
}
func diff(got, want interface{}) string {
matchField := func(name string) func(p cmp.Path) bool {
return func(p cmp.Path) bool {
if len(p) != 3 {
return false
}
return p[2].String() == "."+name
}
}
toLines := cmp.Transformer("lines", func(b []byte) []string { return strings.Split(string(b), "\n") })
toHex := cmp.Transformer("hex", func(b []byte) string { return hex.EncodeToString(b) })
return cmp.Diff(got, want,
cmp.FilterPath(matchField("Control"), toLines),
cmp.FilterPath(matchField("MD5"), toHex),
cmp.FilterPath(matchField("SHA1"), toHex),
cmp.FilterPath(matchField("SHA256"), toHex))
}
func mkTestDeb(version, arch string) []byte {
info := nfpm.WithDefaults(&nfpm.Info{
Name: "tailscale",
Description: "test package",
Arch: arch,
Platform: "linux",
Version: version,
Section: "net",
Priority: "extra",
})
pkg, err := nfpm.Get("deb")
if err != nil {
panic(fmt.Sprintf("getting deb packager: %v", err))
}
var b bytes.Buffer
if err := pkg.Package(info, &b); err != nil {
panic(fmt.Sprintf("creating deb package: %v", err))
}
return b.Bytes()
}
func mkControl(fs ...string) []byte {
if len(fs)%2 != 0 {
panic("odd number of control file fields")
}
var b bytes.Buffer
for i := 0; i < len(fs); i = i + 2 {
k, v := fs[i], fs[i+1]
fmt.Fprintf(&b, "%s: %s\n", k, v)
}
return bytes.TrimSpace(b.Bytes())
}
func mkHash(b []byte, hasher func() hash.Hash) []byte {
h := hasher()
h.Write(b)
return h.Sum(nil)
}