Commit 05aa2b70 authored by Amos Wenger

Teach boar about sniffing files

parent be05aba4
Pipeline #10349 passed with stage
in 2 minutes and 10 seconds
/gopath
/boar-fuzz.zip
/workdir
......@@ -11,6 +11,8 @@ import (
)
func main() {
log.SetFlags(0)
args := os.Args[1:]
if len(args) < 1 {
log.Fatal("Usage: lilboar FILE [...FILE]")
......@@ -18,23 +20,14 @@ func main() {
consumer := &state.Consumer{
OnMessage: func(lvl string, msg string) {
log.Printf("[%s] %s", lvl, msg)
log.Printf("%s", msg)
},
}
ignoreErrors := len(args) > 1
errorf := func(msg string, args ...interface{}) {
if ignoreErrors {
return
}
consumer.Errorf(msg, args...)
}
doFile := func(filePath string) {
file, err := eos.Open(filePath)
if err != nil {
errorf("%v", err)
consumer.Errorf("%s: %v", filePath, err)
return
}
defer file.Close()
......@@ -44,7 +37,7 @@ func main() {
Consumer: consumer,
})
if err != nil {
errorf("%v", err)
consumer.Errorf("%s: %v", filePath, err)
return
}
......
//+build gofuzz
package boar
import (
	"github.com/itchio/boar/memfs"
	"github.com/itchio/wharf/state"
)
_dummyConsumer := &state.Consumer{}
func Fuzz(data []byte) int {
file := memfs.New(data, "data")
_, err := Probe(&ProbeParams{
File: file,
Consumer: dummyConsumer,
})
if err != nil {
panic(err)
}
return 0
}
package memfs
import (
"bytes"
"os"
"time"
"github.com/itchio/wharf/eos"
)
// New builds an eos.File backed by data and carrying the given name.
func New(data []byte, name string) eos.File {
	mf := new(memoryFile)
	mf.name = name
	mf.br = bytes.NewReader(data)
	return mf
}
// memoryFile is a file whose contents are served by a bytes.Reader.
type memoryFile struct {
	// br serves all reads and seeks
	br *bytes.Reader
	// name is what the os.FileInfo returned by Stat reports as file name
	name string
}

// compile-time check: *memoryFile must satisfy eos.File
var _ eos.File = (*memoryFile)(nil)

// Close does nothing and always returns nil.
func (f *memoryFile) Close() error {
	// nothing to release: the memory is left to the GC
	return nil
}

// Read forwards to the underlying reader.
func (f *memoryFile) Read(buf []byte) (int, error) {
	n, err := f.br.Read(buf)
	return n, err
}

// ReadAt forwards to the underlying reader.
func (f *memoryFile) ReadAt(buf []byte, offset int64) (int, error) {
	n, err := f.br.ReadAt(buf, offset)
	return n, err
}

// Seek forwards to the underlying reader.
func (f *memoryFile) Seek(offset int64, whence int) (int64, error) {
	pos, err := f.br.Seek(offset, whence)
	return pos, err
}

// Stat returns an os.FileInfo view of this file; it never fails.
func (f *memoryFile) Stat() (os.FileInfo, error) {
	info := &memoryFileInfo{mf: f}
	return info, nil
}
// memoryFileInfo is the os.FileInfo implementation for a memoryFile.
type memoryFileInfo struct {
	// mf is the file being described
	mf *memoryFile
}

// compile-time check: *memoryFileInfo must satisfy os.FileInfo
var _ os.FileInfo = (*memoryFileInfo)(nil)

// Name returns the name stored on the memoryFile.
func (fi *memoryFileInfo) Name() string {
	file := fi.mf
	return file.name
}

// Size returns the size reported by the file's bytes.Reader.
func (fi *memoryFileInfo) Size() int64 {
	reader := fi.mf.br
	return reader.Size()
}

// Mode always returns a zero os.FileMode.
func (fi *memoryFileInfo) Mode() os.FileMode {
	var mode os.FileMode
	return mode
}

// ModTime returns the current time, evaluated on every call.
func (fi *memoryFileInfo) ModTime() time.Time {
	now := time.Now()
	return now
}

// IsDir always returns false.
func (fi *memoryFileInfo) IsDir() bool {
	return false
}

// Sys always returns nil.
func (fi *memoryFileInfo) Sys() interface{} {
	return nil
}
......@@ -22,21 +22,40 @@ import (
"github.com/itchio/wharf/state"
)
type ArchiveStrategy int
type Strategy int
const (
ArchiveStrategyNone ArchiveStrategy = 0
StrategyNone Strategy = 0
ArchiveStrategyZip ArchiveStrategy = 100
StrategyZip Strategy = 100
ArchiveStrategyTar ArchiveStrategy = 200
ArchiveStrategyTarGz ArchiveStrategy = 201
ArchiveStrategyTarBz2 ArchiveStrategy = 202
ArchiveStrategyTarXz ArchiveStrategy = 203
StrategyTar Strategy = 200
StrategyTarGz Strategy = 201
StrategyTarBz2 Strategy = 202
StrategyTarXz Strategy = 203
ArchiveStrategySevenZip ArchiveStrategy = 300
StrategySevenZip Strategy = 300
)
// String returns the short name of the strategy ("zip", "tar", "tar.gz",
// "tar.bz2", "tar.xz" or "7-zip"), and "<no strategy>" for any other value.
func (s Strategy) String() string {
	name := "<no strategy>"

	switch s {
	case StrategyZip:
		name = "zip"
	case StrategyTar:
		name = "tar"
	case StrategyTarGz:
		name = "tar.gz"
	case StrategyTarBz2:
		name = "tar.bz2"
	case StrategyTarXz:
		name = "tar.xz"
	case StrategySevenZip:
		name = "7-zip"
	}

	return name
}
type StageTwoStrategy int
const (
......@@ -59,40 +78,40 @@ type EntriesLister interface {
Entries() []*savior.Entry
}
type ArchiveInfo struct {
Strategy ArchiveStrategy
type Info struct {
Strategy Strategy
Features savior.ExtractorFeatures
Format string
StageTwoStrategy StageTwoStrategy
PostExtract []string
}
func (ai *ArchiveInfo) String() string {
func (ai *Info) String() string {
res := ""
res += fmt.Sprintf("Format: %s", ai.Format)
res += fmt.Sprintf(", Features: %s", ai.Features)
res += fmt.Sprintf("%s (via %s)", ai.Format, ai.Strategy)
res += fmt.Sprintf(", %s", ai.Features)
if ai.StageTwoStrategy != StageTwoStrategyNone {
res += fmt.Sprintf(", StageTwoStrategy: %s", ai.StageTwoStrategy)
res += fmt.Sprintf(", PostExtract: %v", ai.PostExtract)
res += fmt.Sprintf(", stage two: %s", ai.StageTwoStrategy)
res += fmt.Sprintf(", post-extract: %v", ai.PostExtract)
}
return res
}
func Probe(params *ProbeParams) (*ArchiveInfo, error) {
var strategy ArchiveStrategy
func Probe(params *ProbeParams) (*Info, error) {
var strategy Strategy
if params.Candidate != nil && params.Candidate.Flavor == dash.FlavorNativeLinux {
// might be a mojosetup installer - if not, we won't know what to do with it
strategy = ArchiveStrategyZip
strategy = StrategyZip
} else {
strategy = getStrategy(params.File, params.Consumer)
}
if strategy == ArchiveStrategyNone {
if strategy == StrategyNone {
return nil, ErrUnrecognizedArchiveType
}
info := &ArchiveInfo{
info := &Info{
Strategy: strategy,
}
......@@ -102,12 +121,36 @@ func Probe(params *ProbeParams) (*ArchiveInfo, error) {
return nil, errors.Wrap(err, "getting extractor for file")
}
info.Features = ex.Features()
if szex, ok := ex.(szextractor.SzExtractor); ok {
info.Format = szex.GetFormat()
preferNative := true
switch info.Format {
case "gzip":
info.Strategy = StrategyTarGz
case "bzip2":
info.Strategy = StrategyTarBz2
case "xz":
info.Strategy = StrategyTarXz
case "tar":
info.Strategy = StrategyTar
case "zip":
info.Strategy = StrategyZip
default:
preferNative = false
}
if preferNative {
ex, err = info.GetExtractor(params.File, params.Consumer)
if err != nil {
return nil, errors.Wrap(err, "getting extractor for file")
}
info.Format = info.Strategy.String()
}
} else {
info.Format = info.Strategy.String()
}
info.Features = ex.Features()
var entries []*savior.Entry
stageTwoStrategy := StageTwoStrategyNone
......@@ -183,11 +226,11 @@ func Probe(params *ProbeParams) (*ArchiveInfo, error) {
return info, nil
}
func getStrategy(file eos.File, consumer *state.Consumer) ArchiveStrategy {
func getStrategy(file eos.File, consumer *state.Consumer) Strategy {
stats, err := file.Stat()
if err != nil {
consumer.Warnf("archive: Could not stat file, giving up: %s", err.Error())
return ArchiveStrategyNone
return StrategyNone
}
lowerName := strings.ToLower(stats.Name())
......@@ -198,26 +241,25 @@ func getStrategy(file eos.File, consumer *state.Consumer) ArchiveStrategy {
switch ext {
case ".zip":
return ArchiveStrategyZip
return StrategyZip
case ".tar":
return ArchiveStrategyTar
return StrategyTar
case ".tar.gz":
return ArchiveStrategyTarGz
return StrategyTarGz
case ".tar.bz2":
return ArchiveStrategyTarBz2
return StrategyTarBz2
case ".tar.xz":
return ArchiveStrategyTarXz
return StrategyTarXz
case ".7z", ".rar", ".dmg", ".exe":
return ArchiveStrategySevenZip
return StrategySevenZip
}
consumer.Warnf("archive: Unrecognized extension (%s), deferring to 7-zip", ext)
return ArchiveStrategySevenZip
return StrategySevenZip
}
func (ai *ArchiveInfo) GetExtractor(file eos.File, consumer *state.Consumer) (savior.Extractor, error) {
func (ai *Info) GetExtractor(file eos.File, consumer *state.Consumer) (savior.Extractor, error) {
switch ai.Strategy {
case ArchiveStrategyZip:
case StrategyZip:
stats, err := file.Stat()
if err != nil {
return nil, errors.Wrap(err, "stat'ing file to open as zip archive")
......@@ -228,19 +270,19 @@ func (ai *ArchiveInfo) GetExtractor(file eos.File, consumer *state.Consumer) (sa
return nil, errors.Wrap(err, "creating zip extractor")
}
return ex, nil
case ArchiveStrategyTar:
case StrategyTar:
return tarextractor.New(seeksource.FromFile(file)), nil
case ArchiveStrategyTarGz:
case StrategyTarGz:
return tarextractor.New(gzipsource.New(seeksource.FromFile(file))), nil
case ArchiveStrategyTarBz2:
case StrategyTarBz2:
return tarextractor.New(bzip2source.New(seeksource.FromFile(file))), nil
case ArchiveStrategyTarXz:
case StrategyTarXz:
xs, err := xzsource.New(file, consumer)
if err != nil {
return nil, errors.Wrap(err, "creating xz extractor")
}
return tarextractor.New(xs), nil
case ArchiveStrategySevenZip:
case StrategySevenZip:
szex, err := szextractor.New(file, consumer)
if err != nil {
return nil, errors.Wrap(err, "creating 7-zip extractor")
......@@ -258,22 +300,5 @@ func (ai *ArchiveInfo) GetExtractor(file eos.File, consumer *state.Consumer) (sa
}
}
return nil, fmt.Errorf("unknown ArchiveStrategy %d", ai.Strategy)
}
var (
archiveStrategyStrings = map[ArchiveStrategy]string{
ArchiveStrategyTar: "tar",
ArchiveStrategyTarBz2: "tar.bz2",
ArchiveStrategyTarGz: "tar.gz",
ArchiveStrategyZip: "zip",
}
)
func (as ArchiveStrategy) String() string {
str, ok := archiveStrategyStrings[as]
if !ok {
return "?"
}
return str
return nil, fmt.Errorf("unknown Strategy %d", ai.Strategy)
}
......@@ -12,20 +12,20 @@ import (
type StrategyTest struct {
fileName string
result ArchiveStrategy
result Strategy
}
var (
strategyTests = []StrategyTest{
{"foo_bar.zip", ArchiveStrategyZip},
{"foo_bar.tar", ArchiveStrategyTar},
{"foo_bar.tar.gz", ArchiveStrategyTarGz},
{"foo_bar.tar.bz2", ArchiveStrategyTarBz2},
{"foo_bar.7z", ArchiveStrategySevenZip},
{"foo_bar.rar", ArchiveStrategySevenZip},
{"foo_bar.dmg", ArchiveStrategySevenZip},
{"foo_bar.exe", ArchiveStrategySevenZip},
{"foo_bar", ArchiveStrategySevenZip},
{"foo_bar.zip", StrategyZip},
{"foo_bar.tar", StrategyTar},
{"foo_bar.tar.gz", StrategyTarGz},
{"foo_bar.tar.bz2", StrategyTarBz2},
{"foo_bar.7z", StrategySevenZip},
{"foo_bar.rar", StrategySevenZip},
{"foo_bar.dmg", StrategySevenZip},
{"foo_bar.exe", StrategySevenZip},
{"foo_bar", StrategySevenZip},
}
)
......@@ -45,7 +45,7 @@ func TestGetStrategyNoStat(t *testing.T) {
// Only one test case here
ff := fakeFile{}
strat := getStrategy(ff, &state.Consumer{})
assert.Equal(t, ArchiveStrategyNone, strat)
assert.Equal(t, StrategyNone, strat)
}
type fakeFile struct {
......
......@@ -12,7 +12,7 @@ import (
"github.com/itchio/wharf/state"
)
func (ai *ArchiveInfo) ApplyStageTwo(consumer *state.Consumer, aRes *savior.ExtractorResult, installFolder string) (*savior.ExtractorResult, error) {
func (ai *Info) ApplyStageTwo(consumer *state.Consumer, aRes *savior.ExtractorResult, installFolder string) (*savior.ExtractorResult, error) {
switch ai.StageTwoStrategy {
case StageTwoStrategyMojoSetup:
return ai.applyMojoSetupStageTwo(consumer, aRes, installFolder)
......@@ -22,7 +22,7 @@ func (ai *ArchiveInfo) ApplyStageTwo(consumer *state.Consumer, aRes *savior.Extr
return aRes, nil
}
func (ai *ArchiveInfo) applyMojoSetupStageTwo(consumer *state.Consumer, aRes *savior.ExtractorResult, installFolder string) (*savior.ExtractorResult, error) {
func (ai *Info) applyMojoSetupStageTwo(consumer *state.Consumer, aRes *savior.ExtractorResult, installFolder string) (*savior.ExtractorResult, error) {
if len(ai.PostExtract) == 0 {
consumer.Infof("No post-extract for mojosetup stage two")
}
......@@ -36,16 +36,16 @@ func (ai *ArchiveInfo) applyMojoSetupStageTwo(consumer *state.Consumer, aRes *sa
}
defer file.Close()
archiveInfo, err := Probe(&ProbeParams{
Info, err := Probe(&ProbeParams{
Consumer: consumer,
File: file,
})
if err != nil {
return errors.Wrap(err, "probing stage-two file")
}
consumer.Infof("鉁 Post-extract is a supported archive format (%s)", archiveInfo.Format)
consumer.Infof("鉁 Post-extract is a supported archive format (%s)", Info.Format)
ex, err := archiveInfo.GetExtractor(file, consumer)
ex, err := Info.GetExtractor(file, consumer)
if err != nil {
return errors.Wrap(err, "getting extractor for stage-two file")
}
......
......@@ -3,7 +3,6 @@ package szextractor
import (
"bytes"
"fmt"
"io"
"os"
"path/filepath"
"runtime"
......@@ -46,6 +45,19 @@ type szExtractor struct {
freed bool
}
// attempt describes one way of trying to open an archive with 7-zip.
type attempt struct {
	// signature is the flag passed to lib.OpenArchive for this attempt
	signature bool
	// ext is the extension passed to sz.NewInStream for this attempt
	ext string
}

// String returns "signature" when the signature flag is set, and the
// extension prefixed with a dot otherwise.
func (a attempt) String() string {
	if !a.signature {
		return fmt.Sprintf(".%s", a.ext)
	}
	return "signature"
}
var _ SzExtractor = (*szExtractor)(nil)
func New(file eos.File, consumer *state.Consumer) (SzExtractor, error) {
......@@ -77,64 +89,49 @@ func New(file eos.File, consumer *state.Consumer) (SzExtractor, error) {
ext := nameToExt(stats.Name())
in, err := sz.NewInStream(file, ext, stats.Size())
if err != nil {
return nil, errors.Wrap(err, "creating 7-zip input stream")
var attempts []attempt
if ext != "" {
attempts = append(attempts, attempt{ext: ext})
}
se.in = in
attempts = append(attempts, attempt{signature: true})
var tries []string
switch ext {
case "exe":
// some self-extracting installers only work when we set "cab" explicitly
attempts = append(attempts, attempt{ext: "cab"})
case "":
// .exe and .dmg won't work by signature, so we have to try them explicitly
attempts = append(attempts, attempt{ext: "exe"})
attempts = append(attempts, attempt{ext: "cab"})
attempts = append(attempts, attempt{ext: "dmg"})
}
// try by extension first
tries = append(tries, fmt.Sprintf("ext:%s", ext))
a, err := lib.OpenArchive(in, false)
if err != nil {
// try by signature next
_, err = in.Seek(0, io.SeekStart)
for _, attempt := range attempts {
in, err := sz.NewInStream(file, attempt.ext, stats.Size())
if err != nil {
return nil, errors.WithStack(err)
return nil, errors.Wrap(err, "creating 7-zip input stream")
}
tries = append(tries, "signature")
a, err = lib.OpenArchive(in, true)
if err != nil {
// With the current libc7zip setup, 7-zip will refuse to
// extract some self-extracting installers - for those,
// we need to give it the `.cab` extension instead
// Maybe the multivolume interface takes care of that?
// Command-line `7z` has no issue with them.
if ext == "exe" {
// if it was an .exe, try with a .cab extension
in.Free()
ext = "cab"
tries = append(tries, fmt.Sprintf("ext:%s", ext))
in, err := sz.NewInStream(file, ext, stats.Size())
if err != nil {
return nil, errors.Wrap(err, "creating input stream")
}
a, err = lib.OpenArchive(in, false) // by ext
if err != nil {
return nil, errors.WithMessage(err, fmt.Sprintf("could not open with 7-zip (tried %v)", tries))
}
} else {
// well, we're out of options
return nil, errors.Errorf("could not open with 7-zip (tried %v): %s", tries, stats.Name())
}
archive, err := lib.OpenArchive(in, attempt.signature)
if err == nil {
se.in = in
se.archive = archive
break
}
in.Free()
}
se.archive = a
se.format = a.GetArchiveFormat()
if se.archive == nil {
return nil, errors.Errorf("could not open with 7-zip, tried %v", attempts)
}
se.format = se.archive.GetArchiveFormat()
if se.format == "7z" {
// .7z is a known non-resumable format - resuming means a lot
// of extra IO and decompression work on already-extracted blocks,
// so we just don't want to do it on-the-fly
se.resumeSupport = savior.ResumeSupportNone
}
return se, nil
}
......
package szextractor_test
import (
"bytes"
"log"
"os"
"testing"
"time"
"github.com/itchio/boar/memfs"
"github.com/itchio/boar/szextractor"
"github.com/itchio/savior"
"github.com/itchio/savior/checker"
"github.com/itchio/wharf/eos"
"github.com/itchio/wharf/state"
"github.com/stretchr/testify/assert"
)
......@@ -26,10 +23,7 @@ func TestSzExtractor(t *testing.T) {
sink := checker.MakeTestSinkAdvanced(40)
zipBytes := checker.MakeZip(t, sink)
file := &memoryFile{
br: bytes.NewReader(zipBytes),
name: "szextractor_test.zip",
}
file := memfs.New(zipBytes, "szextractor_test.zip")
initialConsumer := &state.Consumer{
OnMessage: func(lvl string, message string) {
......@@ -60,62 +54,3 @@ func TestSzExtractor(t *testing.T) {
return i%2 == 0
})
}
// memoryFile is a file whose contents are served by a bytes.Reader.
type memoryFile struct {
	// br serves all reads and seeks
	br *bytes.Reader
	// name is what the os.FileInfo returned by Stat reports as file name
	name string
}

// compile-time check: *memoryFile must satisfy eos.File
var _ eos.File = (*memoryFile)(nil)

// Close does nothing and always returns nil.
func (f *memoryFile) Close() error {
	// nothing to release: the memory is left to the GC
	return nil
}

// Read forwards to the underlying reader.
func (f *memoryFile) Read(buf []byte) (int, error) {
	n, err := f.br.Read(buf)
	return n, err
}

// ReadAt forwards to the underlying reader.
func (f *memoryFile) ReadAt(buf []byte, offset int64) (int, error) {
	n, err := f.br.ReadAt(buf, offset)
	return n, err
}

// Seek forwards to the underlying reader.
func (f *memoryFile) Seek(offset int64, whence int) (int64, error) {
	pos, err := f.br.Seek(offset, whence)
	return pos, err
}

// Stat returns an os.FileInfo view of this file; it never fails.
func (f *memoryFile) Stat() (os.FileInfo, error) {
	info := &memoryFileInfo{mf: f}
	return info, nil
}
// memoryFileInfo is the os.FileInfo implementation for a memoryFile.
type memoryFileInfo struct {
	// mf is the file being described
	mf *memoryFile
}

// compile-time check: *memoryFileInfo must satisfy os.FileInfo
var _ os.FileInfo = (*memoryFileInfo)(nil)

// Name returns the name stored on the memoryFile.
func (fi *memoryFileInfo) Name() string {
	file := fi.mf
	return file.name
}

// Size returns the size reported by the file's bytes.Reader.
func (fi *memoryFileInfo) Size() int64 {
	reader := fi.mf.br
	return reader.Size()
}

// Mode always returns a zero os.FileMode.
func (fi *memoryFileInfo) Mode() os.FileMode {
	var mode os.FileMode
	return mode
}

// ModTime returns the current time, evaluated on every call.
func (fi *memoryFileInfo) ModTime() time.Time {
	now := time.Now()
	return now
}

// IsDir always returns false.
func (fi *memoryFileInfo) IsDir() bool {
	return false
}

// Sys always returns nil.
func (fi *memoryFileInfo) Sys() interface{} {
	return nil
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment