Support extraction whitelist
Closes itchio#11

- Add only_files query param to /extract
- Rename some args and fields for clarity
- Analyzer now inspects file name, not bucket key
- Test extraction results with only_files
- Delete MusicAnalyzer and "contents" query param
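
For context, a minimal sketch of how a caller might use the new only_files parameter. The server address and the key/prefix values are hypothetical, chosen only for illustration; only the only_files parameter name and its JSON-array encoding come from this change.

package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"net/url"
)

func main() {
	// Hypothetical zipserver address, for illustration only.
	endpoint := "http://localhost:8090/extract"

	// only_files is a JSON-encoded array of file names inside the zip;
	// anything not listed is skipped during extraction.
	onlyFiles, err := json.Marshal([]string{"index.html", "Build/game.wasm.gz"})
	if err != nil {
		log.Fatal(err)
	}

	params := url.Values{}
	params.Set("key", "uploads/game.zip")       // source object (value assumed)
	params.Set("prefix", "extracted/game")      // destination prefix (value assumed)
	params.Set("only_files", string(onlyFiles)) // the new extraction whitelist

	resp, err := http.Get(endpoint + "?" + params.Encode())
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}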
hryx committed Nov 29, 2022
1 parent 52f949a commit 4241909
Showing 13 changed files with 118 additions and 230 deletions.
4 changes: 2 additions & 2 deletions zipserver/analyzer.go
@@ -14,11 +14,11 @@ var ErrSkipped = errors.New("skipped file")
type Analyzer interface {
// Analyze should return info about the contained file.
// It should return ErrSkipped if a file was ignored.
Analyze(r io.Reader, key string) (AnalyzeResult, error)
Analyze(r io.Reader, filename string) (AnalyzeResult, error)
}

type AnalyzeResult struct {
Key string // Analysis may choose a new key/filename
RenameTo string // If non-empty, file should be renamed before uploading
Metadata interface{}
ContentType string
ContentEncoding string
36 changes: 20 additions & 16 deletions zipserver/archive.go
@@ -133,8 +133,8 @@ func shouldIgnoreFile(fname string) bool {

// UploadFileTask contains the information needed to extract a single file from a .zip
type UploadFileTask struct {
File *zip.File
Key string
DestPathPrefix string
LocalFile *zip.File
}

// UploadFileResult is successful if Error is nil - in that case, it contains the
@@ -156,19 +156,16 @@ func uploadWorker(
defer func() { done <- struct{}{} }()

for task := range tasks {
file := task.File
key := task.Key

ctx, cancel := context.WithTimeout(ctx, time.Duration(a.Config.FilePutTimeout))
info, err := a.extractAndUploadOne(ctx, key, file, analyzer)
info, err := a.extractAndUploadOne(ctx, task, analyzer)
cancel() // Free resources now instead of deferring till func returns

if err != nil {
if errors.Is(err, ErrSkipped) {
log.Printf("Skipping file: %s", key)
log.Printf("Skipping file: %s", task.LocalFile.Name)
continue
}
log.Print("Failed sending " + key + ": " + err.Error())
log.Print("Failed sending " + task.LocalFile.Name + ": " + err.Error())
results <- UploadFileResult{Error: err}
return
}
@@ -247,8 +244,10 @@ func (a *Archiver) sendZipExtracted(
go func() {
defer func() { close(tasks) }()
for _, file := range fileList {
key := path.Join(prefix, file.Name)
task := UploadFileTask{file, key}
task := UploadFileTask{
DestPathPrefix: prefix,
LocalFile: file,
}
select {
case tasks <- task:
case <-ctx.Done():
@@ -292,19 +291,19 @@
// Caller should set the job timeout in ctx.
func (a *Archiver) extractAndUploadOne(
ctx context.Context,
key string,
file *zip.File,
task UploadFileTask,
analyzer Analyzer,
) (ExtractedFile, error) {
none := ExtractedFile{}
file := task.LocalFile

analyzerReader, err := file.Open()
if err != nil {
return none, err
}
defer analyzerReader.Close()

info, err := analyzer.Analyze(analyzerReader, key)
info, err := analyzer.Analyze(analyzerReader, file.Name)
if err != nil {
return none, err
}
@@ -316,12 +315,17 @@ func (a *Archiver) extractAndUploadOne(
}
defer uploadReader.Close()

log.Printf("Sending key=%q mime=%q encoding=%q", info.Key, info.ContentType, info.ContentEncoding)
sendName := file.Name
if info.RenameTo != "" {
sendName = info.RenameTo
}
destKey := path.Join(task.DestPathPrefix, sendName)
log.Printf("Sending key=%q mime=%q encoding=%q", destKey, info.ContentType, info.ContentEncoding)

var size uint64 // Written to by limitedReader
limited := limitedReader(uploadReader, file.UncompressedSize64, &size)

err = a.Storage.PutFileWithSetup(ctx, a.Bucket, info.Key, limited, func(r *http.Request) error {
err = a.Storage.PutFileWithSetup(ctx, a.Bucket, destKey, limited, func(r *http.Request) error {
r.Header.Set("X-Goog-Acl", "public-read")
r.Header.Set("Content-Type", info.ContentType)
if info.ContentEncoding != "" {
@@ -334,7 +338,7 @@ func (a *Archiver) extractAndUploadOne(
}

return ExtractedFile{
Key: info.Key,
Key: destKey,
Size: size,
Metadata: info.Metadata,
}, nil
41 changes: 41 additions & 0 deletions zipserver/archive_test.go
@@ -373,6 +373,47 @@ func Test_ExtractInMemory(t *testing.T) {
assert.EqualValues(t, k, storage.objectPath(config.Bucket, zipPath), "make sure the only remaining object is the zip")
}
})

storage, err = NewMemStorage()

withZip(&zipLayout{
entries: []zipEntry{
{
name: "a/b/c/file1",
data: []byte("data"),
expectedMimeType: "text/plain; charset=utf-8",
},
{
name: "file2.txt",
data: []byte("data"),
expectedMimeType: "text/plain; charset=utf-8",
},
{
name: "a/b/c.other",
data: []byte("data"),
expectedMimeType: "text/plain; charset=utf-8",
},
{
name: "file4",
data: []byte("data"),
expectedMimeType: "text/plain; charset=utf-8",
},
},
}, func(zl *zipLayout) {
limits := testLimits()
require.NoError(t, err)
archiver = &Archiver{storage, config}
analyzer = &GameAnalyzer{
onlyExtractFiles: []string{"a/b/c.other", "a/b/nonexistent", "file2.txt"},
}

res, err := archiver.ExtractZip(ctx, zipPath, prefix, limits, analyzer)
require.NoError(t, err)
// Can't compare the entire slice verbatim because of nondeterministic workload order
assert.Len(t, res, 2)
assert.Contains(t, res, ExtractedFile{Key: "zipserver_test/mem_test_extracted/file2.txt", Size: 4})
assert.Contains(t, res, ExtractedFile{Key: "zipserver_test/mem_test_extracted/a/b/c.other", Size: 4})
})
}

// TestFetchZipFailing simulates a download failing after the output file has been created,
22 changes: 17 additions & 5 deletions zipserver/extract_handler.go
@@ -3,6 +3,7 @@ package zipserver
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
@@ -94,6 +95,20 @@ func extractHandler(w http.ResponseWriter, r *http.Request) error {
return err
}

fileListParam := params.Get("only_files")
if err != nil {
return err
}
var fileList []string
if fileListParam != "" {
if err := json.Unmarshal([]byte(fileListParam), &fileList); err != nil {
return fmt.Errorf("unmarshal files param: %w", err)
}
if len(fileList) == 0 {
return fmt.Errorf("fileList param was empty")
}
}

hasLock := tryLockKey(key)
if !hasLock {
// already being extracted in another handler, ask consumer to wait
Expand All @@ -102,11 +117,8 @@ func extractHandler(w http.ResponseWriter, r *http.Request) error {

limits := loadLimits(params, config)

var analyzer Analyzer
if params.Get("contents") == "music" {
analyzer = &MusicAnalyzer{}
} else {
analyzer = &GameAnalyzer{}
analyzer := &GameAnalyzer{
onlyExtractFiles: fileList,
}

process := func(ctx context.Context) ([]ExtractedFile, error) {
10 changes: 10 additions & 0 deletions zipserver/extract_handler_test.go
@@ -2,6 +2,8 @@ package zipserver

import (
"fmt"
"net/http"
"net/http/httptest"
"net/url"
"testing"

@@ -42,3 +44,11 @@ func Test_Limits(t *testing.T) {
el = loadLimits(values, &defaultConfig)
assert.EqualValues(t, el.MaxFileSize, customMaxFileSize)
}

func TestExtractHandlerMissingQueryParam(t *testing.T) {
testServer := httptest.NewServer(errorHandler(extractHandler))
defer testServer.Close()
res, err := http.Get(testServer.URL + "/extract")
require.NoError(t, err)
assert.Equal(t, http.StatusInternalServerError, res.StatusCode)
}
36 changes: 28 additions & 8 deletions zipserver/game_analyzer.go
@@ -13,12 +13,32 @@ import (

// GameAnalyzer uses rules applying to HTML5 game uploads.
// gzip-compressed files are marked with the appropriate content type and encoding.
type GameAnalyzer struct{}
//
// If onlyExtractFiles is non-nil, files whose names are not in the list will be skipped.
type GameAnalyzer struct {
onlyExtractFiles []string
}

func (d GameAnalyzer) shouldExtract(name string) bool {
if d.onlyExtractFiles == nil {
return true
}
for _, v := range d.onlyExtractFiles {
if name == v {
return true
}
}
return false
}

func (d GameAnalyzer) Analyze(r io.Reader, key string) (AnalyzeResult, error) {
res := AnalyzeResult{Key: key}
func (d GameAnalyzer) Analyze(r io.Reader, filename string) (AnalyzeResult, error) {
res := AnalyzeResult{}

if !d.shouldExtract(filename) {
return res, ErrSkipped
}

mimeType := mime.TypeByExtension(path.Ext(key))
mimeType := mime.TypeByExtension(path.Ext(filename))

var buffer bytes.Buffer
_, err := io.Copy(&buffer, io.LimitReader(r, 512))
@@ -27,29 +47,29 @@ func (d GameAnalyzer) Analyze(r io.Reader, key string) (AnalyzeResult, error) {
}

contentMimeType := http.DetectContentType(buffer.Bytes())
extension := path.Ext(key)
extension := path.Ext(filename)

if contentMimeType == "application/x-gzip" || contentMimeType == "application/gzip" {
res.ContentEncoding = "gzip"

// try to see if there's a real extension hidden beneath
if extension == ".gz" {
realMimeType := mime.TypeByExtension(path.Ext(strings.TrimSuffix(key, ".gz")))
realMimeType := mime.TypeByExtension(path.Ext(strings.TrimSuffix(filename, ".gz")))
if realMimeType != "" {
mimeType = realMimeType
}
} else {
// To support gzip-compressed exports from Unity 5.5 and below, rename file.
// https://docs.unity3d.com/550/Documentation/Manual/webgl-deploying.html
if replacement, ok := unityExtReplacements[extension]; ok {
res.Key = strings.TrimSuffix(key, extension) + replacement
res.RenameTo = strings.TrimSuffix(filename, extension) + replacement
}
}
} else if extension == ".br" {
// there is no way to detect a brotli stream by content, so we assume that if it ends in .br then it's brotli
// this path is used for Unity 2020 webgl games built with brotli compression
res.ContentEncoding = "br"
realMimeType := mime.TypeByExtension(path.Ext(strings.TrimSuffix(key, ".br")))
realMimeType := mime.TypeByExtension(path.Ext(strings.TrimSuffix(filename, ".br")))
if realMimeType != "" {
mimeType = realMimeType
}
100 changes: 0 additions & 100 deletions zipserver/music_analyzer.go

This file was deleted.
