diff --git a/go.mod b/go.mod index cf9eaee..78b6aad 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/itchio/zipserver go 1.17 require ( + github.com/dhowden/tag v0.0.0-20220618230019-adf36e896086 github.com/go-errors/errors v1.4.2 github.com/stretchr/testify v1.7.0 golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8 diff --git a/go.sum b/go.sum index 6d4448b..838f43d 100644 --- a/go.sum +++ b/go.sum @@ -42,6 +42,10 @@ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDk github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dhowden/itl v0.0.0-20170329215456-9fbe21093131/go.mod h1:eVWQJVQ67aMvYhpkDwaH2Goy2vo6v8JCMfGXfQ9sPtw= +github.com/dhowden/plist v0.0.0-20141002110153-5db6e0d9931a/go.mod h1:sLjdR6uwx3L6/Py8F+QgAfeiuY87xuYGwCDqRFrvCzw= +github.com/dhowden/tag v0.0.0-20220618230019-adf36e896086 h1:ORubSQoKnncsBnR4zD9CuYFJCPOCuSNEpWEZrDdBXkc= +github.com/dhowden/tag v0.0.0-20220618230019-adf36e896086/go.mod h1:Z3Lomva4pyMWYezjMAU5QWRh0p1VvO4199OHlFnyKkM= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= diff --git a/zipserver/analyzer.go b/zipserver/analyzer.go new file mode 100644 index 0000000..ed8db17 --- /dev/null +++ b/zipserver/analyzer.go @@ -0,0 +1,25 @@ +package zipserver + +import ( + "errors" + "io" +) + +// ErrSkipped is non-critical and indicates that analysis +// chose to ignore a file. The file should not be uploaded. 
+var ErrSkipped = errors.New("skipped file") + +// Analyzer analyzes individual files in a zip archive. +// Behavior may change based on the intended workload. +type Analyzer interface { + // Analyze should return info about the contained file. + // It should return ErrSkipped if a file was ignored. + Analyze(r io.Reader, filename string) (AnalyzeResult, error) +} + +type AnalyzeResult struct { + RenameTo string // If non-empty, file should be renamed before uploading + Metadata interface{} + ContentType string + ContentEncoding string +} diff --git a/zipserver/archive.go b/zipserver/archive.go index c2adb6c..35d0b5c 100644 --- a/zipserver/archive.go +++ b/zipserver/archive.go @@ -1,7 +1,6 @@ package zipserver import ( - "bytes" "context" "crypto/md5" "encoding/hex" @@ -40,8 +39,9 @@ type Archiver struct { // ExtractedFile represents a file extracted from a .zip into a GCS bucket type ExtractedFile struct { - Key string - Size uint64 + Key string + Size uint64 + Metadata interface{} `json:",omitempty"` } // NewArchiver creates a new archiver from the given config @@ -133,21 +133,22 @@ func shouldIgnoreFile(fname string) bool { // UploadFileTask contains the information needed to extract a single file from a .zip type UploadFileTask struct { - File *zip.File - Key string + DestPathPrefix string + LocalFile *zip.File } // UploadFileResult is successful is Error is nil - in that case, it contains the // GCS key the file was uploaded under, and the number of bytes written for that file. +// An error causes the job to abort processing all further files in the archive. 
type UploadFileResult struct { Error error - Key string - Size uint64 + ExtractedFile } func uploadWorker( ctx context.Context, a *Archiver, + analyzer Analyzer, tasks <-chan UploadFileTask, results chan<- UploadFileResult, done chan struct{}, @@ -155,20 +156,21 @@ func uploadWorker( defer func() { done <- struct{}{} }() for task := range tasks { - file := task.File - key := task.Key - ctx, cancel := context.WithTimeout(ctx, time.Duration(a.Config.FilePutTimeout)) - resource, err := a.extractAndUploadOne(ctx, key, file) + info, err := a.extractAndUploadOne(ctx, task, analyzer) cancel() // Free resources now instead of deferring till func returns if err != nil { - log.Print("Failed sending " + key + ": " + err.Error()) - results <- UploadFileResult{err, key, 0} + if errors.Is(err, ErrSkipped) { + log.Printf("Skipping file: %s", task.LocalFile.Name) + continue + } + log.Print("Failed sending " + task.LocalFile.Name + ": " + err.Error()) + results <- UploadFileResult{Error: err} return } - results <- UploadFileResult{nil, resource.key, resource.size} + results <- UploadFileResult{ExtractedFile: info} } } @@ -177,6 +179,7 @@ func (a *Archiver) sendZipExtracted( ctx context.Context, prefix, fname string, limits *ExtractLimits, + analyzer Analyzer, ) ([]ExtractedFile, error) { zipReader, err := zip.OpenReader(fname) if err != nil { @@ -233,7 +236,7 @@ func (a *Archiver) sendZipExtracted( defer cancel() for i := 0; i < limits.ExtractionThreads; i++ { - go uploadWorker(ctx, a, tasks, results, done) + go uploadWorker(ctx, a, analyzer, tasks, results, done) } activeWorkers := limits.ExtractionThreads @@ -241,8 +244,10 @@ func (a *Archiver) sendZipExtracted( go func() { defer func() { close(tasks) }() for _, file := range fileList { - key := path.Join(prefix, file.Name) - task := UploadFileTask{file, key} + task := UploadFileTask{ + DestPathPrefix: prefix, + LocalFile: file, + } select { case tasks <- task: case <-ctx.Done(): @@ -262,7 +267,7 @@ func (a *Archiver) 
sendZipExtracted( extractError = result.Error cancel() } else { - extractedFiles = append(extractedFiles, ExtractedFile{result.Key, result.Size}) + extractedFiles = append(extractedFiles, result.ExtractedFile) fileCount++ } case <-done: @@ -284,77 +289,59 @@ func (a *Archiver) sendZipExtracted( // sends an individual file from a zip // Caller should set the job timeout in ctx. -func (a *Archiver) extractAndUploadOne(ctx context.Context, key string, file *zip.File) (*ResourceSpec, error) { - readerCloser, err := file.Open() - if err != nil { - return nil, err - } - defer readerCloser.Close() - - var reader io.Reader = readerCloser +func (a *Archiver) extractAndUploadOne( + ctx context.Context, + task UploadFileTask, + analyzer Analyzer, +) (ExtractedFile, error) { + none := ExtractedFile{} + file := task.LocalFile - resource := &ResourceSpec{ - key: key, + analyzerReader, err := file.Open() + if err != nil { + return none, err } + defer analyzerReader.Close() - // try determining MIME by extension - mimeType := mime.TypeByExtension(path.Ext(key)) - - var buffer bytes.Buffer - _, err = io.Copy(&buffer, io.LimitReader(reader, 512)) - + info, err := analyzer.Analyze(analyzerReader, file.Name) if err != nil { - return nil, errors.Wrap(err, 0) + return none, err } - contentMimeType := http.DetectContentType(buffer.Bytes()) - // join the bytes read and the original reader - reader = io.MultiReader(&buffer, reader) - - if contentMimeType == "application/x-gzip" || contentMimeType == "application/gzip" { - resource.contentEncoding = "gzip" - - // try to see if there's a real extension hidden beneath - if strings.HasSuffix(key, ".gz") { - realMimeType := mime.TypeByExtension(path.Ext(strings.TrimSuffix(key, ".gz"))) - - if realMimeType != "" { - mimeType = realMimeType - } - } - - } else if strings.HasSuffix(key, ".br") { - // there is no way to detect a brotli stream by content, so we assume if it ends if .br then it's brotli - // this path is used for Unity 2020 webgl 
games built with brotli compression - resource.contentEncoding = "br" - realMimeType := mime.TypeByExtension(path.Ext(strings.TrimSuffix(key, ".br"))) - - if realMimeType != "" { - mimeType = realMimeType - } - } else if mimeType == "" { - // fall back to the extension detected from content, eg. someone uploaded a .png with wrong extension - mimeType = contentMimeType + // Analysis may have called Read() but we cannot seek back, so open a new Reader with initialized cursor. + uploadReader, err := file.Open() + if err != nil { + return none, err } + defer uploadReader.Close() - if mimeType == "" { - // default mime type - mimeType = "application/octet-stream" + sendName := file.Name + if info.RenameTo != "" { + sendName = info.RenameTo } - resource.contentType = mimeType + destKey := path.Join(task.DestPathPrefix, sendName) + log.Printf("Sending key=%q mime=%q encoding=%q", destKey, info.ContentType, info.ContentEncoding) - resource.applyRewriteRules() + var size uint64 // Written to by limitedReader + limited := limitedReader(uploadReader, file.UncompressedSize64, &size) - log.Printf("Sending: %s", resource) - - limited := limitedReader(reader, file.UncompressedSize64, &resource.size) - - err = a.Storage.PutFileWithSetup(ctx, a.Bucket, resource.key, limited, resource.setupRequest) + err = a.Storage.PutFileWithSetup(ctx, a.Bucket, destKey, limited, func(r *http.Request) error { + r.Header.Set("X-Goog-Acl", "public-read") + r.Header.Set("Content-Type", info.ContentType) + if info.ContentEncoding != "" { + r.Header.Set("Content-Encoding", info.ContentEncoding) + } + return nil + }) if err != nil { - return resource, errors.Wrap(err, 0) + return none, errors.Wrap(err, 0) } - return resource, nil + return ExtractedFile{ + Key: destKey, + Size: size, + Metadata: info.Metadata, + }, nil } // ExtractZip downloads the zip at `key` to a temporary file, @@ -364,6 +351,7 @@ func (a *Archiver) ExtractZip( ctx context.Context, key, prefix string, limits *ExtractLimits, + 
analyzer Analyzer, ) ([]ExtractedFile, error) { fname, err := a.fetchZip(ctx, key) if err != nil { @@ -372,7 +360,7 @@ func (a *Archiver) ExtractZip( defer os.Remove(fname) prefix = path.Join(a.ExtractPrefix, prefix) - return a.sendZipExtracted(ctx, prefix, fname, limits) + return a.sendZipExtracted(ctx, prefix, fname, limits, analyzer) } // Caller should set the job timeout in ctx. @@ -382,5 +370,6 @@ func (a *Archiver) UploadZipFromFile( limits *ExtractLimits, ) ([]ExtractedFile, error) { prefix = path.Join("_zipserver", prefix) - return a.sendZipExtracted(ctx, prefix, fname, limits) + // TODO: Add CLI option to choose game or music content. + return a.sendZipExtracted(ctx, prefix, fname, limits, &GameAnalyzer{}) } diff --git a/zipserver/archive_test.go b/zipserver/archive_test.go index 1b51b5f..72d76be 100644 --- a/zipserver/archive_test.go +++ b/zipserver/archive_test.go @@ -40,20 +40,22 @@ func emptyConfig() *Config { } } +var analyzer = &GameAnalyzer{} + func Test_ExtractOnGCS(t *testing.T) { withGoogleCloudStorage(t, func(storage Storage, config *Config) { ctx := context.Background() archiver := &Archiver{storage, config} r, err := os.Open("/home/leafo/code/go/etlua.zip") - assert.NoError(t, err) + require.NoError(t, err) defer r.Close() err = storage.PutFile(ctx, config.Bucket, "zipserver_test/test.zip", r, "application/zip") - assert.NoError(t, err) + require.NoError(t, err) - _, err = archiver.ExtractZip(ctx, "zipserver_test/test.zip", "zipserver_test/extract", testLimits()) - assert.NoError(t, err) + _, err = archiver.ExtractZip(ctx, "zipserver_test/test.zip", "zipserver_test/extract", testLimits(), analyzer) + require.NoError(t, err) }) } @@ -76,18 +78,18 @@ func (zl *zipLayout) Write(t *testing.T, zw *zip.Writer) { Name: entry.name, UncompressedSize64: uint64(len(entry.data)), }) - assert.NoError(t, err) + require.NoError(t, err) _, err = io.Copy(writer, bytes.NewReader(entry.data)) - assert.NoError(t, err) + require.NoError(t, err) } } func (zl 
*zipLayout) Check(t *testing.T, storage *MemStorage, bucket, prefix string) { ctx := context.Background() - for _, entry := range zl.entries { - func() { + for i, entry := range zl.entries { + t.Run(fmt.Sprintf("file %d: %s", i, entry.name), func(t *testing.T) { name := entry.name if entry.outName != "" { name = entry.outName @@ -101,23 +103,23 @@ func (zl *zipLayout) Check(t *testing.T, storage *MemStorage, bucket, prefix str return } - assert.NoError(t, err) + require.NoError(t, err) defer reader.Close() data, err := io.ReadAll(reader) - assert.NoError(t, err) + require.NoError(t, err) assert.EqualValues(t, data, entry.data) h, err := storage.getHeaders(bucket, path) - assert.NoError(t, err) + require.NoError(t, err) assert.EqualValues(t, entry.expectedMimeType, h.Get("content-type")) assert.EqualValues(t, "public-read", h.Get("x-goog-acl")) if entry.expectedContentEncoding != "" { assert.EqualValues(t, entry.expectedContentEncoding, h.Get("content-encoding")) } - }() + }) } } @@ -127,13 +129,13 @@ func Test_ExtractInMemory(t *testing.T) { ctx := context.Background() storage, err := NewMemStorage() - assert.NoError(t, err) + require.NoError(t, err) archiver := &Archiver{storage, config} prefix := "zipserver_test/mem_test_extracted" zipPath := "mem_test.zip" - _, err = archiver.ExtractZip(ctx, zipPath, prefix, testLimits()) + _, err = archiver.ExtractZip(ctx, zipPath, prefix, testLimits(), analyzer) assert.Error(t, err) withZip := func(zl *zipLayout, cb func(zl *zipLayout)) { @@ -144,96 +146,96 @@ func Test_ExtractInMemory(t *testing.T) { zl.Write(t, zw) err = zw.Close() - assert.NoError(t, err) + require.NoError(t, err) err = storage.PutFile(ctx, config.Bucket, zipPath, bytes.NewReader(buf.Bytes()), "application/octet-stream") - assert.NoError(t, err) + require.NoError(t, err) cb(zl) } withZip(&zipLayout{ entries: []zipEntry{ - zipEntry{ + { name: "file.txt", data: []byte("Hello there"), expectedMimeType: "text/plain; charset=utf-8", }, - zipEntry{ + { name: 
"garbage.bin", data: bytes.Repeat([]byte{3, 1, 5, 3, 2, 6, 1, 2, 5, 3, 4, 6, 2}, 20), expectedMimeType: "application/octet-stream", }, - zipEntry{ + { name: "something.gz", data: []byte{0x1F, 0x8B, 0x08, 1, 5, 2, 4, 9, 3, 1, 2, 5}, expectedMimeType: "application/gzip", }, - zipEntry{ + { name: "something.unityweb", data: []byte{0x1F, 0x8B, 0x08, 9, 1, 5, 2, 3, 5, 2, 6, 4, 4}, expectedMimeType: "application/octet-stream", expectedContentEncoding: "gzip", }, - zipEntry{ + { name: "gamedata.memgz", outName: "gamedata.mem", data: []byte{0x1F, 0x8B, 0x08, 1, 5, 2, 3, 1, 2, 1, 2}, expectedMimeType: "application/octet-stream", expectedContentEncoding: "gzip", }, - zipEntry{ + { name: "gamedata.jsgz", outName: "gamedata.js", data: []byte{0x1F, 0x8B, 0x08, 3, 7, 3, 4, 12, 53, 26, 34}, expectedMimeType: "application/octet-stream", expectedContentEncoding: "gzip", }, - zipEntry{ + { name: "gamedata.asm.jsgz", outName: "gamedata.asm.js", data: []byte{0x1F, 0x8B, 0x08, 62, 34, 128, 37, 10, 39, 82}, expectedMimeType: "application/octet-stream", expectedContentEncoding: "gzip", }, - zipEntry{ + { name: "gamedata.datagz", outName: "gamedata.data", data: []byte{0x1F, 0x8B, 0x08, 8, 5, 23, 1, 25, 38}, expectedMimeType: "application/octet-stream", expectedContentEncoding: "gzip", }, - zipEntry{ + { name: "__MACOSX/hello", data: []byte{}, ignored: true, }, - zipEntry{ + { name: "/woops/hi/im/absolute", data: []byte{}, ignored: true, }, - zipEntry{ + { name: "oh/hey/im/a/dir/", data: []byte{}, ignored: true, }, - zipEntry{ + { name: "im/trying/to/escape/../../../../../../etc/hosts", data: []byte{}, ignored: true, }, }, }, func(zl *zipLayout) { - _, err := archiver.ExtractZip(ctx, zipPath, prefix, testLimits()) - assert.NoError(t, err) + _, err := archiver.ExtractZip(ctx, zipPath, prefix, testLimits(), analyzer) + require.NoError(t, err) zl.Check(t, storage, config.Bucket, prefix) }) withZip(&zipLayout{ entries: []zipEntry{ - zipEntry{ + { name: strings.Repeat("x", 101), data: 
[]byte("uh oh"), expectedMimeType: "text/plain; charset=utf-8", @@ -243,14 +245,14 @@ func Test_ExtractInMemory(t *testing.T) { limits := testLimits() limits.MaxFileNameLength = 100 - _, err := archiver.ExtractZip(ctx, zipPath, prefix, limits) + _, err := archiver.ExtractZip(ctx, zipPath, prefix, limits, analyzer) assert.Error(t, err) assert.True(t, strings.Contains(err.Error(), "paths that are too long")) }) withZip(&zipLayout{ entries: []zipEntry{ - zipEntry{ + { name: "x", data: bytes.Repeat([]byte("oh no"), 100), expectedMimeType: "text/plain; charset=utf-8", @@ -260,29 +262,29 @@ func Test_ExtractInMemory(t *testing.T) { limits := testLimits() limits.MaxFileSize = 499 - _, err := archiver.ExtractZip(ctx, zipPath, prefix, limits) + _, err := archiver.ExtractZip(ctx, zipPath, prefix, limits, analyzer) assert.Error(t, err) assert.True(t, strings.Contains(err.Error(), "file that is too large")) }) withZip(&zipLayout{ entries: []zipEntry{ - zipEntry{ + { name: "1", data: []byte("uh oh"), expectedMimeType: "text/plain; charset=utf-8", }, - zipEntry{ + { name: "2", data: []byte("uh oh"), expectedMimeType: "text/plain; charset=utf-8", }, - zipEntry{ + { name: "3", data: []byte("uh oh"), expectedMimeType: "text/plain; charset=utf-8", }, - zipEntry{ + { name: "4", data: []byte("uh oh"), expectedMimeType: "text/plain; charset=utf-8", @@ -292,29 +294,29 @@ func Test_ExtractInMemory(t *testing.T) { limits := testLimits() limits.MaxNumFiles = 3 - _, err := archiver.ExtractZip(ctx, zipPath, prefix, limits) + _, err := archiver.ExtractZip(ctx, zipPath, prefix, limits, analyzer) assert.Error(t, err) assert.True(t, strings.Contains(err.Error(), "Too many files")) }) withZip(&zipLayout{ entries: []zipEntry{ - zipEntry{ + { name: "1", data: []byte("uh oh"), expectedMimeType: "text/plain; charset=utf-8", }, - zipEntry{ + { name: "2", data: []byte("uh oh"), expectedMimeType: "text/plain; charset=utf-8", }, - zipEntry{ + { name: "3", data: []byte("uh oh"), expectedMimeType: 
"text/plain; charset=utf-8", }, - zipEntry{ + { name: "4", data: []byte("uh oh"), expectedMimeType: "text/plain; charset=utf-8", @@ -324,36 +326,36 @@ func Test_ExtractInMemory(t *testing.T) { limits := testLimits() limits.MaxTotalSize = 6 - _, err := archiver.ExtractZip(ctx, zipPath, prefix, limits) + _, err := archiver.ExtractZip(ctx, zipPath, prefix, limits, analyzer) assert.Error(t, err) assert.True(t, strings.Contains(err.Error(), "zip too large")) }) // reset storage for this next test storage, err = NewMemStorage() - assert.NoError(t, err) + require.NoError(t, err) storage.planForFailure(config.Bucket, fmt.Sprintf("%s/%s", prefix, "3")) storage.putDelay = 200 * time.Millisecond archiver = &Archiver{storage, config} withZip(&zipLayout{ entries: []zipEntry{ - zipEntry{ + { name: "1", data: []byte("uh oh"), expectedMimeType: "text/plain; charset=utf-8", }, - zipEntry{ + { name: "2", data: []byte("uh oh"), expectedMimeType: "text/plain; charset=utf-8", }, - zipEntry{ + { name: "3", data: []byte("uh oh"), expectedMimeType: "text/plain; charset=utf-8", }, - zipEntry{ + { name: "4", data: []byte("uh oh"), expectedMimeType: "text/plain; charset=utf-8", @@ -362,7 +364,7 @@ func Test_ExtractInMemory(t *testing.T) { }, func(zl *zipLayout) { limits := testLimits() - _, err := archiver.ExtractZip(ctx, zipPath, prefix, limits) + _, err := archiver.ExtractZip(ctx, zipPath, prefix, limits, analyzer) assert.Error(t, err) assert.True(t, strings.Contains(err.Error(), "intentional failure")) @@ -371,6 +373,47 @@ func Test_ExtractInMemory(t *testing.T) { assert.EqualValues(t, k, storage.objectPath(config.Bucket, zipPath), "make sure the only remaining object is the zip") } }) + + storage, err = NewMemStorage() + + withZip(&zipLayout{ + entries: []zipEntry{ + { + name: "a/b/c/file1", + data: []byte("data"), + expectedMimeType: "text/plain; charset=utf-8", + }, + { + name: "file2.txt", + data: []byte("data"), + expectedMimeType: "text/plain; charset=utf-8", + }, + { + name: 
"a/b/c.other", + data: []byte("data"), + expectedMimeType: "text/plain; charset=utf-8", + }, + { + name: "file4", + data: []byte("data"), + expectedMimeType: "text/plain; charset=utf-8", + }, + }, + }, func(zl *zipLayout) { + limits := testLimits() + require.NoError(t, err) + archiver = &Archiver{storage, config} + analyzer = &GameAnalyzer{ + onlyExtractFiles: []string{"a/b/c.other", "a/b/nonexistent", "file2.txt"}, + } + + res, err := archiver.ExtractZip(ctx, zipPath, prefix, limits, analyzer) + require.NoError(t, err) + // Can't compare the entire slice verbatim because of nondeterministic workload order + assert.Len(t, res, 2) + assert.Contains(t, res, ExtractedFile{Key: "zipserver_test/mem_test_extracted/file2.txt", Size: 4}) + assert.Contains(t, res, ExtractedFile{Key: "zipserver_test/mem_test_extracted/a/b/c.other", Size: 4}) + }) } // TestFetchZipFailing simulates a download failing after the ouptut file has been created, diff --git a/zipserver/config_test.go b/zipserver/config_test.go index e1d7a5c..3a33088 100644 --- a/zipserver/config_test.go +++ b/zipserver/config_test.go @@ -7,6 +7,7 @@ import ( "time" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func Test_Config(t *testing.T) { @@ -18,22 +19,20 @@ func Test_Config(t *testing.T) { defer os.Remove(tmpFile.Name()) writeConfigBytes := func(bytes []byte) { + t.Helper() + _, err := tmpFile.Seek(0, os.SEEK_SET) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) _, err = tmpFile.Write(bytes) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) } writeConfig := func(c *Config) { + t.Helper() + bytes, err := json.Marshal(c) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) writeConfigBytes(bytes) } @@ -75,7 +74,7 @@ func Test_Config(t *testing.T) { }) c, err := LoadConfig(tmpFile.Name()) - assert.NoError(t, err) + require.NoError(t, err) assert.EqualValues(t, "/foo/bar.pem", c.PrivateKeyPath) assert.EqualValues(t, 92, c.MaxFileSize) diff --git 
a/zipserver/extract_handler.go b/zipserver/extract_handler.go index 57ec639..e22af1b 100644 --- a/zipserver/extract_handler.go +++ b/zipserver/extract_handler.go @@ -3,8 +3,10 @@ package zipserver import ( "bytes" "context" + "encoding/json" "errors" "fmt" + "io" "log" "net/http" "net/url" @@ -93,6 +95,20 @@ func extractHandler(w http.ResponseWriter, r *http.Request) error { return err } + fileListParam := params.Get("only_files") + if err != nil { + return err + } + var fileList []string + if fileListParam != "" { + if err := json.Unmarshal([]byte(fileListParam), &fileList); err != nil { + return fmt.Errorf("unmarshal files param: %w", err) + } + if len(fileList) == 0 { + return fmt.Errorf("fileList param was empty") + } + } + hasLock := tryLockKey(key) if !hasLock { // already being extracted in another handler, ask consumer to wait @@ -101,9 +117,13 @@ func extractHandler(w http.ResponseWriter, r *http.Request) error { limits := loadLimits(params, config) + analyzer := &GameAnalyzer{ + onlyExtractFiles: fileList, + } + process := func(ctx context.Context) ([]ExtractedFile, error) { archiver := NewArchiver(config) - files, err := archiver.ExtractZip(ctx, key, prefix, limits) + files, err := archiver.ExtractZip(ctx, key, prefix, limits, analyzer) return files, err } @@ -171,11 +191,19 @@ func extractHandler(w http.ResponseWriter, r *http.Request) error { } req.Header.Set("Content-Type", "application/x-www-form-urlencoded") - asyncResponse, err := http.DefaultClient.Do(req) - if err == nil { - asyncResponse.Body.Close() - } else { - log.Print("Failed to deliver callback: " + err.Error()) + res, err := http.DefaultClient.Do(req) + if err != nil { + log.Printf("Failed to deliver callback: %v", err) + return + } + defer res.Body.Close() + if res.StatusCode != http.StatusOK && res.StatusCode != http.StatusNoContent { + body, err := io.ReadAll(res.Body) + if err != nil { + log.Printf("Read notification response: %v", err) + return + } + log.Printf("Notification 
response: %s %s", res.Status, string(body)) } })() diff --git a/zipserver/extract_handler_test.go b/zipserver/extract_handler_test.go index f71eed5..ab88a30 100644 --- a/zipserver/extract_handler_test.go +++ b/zipserver/extract_handler_test.go @@ -2,10 +2,13 @@ package zipserver import ( "fmt" + "net/http" + "net/http/httptest" "net/url" "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func Test_Locks(t *testing.T) { @@ -36,8 +39,16 @@ func Test_Limits(t *testing.T) { const customMaxFileSize = 9428 values, err := url.ParseQuery(fmt.Sprintf("maxFileSize=%d", customMaxFileSize)) - assert.NoError(t, err) + require.NoError(t, err) el = loadLimits(values, &defaultConfig) assert.EqualValues(t, el.MaxFileSize, customMaxFileSize) } + +func TestExtractHandlerMissingQueryParam(t *testing.T) { + testServer := httptest.NewServer(errorHandler(extractHandler)) + defer testServer.Close() + res, err := http.Get(testServer.URL + "/extract") + require.NoError(t, err) + assert.Equal(t, http.StatusInternalServerError, res.StatusCode) +} diff --git a/zipserver/game_analyzer.go b/zipserver/game_analyzer.go new file mode 100644 index 0000000..a321c91 --- /dev/null +++ b/zipserver/game_analyzer.go @@ -0,0 +1,94 @@ +package zipserver + +import ( + "bytes" + "io" + "mime" + "net/http" + "path" + "strings" + + "github.com/go-errors/errors" +) + +// GameAnalyzer uses rules applying to HTML5 game uploads. +// gzip-compressed files are marked with the appropriate content type and encoding. +// +// If onlyExtractFiles is non-nil, files whose names are not in the list will be skipped. 
+type GameAnalyzer struct { + onlyExtractFiles []string +} + +func (d GameAnalyzer) shouldExtract(name string) bool { + if d.onlyExtractFiles == nil { + return true + } + for _, v := range d.onlyExtractFiles { + if name == v { + return true + } + } + return false +} + +func (d GameAnalyzer) Analyze(r io.Reader, filename string) (AnalyzeResult, error) { + res := AnalyzeResult{} + + if !d.shouldExtract(filename) { + return res, ErrSkipped + } + + mimeType := mime.TypeByExtension(path.Ext(filename)) + + var buffer bytes.Buffer + _, err := io.Copy(&buffer, io.LimitReader(r, 512)) + if err != nil { + return res, errors.Wrap(err, 0) + } + + contentMimeType := http.DetectContentType(buffer.Bytes()) + extension := path.Ext(filename) + + if contentMimeType == "application/x-gzip" || contentMimeType == "application/gzip" { + res.ContentEncoding = "gzip" + + // try to see if there's a real extension hidden beneath + if extension == ".gz" { + realMimeType := mime.TypeByExtension(path.Ext(strings.TrimSuffix(filename, ".gz"))) + if realMimeType != "" { + mimeType = realMimeType + } + } else { + // To support gzip-compressed exports from Unity 5.5 and below, rename file. + // https://docs.unity3d.com/550/Documentation/Manual/webgl-deploying.html + if replacement, ok := unityExtReplacements[extension]; ok { + res.RenameTo = strings.TrimSuffix(filename, extension) + replacement + } + } + } else if extension == ".br" { + // there is no way to detect a brotli stream by content, so we assume if it ends in .br then it's brotli + // this path is used for Unity 2020 webgl games built with brotli compression + res.ContentEncoding = "br" + realMimeType := mime.TypeByExtension(path.Ext(strings.TrimSuffix(filename, ".br"))) + if realMimeType != "" { + mimeType = realMimeType + } + } else if mimeType == "" { + // fall back to the extension detected from content, eg. 
someone uploaded a .png with wrong extension + mimeType = contentMimeType + } + if mimeType == "" { + mimeType = "application/octet-stream" + } + + res.ContentType = mimeType + + return res, nil +} + +var unityExtReplacements = map[string]string{ + ".jsgz": ".js", + ".datagz": ".data", + ".memgz": ".mem", + ".unity3dgz": ".unity3d", +} diff --git a/zipserver/readers_test.go b/zipserver/readers_test.go index 57eaa99..1ad66f5 100644 --- a/zipserver/readers_test.go +++ b/zipserver/readers_test.go @@ -7,6 +7,7 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func Test_annotatedReader(t *testing.T) { @@ -23,7 +24,7 @@ func Test_annotatedReader(t *testing.T) { if err == io.EOF { break } - assert.NoError(t, err) + require.NoError(t, err) } assert.EqualValues(t, len(s), totalBytes) } @@ -36,7 +37,7 @@ func Test_limitedReader(t *testing.T) { lr := limitedReader(sr, 128, &totalBytes) result, err := io.ReadAll(lr) - assert.NoError(t, err) + require.NoError(t, err) assert.EqualValues(t, s, string(result)) assert.EqualValues(t, len(s), totalBytes) diff --git a/zipserver/serve_zip.go b/zipserver/serve_zip.go index 2aa93a5..9fb3fb5 100644 --- a/zipserver/serve_zip.go +++ b/zipserver/serve_zip.go @@ -107,7 +107,8 @@ func ServeZip(config *Config, serve string) error { archiver := &Archiver{storage, config} prefix := "extracted" - _, err = archiver.ExtractZip(ctx, key, prefix, DefaultExtractLimits(config)) + // TODO: Add CLI option to choose game or music content. 
+ _, err = archiver.ExtractZip(ctx, key, prefix, DefaultExtractLimits(config), &GameAnalyzer{}) if err != nil { return errors.Wrap(err, 0) } diff --git a/zipserver/slurp_handler.go b/zipserver/slurp_handler.go index 3839e20..df85178 100644 --- a/zipserver/slurp_handler.go +++ b/zipserver/slurp_handler.go @@ -145,9 +145,19 @@ func slurpHandler(w http.ResponseWriter, r *http.Request) error { } req.Header.Set("Content-Type", "application/x-www-form-urlencoded") - _, err = http.DefaultClient.Do(req) + res, err := http.DefaultClient.Do(req) if err != nil { - log.Print("Failed to deliver callback: " + err.Error()) + log.Printf("Failed to deliver callback: %v", err) + return + } + defer res.Body.Close() + if res.StatusCode != http.StatusOK && res.StatusCode != http.StatusNoContent { + body, err := io.ReadAll(res.Body) + if err != nil { + log.Printf("Read notification response: %v", err) + return + } + log.Printf("Notification response: %s %s", res.Status, string(body)) } })() diff --git a/zipserver/specifications.go b/zipserver/specifications.go deleted file mode 100644 index 4350fab..0000000 --- a/zipserver/specifications.go +++ /dev/null @@ -1,70 +0,0 @@ -package zipserver - -import ( - "fmt" - "net/http" - "path" - "strings" -) - -// ResourceSpec contains all the info for an HTTP resource relevant for -// setting http headers and keeping track of the extraction work -type ResourceSpec struct { - size uint64 - key string - contentType string - contentEncoding string -} - -func (rs *ResourceSpec) String() string { - formattedEncoding := "" - if rs.contentEncoding != "" { - formattedEncoding = fmt.Sprintf(", %s encoding", rs.contentEncoding) - } - - return fmt.Sprintf("%s (%s%s)", rs.key, rs.contentType, formattedEncoding) -} - -// setupRequest sets the proper HTTP headers on a request for storing this resource -func (rs *ResourceSpec) setupRequest(req *http.Request) error { - // All extracted files must be readable without authentication - req.Header.Set("x-goog-acl", 
"public-read") - - req.Header.Set("content-type", rs.contentType) - if rs.contentEncoding != "" { - req.Header.Set("content-encoding", rs.contentEncoding) - } - return nil -} - -// RewriteSpec contains rules for rewriting file extensions -type RewriteSpec struct { - oldExtension string - newExtension string -} - -var rewriteSpecs = []RewriteSpec{ - // // For Unity WebGL up to 5.5, see - // // https://docs.unity3d.com/550/Documentation/Manual/webgl-deploying.html - {".jsgz", ".js"}, - {".datagz", ".data"}, - {".memgz", ".mem"}, - {".unity3dgz", ".unity3d"}, -} - -func (rs *ResourceSpec) applyRewriteRules() { - // rewrite rules only apply when we've identified the gzip suffix - if rs.contentEncoding != "gzip" { - return - } - - extension := path.Ext(rs.key) - - for _, spec := range rewriteSpecs { - if extension == spec.oldExtension { - rs.key = strings.TrimSuffix(rs.key, spec.oldExtension) + spec.newExtension - // only apply one rule at most - return - } - } -}