From 0bf0ff75670be4aadd1d9fc8f329c5f2a1a1e4f3 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Mon, 31 May 2021 12:55:26 +0200 Subject: [PATCH] Add gzip HTTP wrapper Fork and clean up+extend the dead `nytimes/gziphandler` project. Includes https://github.com/nytimes/gziphandler/pull/106 as well as support for stateless encoding. Removes testify from deps. --- gzhttp/LICENSE | 201 +++++++++ gzhttp/README.md | 54 +++ gzhttp/asserts_test.go | 69 +++ gzhttp/gzip.go | 519 ++++++++++++++++++++++ gzhttp/gzip_test.go | 651 ++++++++++++++++++++++++++++ gzhttp/writer/gzkp/gzkp.go | 68 +++ gzhttp/writer/gzkp/gzkp_test.go | 26 ++ gzhttp/writer/interface.go | 20 + gzhttp/writer/stdlib/stdlib.go | 68 +++ gzhttp/writer/stdlib/stdlib_test.go | 27 ++ 10 files changed, 1703 insertions(+) create mode 100644 gzhttp/LICENSE create mode 100644 gzhttp/README.md create mode 100644 gzhttp/asserts_test.go create mode 100644 gzhttp/gzip.go create mode 100644 gzhttp/gzip_test.go create mode 100644 gzhttp/writer/gzkp/gzkp.go create mode 100644 gzhttp/writer/gzkp/gzkp_test.go create mode 100644 gzhttp/writer/interface.go create mode 100644 gzhttp/writer/stdlib/stdlib.go create mode 100644 gzhttp/writer/stdlib/stdlib_test.go diff --git a/gzhttp/LICENSE b/gzhttp/LICENSE new file mode 100644 index 0000000000..df6192d36f --- /dev/null +++ b/gzhttp/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2016-2017 The New York Times Company + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/gzhttp/README.md b/gzhttp/README.md new file mode 100644 index 0000000000..2816371274 --- /dev/null +++ b/gzhttp/README.md @@ -0,0 +1,54 @@ +Gzip Handler +============ + +This is a tiny Go package which wraps HTTP handlers to transparently gzip the +response body, for clients which support it. + +This package is forked from the [dead nytimes/gziphandler](https://github.com/nytimes/gziphandler) +and extends functionality for it. + +## Install +```bash +go get -u github.com/klauspost/compress +``` + +## Usage + +Call `GzipHandler` with any handler (an object which implements the +`http.Handler` interface), and it'll return a new handler which gzips the +response. For example: + +```go +package main + +import ( + "io" + "net/http" + "github.com/klauspost/compress/gzhttp" +) + +func main() { + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/plain") + io.WriteString(w, "Hello, World") + }) + + http.Handle("/", gzhttp.GzipHandler(handler)) + http.ListenAndServe("0.0.0.0:8000", nil) +} +``` + + +## Documentation + +The docs can be found at [godoc.org][docs], as usual. + + +## License + +[Apache 2.0][license].
+
+
+
+[docs]: https://godoc.org/github.com/NYTimes/gziphandler
+[license]: https://github.com/NYTimes/gziphandler/blob/master/LICENSE
diff --git a/gzhttp/asserts_test.go b/gzhttp/asserts_test.go
new file mode 100644
index 0000000000..987b848ea0
--- /dev/null
+++ b/gzhttp/asserts_test.go
@@ -0,0 +1,69 @@
+package gziphandler
+
+import (
+	"reflect"
+	"testing"
+)
+
+// assertEqual fails the test immediately unless want and got are equal
+// according to reflect.DeepEqual.
+func assertEqual(t testing.TB, want, got interface{}) {
+	t.Helper()
+	if !reflect.DeepEqual(want, got) {
+		t.Fatalf("want %#v, got %#v", want, got)
+	}
+}
+
+// assertNotEqual fails the test immediately if want and got are equal
+// according to reflect.DeepEqual. Here "want" is the value that got must
+// differ from.
+func assertNotEqual(t testing.TB, want, got interface{}) {
+	t.Helper()
+	if reflect.DeepEqual(want, got) {
+		// The values are equal at this point, so a "want X, got X" message
+		// would look like a success; say explicitly what was expected.
+		t.Fatalf("got %#v, want anything but %#v", got, want)
+	}
+}
+
+// assertNil fails the test immediately unless isNil reports object as nil.
+func assertNil(t testing.TB, object interface{}) {
+	t.Helper()
+	if !isNil(object) {
+		t.Fatalf("Expected value to be nil.")
+	}
+}
+
+// assertNotNil fails the test immediately if isNil reports object as nil.
+func assertNotNil(t testing.TB, object interface{}) {
+	t.Helper()
+	if isNil(object) {
+		t.Fatalf("Expected value not to be nil.")
+	}
+}
+
+// isNil reports whether object is nil, without failing the test. Besides an
+// untyped nil it also recognises a nil channel, func, interface, map, pointer
+// or slice stored inside the interface value.
+func isNil(object interface{}) bool {
+	if object == nil {
+		return true
+	}
+
+	value := reflect.ValueOf(object)
+	// Value.IsNil panics for kinds that cannot be nil, so it is only called
+	// for the kinds listed here.
+	isNilableKind := containsKind(
+		[]reflect.Kind{
+			reflect.Chan, reflect.Func,
+			reflect.Interface, reflect.Map,
+			reflect.Ptr, reflect.Slice},
+		value.Kind())
+
+	return isNilableKind && value.IsNil()
+}
+
+// containsKind checks if a specified kind in the slice of kinds.
+func containsKind(kinds []reflect.Kind, kind reflect.Kind) bool {
+	for _, candidate := range kinds {
+		if candidate == kind {
+			return true
+		}
+	}
+
+	return false
+}
diff --git a/gzhttp/gzip.go b/gzhttp/gzip.go
new file mode 100644
index 0000000000..dcca6288c8
--- /dev/null
+++ b/gzhttp/gzip.go
@@ -0,0 +1,519 @@
+package gziphandler
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"mime"
+	"net"
+	"net/http"
+	"strconv"
+	"strings"
+
+	"github.com/klauspost/compress/gzhttp/writer"
+	"github.com/klauspost/compress/gzhttp/writer/gzkp"
+	"github.com/klauspost/compress/gzip"
+)
+
+const (
+	vary            = "Vary"
+	acceptEncoding  = "Accept-Encoding"
+	contentEncoding = "Content-Encoding"
+	contentType     = "Content-Type"
+	contentLength   = "Content-Length"
+)
+
+type codings map[string]float64
+
+const (
+	// DefaultQValue is the default qvalue to assign to an encoding if no explicit qvalue is set.
+	// This is actually kind of ambiguous in RFC 2616, so hopefully it's correct.
+	// The examples seem to indicate that it is.
+	DefaultQValue = 1.0
+
+	// DefaultMinSize is the default minimum size until we enable gzip compression.
+	// 1500 bytes is the MTU size for the internet since that is the largest size allowed at the network layer.
+	// If you take a file that is 1300 bytes and compress it to 800 bytes, it’s still transmitted in that same 1500 byte packet regardless, so you’ve gained nothing.
+	// That being the case, you should restrict the gzip compression to files with a size greater than a single packet, 1400 bytes (1.4KB) is a safe value.
+	DefaultMinSize = 1400
+)
+
+// GzipResponseWriter provides an http.ResponseWriter interface, which gzips
+// bytes before writing them to the underlying response. This doesn't close the
+// writers, so don't forget to do that.
+// It can be configured to skip response smaller than minSize.
+type GzipResponseWriter struct {
+	http.ResponseWriter
+	level     int                      // Compression level passed to gwFactory.New.
+	gwFactory writer.GzipWriterFactory // Creates gw once compression has been chosen.
+	gw        writer.GzipWriter        // Active gzip writer; nil until startGzip has data to write.
+
+	code int // Saves the WriteHeader value.
+
+	minSize int    // Specifies the minimum response size to gzip. If the response length is bigger than this value, it is compressed.
+	buf     []byte // Holds the first part of the write before reaching the minSize or the end of the write.
+	ignore  bool   // If true, then we immediately passthru writes to the underlying ResponseWriter.
+
+	contentTypes []parsedContentType // Only compress if the response is one of these content-types. All are accepted if empty.
+}
+
+// GzipResponseWriterWithCloseNotify is a GzipResponseWriter that additionally
+// exposes CloseNotify by forwarding it to the wrapped http.ResponseWriter.
+type GzipResponseWriterWithCloseNotify struct {
+	*GzipResponseWriter
+}
+
+// CloseNotify forwards to the wrapped ResponseWriter's CloseNotify. The type
+// assertion is unchecked, so this panics if the wrapped ResponseWriter does
+// not implement http.CloseNotifier.
+func (w GzipResponseWriterWithCloseNotify) CloseNotify() <-chan bool {
+	return w.ResponseWriter.(http.CloseNotifier).CloseNotify()
+}
+
+// Write accepts response data. Until a decision has been made the data is
+// buffered; once the headers and the buffered size allow a decision, the
+// buffer is sent either through a gzip writer (startGzip) or unmodified
+// (startPlain), and later writes go straight to the chosen destination.
+//
+// While buffering, len(b) is reported as written even though nothing has
+// reached the underlying ResponseWriter yet.
+func (w *GzipResponseWriter) Write(b []byte) (int, error) {
+	// GZIP responseWriter is initialized. Use the GZIP responseWriter.
+	if w.gw != nil {
+		return w.gw.Write(b)
+	}
+
+	// If we have already decided not to use GZIP, immediately passthrough.
+	if w.ignore {
+		return w.ResponseWriter.Write(b)
+	}
+
+	// Save the write into a buffer for later use in GZIP responseWriter (if content is long enough) or at close with regular responseWriter.
+	// On the first write, w.buf changes from nil to a valid slice
+	w.buf = append(w.buf, b...)
+
+	var (
+		// A missing or unparsable Content-Length yields 0, which the checks
+		// below treat as "length unknown".
+		cl, _ = strconv.Atoi(w.Header().Get(contentLength))
+		ct    = w.Header().Get(contentType)
+		ce    = w.Header().Get(contentEncoding)
+	)
+	// Only continue if they didn't already choose an encoding or a known unhandled content length or type.
+	if ce == "" && (cl == 0 || cl >= w.minSize) && (ct == "" || handleContentType(w.contentTypes, ct)) {
+		// If the current buffer is less than minSize and a Content-Length isn't set, then wait until we have more data.
+		if len(w.buf) < w.minSize && cl == 0 {
+			return len(b), nil
+		}
+		// If the Content-Length is larger than minSize or the current buffer is larger than minSize, then continue.
+		if cl >= w.minSize || len(w.buf) >= w.minSize {
+			// If a Content-Type wasn't specified, infer it from the current buffer.
+			if ct == "" {
+				ct = http.DetectContentType(w.buf)
+				w.Header().Set(contentType, ct)
+			}
+			// If the Content-Type is acceptable to GZIP, initialize the GZIP writer.
+			if handleContentType(w.contentTypes, ct) {
+				if err := w.startGzip(); err != nil {
+					return 0, err
+				}
+				return len(b), nil
+			}
+		}
+	}
+	// If we got here, we should not GZIP this response.
+	if err := w.startPlain(); err != nil {
+		return 0, err
+	}
+	return len(b), nil
+}
+
+// startGzip switches the response to gzip: it sets Content-Encoding, drops
+// Content-Length, sends the saved status code (if any) and, when data has been
+// buffered, creates the gzip writer and writes the buffer through it.
+func (w *GzipResponseWriter) startGzip() error {
+	// Set the GZIP header.
+	w.Header().Set(contentEncoding, "gzip")
+
+	// if the Content-Length is already set, then calls to Write on gzip
+	// will fail to set the Content-Length header since its already set
+	// See: https://github.com/golang/go/issues/14975.
+	w.Header().Del(contentLength)
+
+	// Write the header to gzip response.
+	if w.code != 0 {
+		w.ResponseWriter.WriteHeader(w.code)
+		// Ensure that no other WriteHeader's happen
+		w.code = 0
+	}
+
+	// Initialize and flush the buffer into the gzip response if there are any bytes.
+	// If there aren't any, we shouldn't initialize it yet because on Close it will
+	// write the gzip header even if nothing was ever written.
+	if len(w.buf) > 0 {
+		// Initialize the GZIP response.
+		w.init()
+		n, err := w.gw.Write(w.buf)
+
+		// This should never happen (per io.Writer docs), but if the write didn't
+		// accept the entire buffer but returned no specific error, we have no clue
+		// what's going on, so abort just to be safe.
+		if err == nil && n < len(w.buf) {
+			err = io.ErrShortWrite
+		}
+		return err
+	}
+	return nil
+}
+
+// startPlain switches the response to pass-through mode: it sends the saved
+// status code (if any), marks the writer as ignoring compression and writes
+// any buffered bytes to the underlying ResponseWriter without gzip.
+func (w *GzipResponseWriter) startPlain() error {
+	if w.code != 0 {
+		w.ResponseWriter.WriteHeader(w.code)
+		// Ensure that no other WriteHeader's happen
+		w.code = 0
+	}
+	w.ignore = true
+	// If Write was never called then don't call Write on the underlying ResponseWriter.
+	if w.buf == nil {
+		return nil
+	}
+	n, err := w.ResponseWriter.Write(w.buf)
+	// This should never happen (per io.Writer docs), but if the write didn't
+	// accept the entire buffer but returned no specific error, we have no clue
+	// what's going on, so abort just to be safe.
+	//
+	// The comparison must happen before the buffer is released below;
+	// against a nil buffer n < len(w.buf) can never be true.
+	if err == nil && n < len(w.buf) {
+		err = io.ErrShortWrite
+	}
+	w.buf = nil
+	return err
+}
+
+// WriteHeader just saves the response code until close or GZIP effective writes.
+// Only the first non-zero code is kept.
+func (w *GzipResponseWriter) WriteHeader(code int) {
+	if w.code == 0 {
+		w.code = code
+	}
+}
+
+// init creates the gzip writer through gwFactory, targeting the underlying
+// ResponseWriter at the configured level.
+func (w *GzipResponseWriter) init() {
+	// Bytes written during ServeHTTP are redirected to this gzip writer
+	// before being written to the underlying response.
+	w.gw = w.gwFactory.New(w.ResponseWriter, w.level)
+}
+
+// Close finishes the response. In pass-through mode it does nothing. If no
+// mode has been chosen yet, the buffered bytes are written uncompressed via
+// startPlain. Otherwise the gzip writer is closed and released.
+func (w *GzipResponseWriter) Close() error {
+	if w.ignore {
+		return nil
+	}
+
+	if w.gw == nil {
+		// GZIP not triggered yet, write out regular response.
+		err := w.startPlain()
+		// Returns the error if any at write.
+		if err != nil {
+			err = fmt.Errorf("gziphandler: write to regular responseWriter at close gets error: %q", err.Error())
+		}
+		return err
+	}
+
+	err := w.gw.Close()
+	w.gw = nil
+	return err
+}
+
+// Flush flushes the underlying *gzip.Writer and then the underlying
+// http.ResponseWriter if it is an http.Flusher.
+// This makes GzipResponseWriter an http.Flusher.
+func (w *GzipResponseWriter) Flush() {
+	switch {
+	case w.gw != nil:
+		w.gw.Flush()
+	case !w.ignore:
+		// Neither startGzip nor startPlain has run yet, so it is not known
+		// whether a plain or gzipped response will be served. Flush is a
+		// no-op until that has been decided.
+		return
+	}
+
+	if flusher, ok := w.ResponseWriter.(http.Flusher); ok {
+		flusher.Flush()
+	}
+}
+
+// Hijack implements http.Hijacker by delegating to the underlying
+// ResponseWriter when it is an http.Hijacker; otherwise an error is returned.
+func (w *GzipResponseWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) {
+	hijacker, ok := w.ResponseWriter.(http.Hijacker)
+	if !ok {
+		return nil, nil, fmt.Errorf("http.Hijacker interface is not supported")
+	}
+	return hijacker.Hijack()
+}
+
+// Compile-time check that *GzipResponseWriter implements http.Hijacker.
+var _ http.Hijacker = (*GzipResponseWriter)(nil)
+
+// MustNewGzipLevelHandler returns the wrapper produced by NewGzipHandler for
+// the given compression level, and panics instead of returning an error when
+// the configuration is rejected.
+func MustNewGzipLevelHandler(level int) func(http.Handler) http.Handler {
+	wrapper, err := NewGzipHandler(CompressionLevel(level))
+	if err != nil {
+		panic(err)
+	}
+	return wrapper
+}
+
+// NewGzipLevelAndMinSize returns a wrapper that compresses at the given level
+// and lets the caller specify the minimum size before compression.
+func NewGzipLevelAndMinSize(level, minSize int) (func(http.Handler) http.Handler, error) {
+	return NewGzipHandler(CompressionLevel(level), MinSize(minSize))
+}
+
+// NewGzipHandler builds a handler wrapper from the given options. Unless
+// overridden, it uses gzip.DefaultCompression, DefaultMinSize and the gzkp
+// writer. An error is returned when the resulting configuration fails
+// validation.
+//
+// The wrapper adds "Vary: Accept-Encoding" to every response and routes the
+// response through a GzipResponseWriter only when the request accepts gzip.
+func NewGzipHandler(opts ...option) (func(http.Handler) http.Handler, error) {
+	cfg := &config{
+		level:   gzip.DefaultCompression,
+		minSize: DefaultMinSize,
+		writer: writer.GzipWriterFactory{
+			Levels: gzkp.Levels,
+			New:    gzkp.NewWriter,
+		},
+	}
+	for _, apply := range opts {
+		apply(cfg)
+	}
+	if err := cfg.validate(); err != nil {
+		return nil, err
+	}
+
+	wrap := func(h http.Handler) http.Handler {
+		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			w.Header().Add(vary, acceptEncoding)
+			if !acceptsGzip(r) {
+				h.ServeHTTP(w, r)
+				return
+			}
+
+			gw := &GzipResponseWriter{
+				ResponseWriter: w,
+				gwFactory:      cfg.writer,
+				level:          cfg.level,
+				minSize:        cfg.minSize,
+				contentTypes:   cfg.contentTypes,
+			}
+			// Close writes out whatever is still buffered once the wrapped
+			// handler has returned.
+			defer gw.Close()
+
+			// Only expose CloseNotify when the original writer supports it.
+			if _, ok := w.(http.CloseNotifier); ok {
+				h.ServeHTTP(GzipResponseWriterWithCloseNotify{gw}, r)
+				return
+			}
+			h.ServeHTTP(gw, r)
+		})
+	}
+	return wrap, nil
+}
+
+// parsedContentType is the parsed form of one of the inputs to ContentTypes.
+// See https://golang.org/pkg/mime/#ParseMediaType
+type parsedContentType struct {
+	mediaType string
+	params    map[string]string
+}
+
+// equals reports whether the given media type and parameters match pct. When
+// pct carries no parameters only the media type is compared; otherwise both
+// parameter sets must be identical.
+func (pct parsedContentType) equals(mediaType string, params map[string]string) bool {
+	switch {
+	case pct.mediaType != mediaType:
+		return false
+	case len(pct.params) == 0:
+		// Nothing configured beyond the media type: ignore the other side's
+		// parameters.
+		return true
+	case len(pct.params) != len(params):
+		return false
+	}
+
+	for key, want := range pct.params {
+		got, found := params[key]
+		if !found || got != want {
+			return false
+		}
+	}
+	return true
+}
+
+// Used for functional configuration.
+type config struct {
+	minSize      int                      // Smallest response size that is compressed.
+	level        int                      // Compression level handed to the writer.
+	writer       writer.GzipWriterFactory // Gzip implementation in use.
+	contentTypes []parsedContentType      // Content types to compress; empty means all.
+}
+
+// validate checks the assembled configuration. It returns an error when the
+// writer implementation is missing one of its functions, when the level lies
+// outside the range reported by the writer, or when minSize is negative.
+func (c *config) validate() error {
+	// A zero-valued factory would otherwise cause a nil function call, here
+	// or later when the first response is compressed.
+	if c.writer.Levels == nil || c.writer.New == nil {
+		return fmt.Errorf("gzip writer implementation must provide both Levels and New")
+	}
+
+	lo, hi := c.writer.Levels()
+	if c.level < lo || c.level > hi {
+		return fmt.Errorf("invalid compression level requested: %d, valid range %d -> %d", c.level, lo, hi)
+	}
+
+	// Zero is accepted; only negative sizes are rejected.
+	if c.minSize < 0 {
+		return fmt.Errorf("minimum size must not be negative: %d", c.minSize)
+	}
+
+	return nil
+}
+
+// option mutates a config; see NewGzipHandler.
+type option func(c *config)
+
+// MinSize sets the minimum response size, in bytes, before compression is
+// applied. Negative values are rejected when the configuration is validated.
+func MinSize(size int) option {
+	return func(c *config) {
+		c.minSize = size
+	}
+}
+
+// CompressionLevel sets the compression level. The level is checked against
+// the range reported by the writer implementation when the configuration is
+// validated.
+func CompressionLevel(level int) option {
+	return func(c *config) {
+		c.level = level
+	}
+}
+
+// Implementation changes the implementation of GzipWriter.
+//
+// When this option is not used, NewGzipHandler installs the factory from
+// writer/gzkp.
+func Implementation(writer writer.GzipWriterFactory) option {
+	return func(c *config) {
+		c.writer = writer
+	}
+}
+
+// ContentTypes specifies a list of content types to compare
+// the Content-Type header to before compressing. If none
+// match, the response will be returned as-is.
+//
+// Content types are compared in a case-insensitive, whitespace-ignored
+// manner.
+//
+// A MIME type without any other directive will match a content type
+// that has the same MIME type, regardless of that content type's other
+// directives. I.e., "text/html" will match both "text/html" and
+// "text/html; charset=utf-8".
+//
+// A MIME type with any other directive will only match a content type
+// that has the same MIME type and other directives. I.e.,
+// "text/html; charset=utf-8" will only match "text/html; charset=utf-8".
+//
+// By default, responses are gzipped regardless of
+// Content-Type.
+func ContentTypes(types []string) option {
+	return func(c *config) {
+		c.contentTypes = make([]parsedContentType, 0, len(types))
+		for _, v := range types {
+			// Entries that do not parse as a media type are skipped.
+			// NOTE: if nothing remains, the list is empty and every
+			// content type is compressed (see handleContentType).
+			mediaType, params, err := mime.ParseMediaType(v)
+			if err == nil {
+				c.contentTypes = append(c.contentTypes, parsedContentType{mediaType, params})
+			}
+		}
+	}
+}
+
+/*
+NOTE(review): unfinished draft of a predicate-based filter. It is kept as
+found; it would not compile as written (no return type, and it refers to an
+undefined "types").
+
+func ContentTypeFilter(func(contentType string) bool) {
+	return func(c *config) {
+		c.contentTypes = []parsedContentType{}
+		for _, v := range types {
+			mediaType, params, err := mime.ParseMediaType(v)
+			if err == nil {
+				c.contentTypes = append(c.contentTypes, parsedContentType{mediaType, params})
+			}
+		}
+	}
+}
+*/
+
+// GzipHandler wraps an HTTP handler, to transparently gzip the response body if
+// the client supports it (via the Accept-Encoding header). Without options it
+// uses the defaults of NewGzipHandler.
+//
+// GzipHandler panics if the options produce an invalid configuration; use
+// NewGzipHandler to receive that error instead.
+func GzipHandler(h http.Handler, opts ...option) http.Handler {
+	wrapper, err := NewGzipHandler(opts...)
+	if err != nil {
+		// NewGzipHandler returns a nil wrapper together with the error;
+		// calling it would crash without saying which option was wrong.
+		panic(err)
+	}
+	return wrapper(h)
+}
+
+// acceptsGzip returns true if the given HTTP request indicates that it will
+// accept a gzipped response, i.e. "gzip" is listed with a qvalue above zero.
+func acceptsGzip(r *http.Request) bool {
+	// Parse errors are ignored on purpose: the codings that did parse are
+	// still returned alongside the error.
+	acceptedEncodings, _ := parseEncodings(r.Header.Get(acceptEncoding))
+	return acceptedEncodings["gzip"] > 0.0
+}
+
+// handleContentType returns true if we've been configured to compress the
+// specific content type. A content type that cannot be parsed is not handled.
+func handleContentType(contentTypes []parsedContentType, ct string) bool {
+	// If contentTypes is empty we handle all content types.
+	if len(contentTypes) == 0 {
+		return true
+	}
+
+	mediaType, params, err := mime.ParseMediaType(ct)
+	if err != nil {
+		return false
+	}
+
+	for _, c := range contentTypes {
+		if c.equals(mediaType, params) {
+			return true
+		}
+	}
+
+	return false
+}
+
+// parseEncodings attempts to parse a list of codings, per RFC 2616, as might
+// appear in an Accept-Encoding header. It returns a map of content-codings to
+// quality values, and an error containing the errors encountered.
+// It's probably safe to ignore those, because silently ignoring errors is how
+// the internet works. Codings that fail to parse are left out of the map.
+//
+// See: http://tools.ietf.org/html/rfc2616#section-14.3.
+func parseEncodings(s string) (codings, error) {
+	split := strings.Split(s, ",")
+	c := make(codings, len(split))
+	var e []string
+
+	for _, ss := range split {
+		coding, qvalue, err := parseCoding(ss)
+
+		if err != nil {
+			e = append(e, err.Error())
+		} else {
+			c[coding] = qvalue
+		}
+	}
+
+	// TODO (adammck): Use a proper multi-error struct, so the individual errors
+	// can be extracted if anyone cares.
+	if len(e) > 0 {
+		return c, fmt.Errorf("errors while parsing encodings: %s", strings.Join(e, ", "))
+	}
+
+	return c, nil
+}
+
+// parseCoding parses a single coding (content-coding with an optional qvalue),
+// as might appear in an Accept-Encoding header. It attempts to forgive minor
+// formatting errors.
+//
+// The coding is lowercased. The qvalue defaults to DefaultQValue and is
+// clamped to the range 0.0 to 1.0. Parameters other than "q=" are ignored. An
+// error is returned for an empty coding or a qvalue that is not a number.
+func parseCoding(s string) (coding string, qvalue float64, err error) {
+	// Set the default once, before the loop. Resetting it for every part
+	// would let any parameter that follows "q=" (e.g. "gzip;q=0;x=y")
+	// overwrite the parsed value with the default again.
+	qvalue = DefaultQValue
+
+	for n, part := range strings.Split(s, ";") {
+		part = strings.TrimSpace(part)
+
+		if n == 0 {
+			coding = strings.ToLower(part)
+		} else if strings.HasPrefix(part, "q=") {
+			qvalue, err = strconv.ParseFloat(strings.TrimPrefix(part, "q="), 64)
+
+			if qvalue < 0.0 {
+				qvalue = 0.0
+			} else if qvalue > 1.0 {
+				qvalue = 1.0
+			}
+		}
+	}
+
+	if coding == "" {
+		err = fmt.Errorf("empty content-coding")
+	}
+
+	return
+}
diff --git a/gzhttp/gzip_test.go b/gzhttp/gzip_test.go
new file mode 100644
index 0000000000..ca5ab9f909
--- /dev/null
+++ b/gzhttp/gzip_test.go
@@ -0,0 +1,651 @@
+package gziphandler
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"net"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"strconv"
+	"testing"
+
+	"github.com/klauspost/compress/gzip"
+)
+
+const (
+	smallTestBody = "aaabbcaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbc"
+	testBody      =
"aaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbccc aaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbccc aaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbccc 
aaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbccc aaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbccc aaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbccc 
aaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbcccaaabbbccc" +) + +func TestParseEncodings(t *testing.T) { + examples := map[string]codings{ + + // Examples from RFC 2616 + "compress, gzip": {"compress": 1.0, "gzip": 1.0}, + "": {}, + "*": {"*": 1.0}, + "compress;q=0.5, gzip;q=1.0": {"compress": 0.5, "gzip": 1.0}, + "gzip;q=1.0, identity; q=0.5, *;q=0": {"gzip": 1.0, "identity": 0.5, "*": 0.0}, + + // More random stuff + "AAA;q=1": {"aaa": 1.0}, + "BBB ; q = 2": {"bbb": 1.0}, + } + + for eg, exp := range examples { + act, _ := parseEncodings(eg) + assertEqual(t, exp, act) + } +} + +func TestGzipHandler(t *testing.T) { + // This just exists to provide something for GzipHandler to wrap. 
+ handler := newTestHandler(testBody) + + // requests without accept-encoding are passed along as-is + + req1, _ := http.NewRequest("GET", "/whatever", nil) + resp1 := httptest.NewRecorder() + handler.ServeHTTP(resp1, req1) + res1 := resp1.Result() + + assertEqual(t, 200, res1.StatusCode) + assertEqual(t, "", res1.Header.Get("Content-Encoding")) + assertEqual(t, "Accept-Encoding", res1.Header.Get("Vary")) + assertEqual(t, testBody, resp1.Body.String()) + + // but requests with accept-encoding:gzip are compressed if possible + + req2, _ := http.NewRequest("GET", "/whatever", nil) + req2.Header.Set("Accept-Encoding", "gzip") + resp2 := httptest.NewRecorder() + handler.ServeHTTP(resp2, req2) + res2 := resp2.Result() + + assertEqual(t, 200, res2.StatusCode) + assertEqual(t, "gzip", res2.Header.Get("Content-Encoding")) + assertEqual(t, "Accept-Encoding", res2.Header.Get("Vary")) + assertEqual(t, gzipStrLevel(testBody, gzip.DefaultCompression), resp2.Body.Bytes()) + + // content-type header is correctly set based on uncompressed body + + req3, _ := http.NewRequest("GET", "/whatever", nil) + req3.Header.Set("Accept-Encoding", "gzip") + res3 := httptest.NewRecorder() + handler.ServeHTTP(res3, req3) + + assertEqual(t, http.DetectContentType([]byte(testBody)), res3.Header().Get("Content-Type")) +} + +func TestGzipHandlerSmallBodyNoCompression(t *testing.T) { + handler := newTestHandler(smallTestBody) + + req, _ := http.NewRequest("GET", "/whatever", nil) + req.Header.Set("Accept-Encoding", "gzip") + resp := httptest.NewRecorder() + handler.ServeHTTP(resp, req) + res := resp.Result() + + // with less than 1400 bytes the response should not be gzipped + + assertEqual(t, 200, res.StatusCode) + assertEqual(t, "", res.Header.Get("Content-Encoding")) + assertEqual(t, "Accept-Encoding", res.Header.Get("Vary")) + assertEqual(t, smallTestBody, resp.Body.String()) + +} + +func TestGzipHandlerAlreadyCompressed(t *testing.T) { + handler := newTestHandler(testBody) + + req, _ := 
http.NewRequest("GET", "/gzipped", nil) + req.Header.Set("Accept-Encoding", "gzip") + res := httptest.NewRecorder() + handler.ServeHTTP(res, req) + + assertEqual(t, testBody, res.Body.String()) +} + +func TestNewGzipLevelHandler(t *testing.T) { + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + io.WriteString(w, testBody) + }) + + for lvl := gzip.StatelessCompression; lvl <= gzip.BestCompression; lvl++ { + t.Run(fmt.Sprint(lvl), func(t *testing.T) { + wrapper, err := NewGzipHandler(CompressionLevel(lvl)) + assertNil(t, err) + + req, _ := http.NewRequest("GET", "/whatever", nil) + req.Header.Set("Accept-Encoding", "gzip") + resp := httptest.NewRecorder() + wrapper(handler).ServeHTTP(resp, req) + res := resp.Result() + + assertEqual(t, 200, res.StatusCode) + assertEqual(t, "gzip", res.Header.Get("Content-Encoding")) + assertEqual(t, "Accept-Encoding", res.Header.Get("Vary")) + got := gzipStrLevel(testBody, lvl) + assertEqual(t, got, resp.Body.Bytes()) + t.Log(lvl, len(got)) + }) + } +} + +func TestNewGzipLevelHandlerReturnsErrorForInvalidLevels(t *testing.T) { + var err error + _, err = NewGzipHandler(CompressionLevel(-42)) + assertNotNil(t, err) + + _, err = NewGzipHandler(CompressionLevel(42)) + assertNotNil(t, err) +} + +func TestMustNewGzipLevelHandlerWillPanic(t *testing.T) { + defer func() { + if r := recover(); r == nil { + t.Error("panic was not called") + } + }() + + _ = MustNewGzipLevelHandler(-42) +} + +func TestGzipHandlerNoBody(t *testing.T) { + tests := []struct { + statusCode int + contentEncoding string + emptyBody bool + body []byte + }{ + // Body must be empty. + {http.StatusNoContent, "", true, nil}, + {http.StatusNotModified, "", true, nil}, + // Body is going to get gzip'd no matter what. 
+ {http.StatusOK, "", true, []byte{}}, + {http.StatusOK, "gzip", false, []byte(testBody)}, + } + + for num, test := range tests { + t.Run(fmt.Sprintf("test-%d", num), func(t *testing.T) { + handler := GzipHandler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(test.statusCode) + if test.body != nil { + w.Write(test.body) + } + })) + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/", nil) + req.Header.Set("Accept-Encoding", "gzip") + handler.ServeHTTP(rec, req) + + body, err := ioutil.ReadAll(rec.Body) + if err != nil { + t.Fatalf("Unexpected error reading response body: %v", err) + } + + header := rec.Header() + assertEqual(t, test.contentEncoding, header.Get("Content-Encoding")) + assertEqual(t, "Accept-Encoding", header.Get("Vary")) + if test.emptyBody { + assertEqual(t, 0, len(body)) + } else { + assertNotEqual(t, 0, len(body)) + assertNotEqual(t, test.body, body) + } + }) + + } +} + +func TestGzipHandlerContentLength(t *testing.T) { + testBodyBytes := []byte(testBody) + tests := []struct { + bodyLen int + bodies [][]byte + emptyBody bool + }{ + {len(testBody), [][]byte{testBodyBytes}, false}, + // each of these writes is less than the DefaultMinSize + {len(testBody), [][]byte{testBodyBytes[:200], testBodyBytes[200:]}, false}, + // without a defined Content-Length it should still gzip + {0, [][]byte{testBodyBytes[:200], testBodyBytes[200:]}, false}, + // simulate a HEAD request with an empty write (to populate headers) + {len(testBody), [][]byte{nil}, true}, + } + + // httptest.NewRecorder doesn't give you access to the Content-Length + // header so instead, we create a server on a random port and make + // a request to that instead + ln, err := net.Listen("tcp", "127.0.0.1:") + if err != nil { + t.Fatalf("failed creating listen socket: %v", err) + } + defer ln.Close() + srv := &http.Server{ + Handler: nil, + } + go srv.Serve(ln) + + for num, test := range tests { + t.Run(fmt.Sprintf("test-%d", num), 
func(t *testing.T) { + srv.Handler = GzipHandler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if test.bodyLen > 0 { + w.Header().Set("Content-Length", strconv.Itoa(test.bodyLen)) + } + for _, b := range test.bodies { + w.Write(b) + } + })) + req := &http.Request{ + Method: "GET", + URL: &url.URL{Path: "/", Scheme: "http", Host: ln.Addr().String()}, + Header: make(http.Header), + Close: true, + } + req.Header.Set("Accept-Encoding", "gzip") + res, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("Unexpected error making http request in test iteration %d: %v", num, err) + } + defer res.Body.Close() + + body, err := ioutil.ReadAll(res.Body) + if err != nil { + t.Fatalf("Unexpected error reading response body in test iteration %d: %v", num, err) + } + + l, err := strconv.Atoi(res.Header.Get("Content-Length")) + if err != nil { + t.Fatalf("Unexpected error parsing Content-Length in test iteration %d: %v", num, err) + } + if test.emptyBody { + assertEqual(t, 0, len(body)) + assertEqual(t, 0, l) + } else { + assertEqual(t, len(body), l) + } + assertEqual(t, "gzip", res.Header.Get("Content-Encoding")) + assertNotEqual(t, test.bodyLen, l) + }) + } +} + +func TestGzipHandlerMinSizeMustBePositive(t *testing.T) { + _, err := NewGzipLevelAndMinSize(gzip.DefaultCompression, -1) + assertNotNil(t, err) +} + +func TestGzipHandlerMinSize(t *testing.T) { + responseLength := 0 + b := []byte{'x'} + + wrapper, _ := NewGzipLevelAndMinSize(gzip.DefaultCompression, 128) + handler := wrapper(http.HandlerFunc( + func(w http.ResponseWriter, r *http.Request) { + // Write responses one byte at a time to ensure that the flush + // mechanism, if used, is working properly. 
+ for i := 0; i < responseLength; i++ { + n, err := w.Write(b) + assertEqual(t, 1, n) + assertNil(t, err) + } + }, + )) + + r, _ := http.NewRequest("GET", "/whatever", &bytes.Buffer{}) + r.Header.Add("Accept-Encoding", "gzip") + + // Short response is not compressed + responseLength = 127 + w := httptest.NewRecorder() + handler.ServeHTTP(w, r) + if w.Result().Header.Get(contentEncoding) == "gzip" { + t.Error("Expected uncompressed response, got compressed") + } + + // Long response is compressed + responseLength = 128 + w = httptest.NewRecorder() + handler.ServeHTTP(w, r) + if w.Result().Header.Get(contentEncoding) != "gzip" { + t.Error("Expected compressed response, got uncompressed") + } +} + +type panicOnSecondWriteHeaderWriter struct { + http.ResponseWriter + headerWritten bool +} + +func (w *panicOnSecondWriteHeaderWriter) WriteHeader(s int) { + if w.headerWritten { + panic("header already written") + } + w.headerWritten = true + w.ResponseWriter.WriteHeader(s) +} + +func TestGzipHandlerDoubleWriteHeader(t *testing.T) { + handler := GzipHandler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Length", "15000") + // Specifically write the header here + w.WriteHeader(304) + // Ensure that after a Write the header isn't triggered again on close + w.Write(nil) + })) + wrapper := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w = &panicOnSecondWriteHeaderWriter{ + ResponseWriter: w, + } + handler.ServeHTTP(w, r) + }) + + rec := httptest.NewRecorder() + // TODO: httptest.NewRequest was introduced in Go 1.7; it should be used + // once 1.6 is no longer supported.
+ req := &http.Request{ + Method: "GET", + URL: &url.URL{Path: "/"}, + Proto: "HTTP/1.1", + ProtoMinor: 1, + RemoteAddr: "192.0.2.1:1234", + Header: make(http.Header), + } + req.Header.Set("Accept-Encoding", "gzip") + wrapper.ServeHTTP(rec, req) + body, err := ioutil.ReadAll(rec.Body) + if err != nil { + t.Fatalf("Unexpected error reading response body: %v", err) + } + assertEqual(t, 0, len(body)) + header := rec.Header() + assertEqual(t, "gzip", header.Get("Content-Encoding")) + assertEqual(t, "Accept-Encoding", header.Get("Vary")) + assertEqual(t, 304, rec.Code) +} + +func TestStatusCodes(t *testing.T) { + handler := GzipHandler(http.NotFoundHandler()) + r := httptest.NewRequest("GET", "/", nil) + r.Header.Set("Accept-Encoding", "gzip") + w := httptest.NewRecorder() + handler.ServeHTTP(w, r) + + result := w.Result() + if result.StatusCode != 404 { + t.Errorf("StatusCode should have been 404 but was %d", result.StatusCode) + } +} + +func TestFlushBeforeWrite(t *testing.T) { + b := []byte(testBody) + handler := GzipHandler(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) { + rw.WriteHeader(http.StatusNotFound) + rw.(http.Flusher).Flush() + rw.Write(b) + })) + r := httptest.NewRequest(http.MethodGet, "/", nil) + r.Header.Set("Accept-Encoding", "gzip") + w := httptest.NewRecorder() + handler.ServeHTTP(w, r) + + res := w.Result() + assertEqual(t, http.StatusNotFound, res.StatusCode) + assertEqual(t, "gzip", res.Header.Get("Content-Encoding")) + assertNotEqual(t, b, w.Body.Bytes()) +} + +func TestImplementCloseNotifier(t *testing.T) { + request := httptest.NewRequest(http.MethodGet, "/", nil) + request.Header.Set(acceptEncoding, "gzip") + GzipHandler(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) { + _, ok := rw.(http.CloseNotifier) + // response writer must implement http.CloseNotifier + assertEqual(t, true, ok) + })).ServeHTTP(&mockRWCloseNotify{}, request) +} + +func TestImplementFlusherAndCloseNotifier(t *testing.T) { + request := 
httptest.NewRequest(http.MethodGet, "/", nil) + request.Header.Set(acceptEncoding, "gzip") + GzipHandler(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) { + _, okCloseNotifier := rw.(http.CloseNotifier) + // response writer must implement http.CloseNotifier + assertEqual(t, true, okCloseNotifier) + _, okFlusher := rw.(http.Flusher) + // "response writer must implement http.Flusher" + assertEqual(t, true, okFlusher) + })).ServeHTTP(&mockRWCloseNotify{}, request) +} + +func TestNotImplementCloseNotifier(t *testing.T) { + request := httptest.NewRequest(http.MethodGet, "/", nil) + request.Header.Set(acceptEncoding, "gzip") + GzipHandler(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) { + _, ok := rw.(http.CloseNotifier) + // response writer must not implement http.CloseNotifier + assertEqual(t, false, ok) + })).ServeHTTP(httptest.NewRecorder(), request) +} + +type mockRWCloseNotify struct{} + +func (m *mockRWCloseNotify) CloseNotify() <-chan bool { + panic("implement me") +} + +func (m *mockRWCloseNotify) Header() http.Header { + return http.Header{} +} + +func (m *mockRWCloseNotify) Write([]byte) (int, error) { + panic("implement me") +} + +func (m *mockRWCloseNotify) WriteHeader(int) { + panic("implement me") +} + +func TestIgnoreSubsequentWriteHeader(t *testing.T) { + handler := GzipHandler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(500) + w.WriteHeader(404) + })) + r := httptest.NewRequest("GET", "/", nil) + r.Header.Set("Accept-Encoding", "gzip") + w := httptest.NewRecorder() + handler.ServeHTTP(w, r) + + result := w.Result() + if result.StatusCode != 500 { + t.Errorf("StatusCode should have been 500 but was %d", result.StatusCode) + } +} + +func TestDontWriteWhenNotWrittenTo(t *testing.T) { + // When using gzip as middleware without ANY writes in the handler, + // ensure the gzip middleware doesn't touch the actual ResponseWriter + // either. 
+ + handler0 := GzipHandler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + })) + + handler1 := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + handler0.ServeHTTP(w, r) + w.WriteHeader(404) // this only works if gzip didn't do a WriteHeader(200) + }) + + r := httptest.NewRequest("GET", "/", nil) + r.Header.Set("Accept-Encoding", "gzip") + w := httptest.NewRecorder() + handler1.ServeHTTP(w, r) + + result := w.Result() + if result.StatusCode != 404 { + t.Errorf("StatusCode should have been 404 but was %d", result.StatusCode) + } +} + +var contentTypeTests = []struct { + name string + contentType string + acceptedContentTypes []string + expectedGzip bool +}{ + { + name: "Always gzip when content types are empty", + contentType: "", + acceptedContentTypes: []string{}, + expectedGzip: true, + }, + { + name: "MIME match", + contentType: "application/json", + acceptedContentTypes: []string{"application/json"}, + expectedGzip: true, + }, + { + name: "MIME no match", + contentType: "text/xml", + acceptedContentTypes: []string{"application/json"}, + expectedGzip: false, + }, + { + name: "MIME match with no other directive ignores non-MIME directives", + contentType: "application/json; charset=utf-8", + acceptedContentTypes: []string{"application/json"}, + expectedGzip: true, + }, + { + name: "MIME match with other directives requires all directives be equal, different charset", + contentType: "application/json; charset=ascii", + acceptedContentTypes: []string{"application/json; charset=utf-8"}, + expectedGzip: false, + }, + { + name: "MIME match with other directives requires all directives be equal, same charset", + contentType: "application/json; charset=utf-8", + acceptedContentTypes: []string{"application/json; charset=utf-8"}, + expectedGzip: true, + }, + { + name: "MIME match with other directives requires all directives be equal, missing charset", + contentType: "application/json", + acceptedContentTypes: []string{"application/json; 
charset=ascii"}, + expectedGzip: false, + }, + { + name: "MIME match case insensitive", + contentType: "Application/Json", + acceptedContentTypes: []string{"application/json"}, + expectedGzip: true, + }, + { + name: "MIME match ignore whitespace", + contentType: "application/json;charset=utf-8", + acceptedContentTypes: []string{"application/json; charset=utf-8"}, + expectedGzip: true, + }, +} + +func TestContentTypes(t *testing.T) { + for _, tt := range contentTypeTests { + t.Run(tt.name, func(t *testing.T) { + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Header().Set("Content-Type", tt.contentType) + io.WriteString(w, testBody) + }) + + wrapper, err := NewGzipHandler(ContentTypes(tt.acceptedContentTypes)) + assertNil(t, err) + + req, _ := http.NewRequest("GET", "/whatever", nil) + req.Header.Set("Accept-Encoding", "gzip") + resp := httptest.NewRecorder() + wrapper(handler).ServeHTTP(resp, req) + res := resp.Result() + + assertEqual(t, 200, res.StatusCode) + if tt.expectedGzip { + assertEqual(t, "gzip", res.Header.Get("Content-Encoding")) + } else { + assertNotEqual(t, "gzip", res.Header.Get("Content-Encoding")) + } + }) + } +} + +// -------------------------------------------------------------------- + +func BenchmarkGzipHandler_S2k(b *testing.B) { benchmark(b, false, 2048) } +func BenchmarkGzipHandler_S20k(b *testing.B) { benchmark(b, false, 20480) } +func BenchmarkGzipHandler_S100k(b *testing.B) { benchmark(b, false, 102400) } +func BenchmarkGzipHandler_P2k(b *testing.B) { benchmark(b, true, 2048) } +func BenchmarkGzipHandler_P20k(b *testing.B) { benchmark(b, true, 20480) } +func BenchmarkGzipHandler_P100k(b *testing.B) { benchmark(b, true, 102400) } + +// -------------------------------------------------------------------- + +func gzipStrLevel(s string, lvl int) []byte { + var b bytes.Buffer + w, _ := gzip.NewWriterLevel(&b, lvl) + io.WriteString(w, s) + w.Close() + return b.Bytes() +} + +func 
benchmark(b *testing.B, parallel bool, size int) { + bin, err := ioutil.ReadFile("testdata/benchmark.json") + if err != nil { + b.Fatal(err) + } + + req, _ := http.NewRequest("GET", "/whatever", nil) + req.Header.Set("Accept-Encoding", "gzip") + handler := newTestHandler(string(bin[:size])) + + b.ReportAllocs() + b.SetBytes(int64(size)) + if parallel { + b.ResetTimer() + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + runBenchmark(b, req, handler) + } + }) + } else { + b.ResetTimer() + for i := 0; i < b.N; i++ { + runBenchmark(b, req, handler) + } + } +} + +func runBenchmark(b *testing.B, req *http.Request, handler http.Handler) { + res := httptest.NewRecorder() + handler.ServeHTTP(res, req) + if code := res.Code; code != 200 { + b.Fatalf("Expected 200 but got %d", code) + } else if blen := res.Body.Len(); blen < 500 { + b.Fatalf("Expected complete response body, but got %d bytes", blen) + } +} + +func newTestHandler(body string) http.Handler { + return GzipHandler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/gzipped": + w.Header().Set("Content-Encoding", "gzip") + io.WriteString(w, body) + default: + io.WriteString(w, body) + } + })) +} diff --git a/gzhttp/writer/gzkp/gzkp.go b/gzhttp/writer/gzkp/gzkp.go new file mode 100644 index 0000000000..ed01074253 --- /dev/null +++ b/gzhttp/writer/gzkp/gzkp.go @@ -0,0 +1,68 @@ +package gzkp + +import ( + "io" + "sync" + + "github.com/klauspost/compress/gzhttp/writer" + "github.com/klauspost/compress/gzip" +) + +// gzipWriterPools stores a sync.Pool for each compression level for reuse of +// gzip.Writers. Use poolIndex to convert a compression level to an index into +// gzipWriterPools. +var gzipWriterPools [gzip.BestCompression - gzip.StatelessCompression + 1]*sync.Pool + +func init() { + for i := gzip.StatelessCompression; i <= gzip.BestCompression; i++ { + addLevelPool(i) + } +} + +// poolIndex maps a compression level to its index into gzipWriterPools.
It +// assumes that level is a valid gzip compression level. +func poolIndex(level int) int { + return level - gzip.StatelessCompression +} + +func addLevelPool(level int) { + gzipWriterPools[poolIndex(level)] = &sync.Pool{ + New: func() interface{} { + // NewWriterLevel only returns error on a bad level, we are guaranteeing + // that this will be a valid level so it is okay to ignore the returned + // error. + w, _ := gzip.NewWriterLevel(nil, level) + return w + }, + } +} + +type pooledWriter struct { + *gzip.Writer + index int +} + +func (pw *pooledWriter) Close() error { + err := pw.Writer.Close() + gzipWriterPools[pw.index].Put(pw.Writer) + pw.Writer = nil + return err +} + +func NewWriter(w io.Writer, level int) writer.GzipWriter { + index := poolIndex(level) + gzw := gzipWriterPools[index].Get().(*gzip.Writer) + gzw.Reset(w) + return &pooledWriter{ + Writer: gzw, + index: index, + } +} + +func Levels() (min, max int) { + return gzip.StatelessCompression, gzip.BestCompression +} + +func ImplementationInfo() string { + return "klauspost/compress/gzip" +} diff --git a/gzhttp/writer/gzkp/gzkp_test.go b/gzhttp/writer/gzkp/gzkp_test.go new file mode 100644 index 0000000000..467255ee84 --- /dev/null +++ b/gzhttp/writer/gzkp/gzkp_test.go @@ -0,0 +1,26 @@ +package gzkp + +import ( + "bytes" + "compress/gzip" + "testing" +) + +func TestGzipDoubleClose(t *testing.T) { + // reset the pool for the default compression so we can make sure duplicates + // aren't added back by double close + addLevelPool(gzip.DefaultCompression) + + w := bytes.NewBufferString("") + writer := NewWriter(w, gzip.DefaultCompression) + writer.Close() + + // the second close shouldn't have added the same writer + // so we pull out 2 writers from the pool and make sure they're different + w1 := gzipWriterPools[poolIndex(gzip.DefaultCompression)].Get() + w2 := gzipWriterPools[poolIndex(gzip.DefaultCompression)].Get() + + if w1 == w2 { + t.Fatal("got same writer") + } +} diff --git 
a/gzhttp/writer/interface.go b/gzhttp/writer/interface.go new file mode 100644 index 0000000000..04c739d4ef --- /dev/null +++ b/gzhttp/writer/interface.go @@ -0,0 +1,20 @@ +package writer + +import "io" + +// GzipWriter is the set of methods needed for compressing content. +type GzipWriter interface { + Close() error + Flush() error + Write(p []byte) (int, error) +} + +// GzipWriterFactory contains the information needed to plug in a gzip implementation: its supported levels and a constructor for its writers. +type GzipWriterFactory struct { + // Must return the minimum and maximum supported level. + Levels func() (min, max int) + + // New must return a new GzipWriter. + // level will always be within the limits returned by Levels. + New func(writer io.Writer, level int) GzipWriter +} diff --git a/gzhttp/writer/stdlib/stdlib.go b/gzhttp/writer/stdlib/stdlib.go new file mode 100644 index 0000000000..f646eba297 --- /dev/null +++ b/gzhttp/writer/stdlib/stdlib.go @@ -0,0 +1,68 @@ +package gzkp + +import ( + "compress/gzip" + "io" + "sync" + + "github.com/klauspost/compress/gzhttp/writer" +) + +// gzipWriterPools stores a sync.Pool for each compression level for reuse of +// gzip.Writers. Use poolIndex to convert a compression level to an index into +// gzipWriterPools. +var gzipWriterPools [gzip.BestCompression - gzip.HuffmanOnly + 1]*sync.Pool + +func init() { + for i := gzip.HuffmanOnly; i <= gzip.BestCompression; i++ { + addLevelPool(i) + } +} + +// poolIndex maps a compression level to its index into gzipWriterPools. It +// assumes that level is a valid gzip compression level. +func poolIndex(level int) int { + return level - gzip.HuffmanOnly +} + +func addLevelPool(level int) { + gzipWriterPools[poolIndex(level)] = &sync.Pool{ + New: func() interface{} { + // NewWriterLevel only returns error on a bad level, we are guaranteeing + // that this will be a valid level so it is okay to ignore the returned + // error.
+ w, _ := gzip.NewWriterLevel(nil, level) + return w + }, + } +} + +type pooledWriter struct { + *gzip.Writer + index int +} + +func (pw *pooledWriter) Close() error { + err := pw.Writer.Close() + gzipWriterPools[pw.index].Put(pw.Writer) + pw.Writer = nil + return err +} + +func NewWriter(w io.Writer, level int) writer.GzipWriter { + index := poolIndex(level) + gzw := gzipWriterPools[index].Get().(*gzip.Writer) + gzw.Reset(w) + return &pooledWriter{ + Writer: gzw, + index: index, + } +} + +func Levels() (min, max int) { + return gzip.HuffmanOnly, gzip.BestCompression +} + +func ImplementationInfo() string { + return "compress/gzip" +} diff --git a/gzhttp/writer/stdlib/stdlib_test.go b/gzhttp/writer/stdlib/stdlib_test.go new file mode 100644 index 0000000000..6782f97bf6 --- /dev/null +++ b/gzhttp/writer/stdlib/stdlib_test.go @@ -0,0 +1,27 @@ +package gzkp + +import ( + "bytes" + "compress/gzip" + "testing" +) + +func TestGzipDoubleClose(t *testing.T) { + // reset the pool for the default compression so we can make sure duplicates + // aren't added back by double close + addLevelPool(gzip.DefaultCompression) + + w := bytes.NewBufferString("") + writer := NewWriter(w, gzip.DefaultCompression) + writer.Close() + + // the second close shouldn't have added the same writer + // so we pull out 2 writers from the pool and make sure they're different + w1 := gzipWriterPools[poolIndex(gzip.DefaultCompression)].Get() + w2 := gzipWriterPools[poolIndex(gzip.DefaultCompression)].Get() + + if w1 == w2 { + t.Fatal("got same writer") + } + +}