From d44378abb0483833ff40d87a3231357360c89c44 Mon Sep 17 00:00:00 2001
From: f**k
Date: Thu, 26 Nov 2020 23:36:46 +0800
Subject: [PATCH] Fix excessive memory usage

---
 colly.go        | 14 +++++++++++++-
 http_backend.go | 14 +++++++++-----
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/colly.go b/colly.go
index ea9a2f321..67d14abde 100644
--- a/colly.go
+++ b/colly.go
@@ -665,8 +665,20 @@ func (c *Collector) fetch(u, method string, depth int, requestData io.Reader, ct
 		return !request.abort
 	}
 
+	var response *Response
+	var err error
+	defer func() {
+		if response != nil {
+			response.Body = []byte{}
+			response.Headers = nil
+			response.Request = nil
+			response.Ctx = nil
+			response.Trace = nil
+			responsePool.Put(response)
+		}
+	}()
 	origURL := req.URL
-	response, err := c.backend.Cache(req, c.MaxBodySize, checkHeadersFunc, c.CacheDir)
+	response, err = c.backend.Cache(req, c.MaxBodySize, checkHeadersFunc, c.CacheDir)
 	if proxyURL, ok := req.Context().Value(ProxyURLKey).(string); ok {
 		request.ProxyURL = proxyURL
 	}
diff --git a/http_backend.go b/http_backend.go
index fe96c9a6d..dc927e6c8 100644
--- a/http_backend.go
+++ b/http_backend.go
@@ -40,6 +40,10 @@ type httpBackend struct {
 	lock       *sync.RWMutex
 }
 
+var responsePool = sync.Pool{New: func() interface{} {
+	return &Response{}
+}}
+
 type checkHeadersFunc func(statusCode int, header http.Header) bool
 
 // LimitRule provides connection restrictions for domains.
@@ -210,11 +214,11 @@ func (h *httpBackend) Do(request *http.Request, bodySize int, checkHeadersFunc c
 	if err != nil {
 		return nil, err
 	}
-	return &Response{
-		StatusCode: res.StatusCode,
-		Body:       body,
-		Headers:    &res.Header,
-	}, nil
+	response := responsePool.Get().(*Response)
+	response.StatusCode = res.StatusCode
+	response.Body = body
+	response.Headers = &res.Header
+	return response, nil
 }
 
 func (h *httpBackend) Limit(rule *LimitRule) error {