Skip to content

Commit

Permalink
Merge pull request #8 from Dynom/ImprovingAutoComplete
Browse files Browse the repository at this point in the history
Improving auto complete
  • Loading branch information
Dynom authored Apr 14, 2020
2 parents 7941cf9 + de36000 commit 95b41f9
Show file tree
Hide file tree
Showing 13 changed files with 463 additions and 57 deletions.
6 changes: 6 additions & 0 deletions cmd/web/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,9 @@
# amount of time a rate-limited request is allowed to wait for. Anything above this is aborted, to help protect
# against connection draining. Requests are delayed
parkedTTL = "100ms"

[server.services.autocomplete]

# Domains need at least this number of recipients (inclusive) to be considered for the autocomplete API.
# This is mostly to prevent leaking possibly private information.
recipientThreshold = 1000
6 changes: 5 additions & 1 deletion cmd/web/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ type Config struct {
} `toml:"log"`
Hash struct {
Key string `toml:"key"`
//Enable bool `toml:"enable"`
} `toml:"hash"`
Finder struct {
UseBuckets bool `toml:"useBuckets" usage:"Buckets speedup matching, but assumes no mistakes are made at the start"`
Expand All @@ -62,6 +61,11 @@ type Config struct {
Resolver string `toml:"resolver" usage:"The resolver to use for DNS lookups"`
SuggestValidator ValidatorType `toml:"suggest"`
} `toml:"validator" flag:",inline" env:",inline"`
Services struct {
Autocomplete struct {
RecipientThreshold uint64 `usage:"Define the minimum amount of recipients a domain needs before allowed in the autocomplete"`
} `toml:"autocomplete"`
} `toml:"services"`
Profiler struct {
Enable bool `toml:"enable" default:"false"`
Prefix string `toml:"prefix"`
Expand Down
4 changes: 3 additions & 1 deletion cmd/web/erihttp/handlers/logger.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ func WithRequestLogger(logger logrus.FieldLogger) HandlerWrapper {

r = r.WithContext(context.WithValue(r.Context(), RequestID, rid))

logger.Debug("Request start")
logger.WithFields(logrus.Fields{
"content_length": r.ContentLength,
}).Debug("Request start")

defer func(w *CustomResponseWriter) {

Expand Down
7 changes: 0 additions & 7 deletions cmd/web/erihttp/server.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package erihttp

import (
"errors"
"io"
"log"
"net/http"
Expand All @@ -10,12 +9,6 @@ import (
"github.com/Dynom/ERI/cmd/web/config"
)

var (
ErrMissingBody = errors.New("missing body")
ErrInvalidRequest = errors.New("request is invalid")
ErrBodyTooLarge = errors.New("request body too large")
)

func BuildHTTPServer(mux http.Handler, config config.Config, logWriter io.Writer, handlers ...func(h http.Handler) http.Handler) *http.Server {
for _, h := range handlers {
mux = h(mux)
Expand Down
46 changes: 25 additions & 21 deletions cmd/web/erihttp/types.go
Original file line number Diff line number Diff line change
@@ -1,22 +1,41 @@
package erihttp

import "errors"

// Sentinel errors describing why an incoming request body was rejected. The auto complete handler echoes
// err.Error() of these values back to the client, so the messages should stay free of internal details.
var (
// ErrMissingBody is returned when the request has no body to read.
ErrMissingBody = errors.New("missing body")
// ErrInvalidRequest is returned when reading the request body fails.
ErrInvalidRequest = errors.New("request is invalid")
// ErrBodyTooLarge is returned when the declared Content-Length, or the number of bytes actually read,
// exceeds MaxBodySize.
ErrBodyTooLarge = errors.New("request body too large")
// ErrUnsupportedContentType is returned when the Content-Type header is not exactly "application/json".
ErrUnsupportedContentType = errors.New("unsupported content-type")
)

// ERIResponse is implemented by response payloads that can normalise their own fields in place.
// NOTE(review): presumably PrepareResponse is invoked just before the payload is encoded — confirm at the
// call site, which is not visible here.
type ERIResponse interface {

// Hacking around Generics, like it's 1999.
// PrepareResponse mutates the receiver; the implementations in this package replace nil slices with
// empty ones.
PrepareResponse()
}

// AutoCompleteResponse is the JSON payload produced by the auto complete handler.
type AutoCompleteResponse struct {
// Suggestions holds the suggested domains and is encoded under the "suggestions" key.
Suggestions []string `json:"suggestions"`
// Error carries a client-facing error message. The tag sets no name, so encoding/json uses the field name
// "Error" as the key; omitempty drops the key entirely when the string is empty.
Error string `json:",omitempty"`
}

type CheckResponse struct {
Valid bool `json:"valid"`
Reason string `json:"reason,omitempty"`
Alternative string `json:"alternative,omitempty"`
// PrepareResponse guarantees that Suggestions is a non-nil slice. A nil slice is encoded by encoding/json
// as null, whereas an empty slice is encoded as [], so callers always receive a JSON array.
func (r *AutoCompleteResponse) PrepareResponse() {
	if r.Suggestions != nil {
		// Already populated (or explicitly empty); nothing to normalise.
		return
	}

	r.Suggestions = make([]string, 0)
}

// SuggestResponse is the JSON payload carrying suggested alternatives for a submitted value.
type SuggestResponse struct {
// Alternatives is encoded under the "alternatives" key.
Alternatives []string `json:"alternatives"`
// MalformedSyntax is encoded under the "malformed_syntax" key.
// NOTE(review): presumably set when the submitted email is syntactically invalid — confirm against the
// suggest handler, which is not visible here.
MalformedSyntax bool `json:"malformed_syntax"`
// Error carries a client-facing error message. The tag sets no name, so encoding/json uses the field name
// "Error" as the key; omitempty drops the key entirely when the string is empty.
Error string `json:",omitempty"`
}

type ErrorResponse struct {
Error string `json:"error"`
// PrepareResponse guarantees that Alternatives is a non-nil slice. A nil slice is encoded by encoding/json
// as null, whereas an empty slice is encoded as [], so callers always receive a JSON array.
func (r *SuggestResponse) PrepareResponse() {
	if r.Alternatives != nil {
		// Already populated (or explicitly empty); nothing to normalise.
		return
	}

	r.Alternatives = make([]string, 0)
}

type AutoCompleteRequest struct {
Expand All @@ -26,18 +45,3 @@ type AutoCompleteRequest struct {
// SuggestRequest is a JSON request body with a single "email" field.
type SuggestRequest struct {
// Email is decoded from the "email" key.
Email string `json:"email"`
}

type CheckRequest struct {
Email string `json:"email"`
Alternatives bool `json:"with_alternatives"`
}

type LearnRequest struct {
Emails []ToLearn `json:"emails"`
Domains []ToLearn `json:"domains"`
}

type ToLearn struct {
Value string `json:"value"`
Valid bool `json:"valid"`
}
17 changes: 14 additions & 3 deletions cmd/web/erihttp/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,34 @@ import (
"net/http"
)

const (
// MaxBodySize is the largest request body, in bytes, that GetBodyFromHTTPRequest accepts
// (1 << 20 = 1048576 bytes, i.e. 1 MiB). Larger bodies are rejected with ErrBodyTooLarge.
MaxBodySize int64 = 1 << 20
)

func GetBodyFromHTTPRequest(r *http.Request) ([]byte, error) {
var empty []byte
const maxSizePlusOne int64 = 1<<20 + 1

if r.Body == nil {
return empty, ErrMissingBody
}

b, err := ioutil.ReadAll(io.LimitReader(r.Body, maxSizePlusOne))
if r.ContentLength > MaxBodySize {
return empty, ErrBodyTooLarge
}

if ct := r.Header.Get("Content-Type"); ct != "application/json" {
return empty, ErrUnsupportedContentType
}

b, err := ioutil.ReadAll(io.LimitReader(r.Body, MaxBodySize+1))
if err != nil {
if err == io.EOF {
return empty, ErrMissingBody
}
return empty, ErrInvalidRequest
}

if int64(len(b)) == maxSizePlusOne {
if int64(len(b)) > MaxBodySize {
return empty, ErrBodyTooLarge
}

Expand Down
87 changes: 78 additions & 9 deletions cmd/web/erihttp/util_test.go
Original file line number Diff line number Diff line change
@@ -1,28 +1,97 @@
package erihttp

import (
"bytes"
"math"
"net/http"
"net/http/httptest"
"reflect"
"strings"
"testing"
)

func TestGetBodyFromHTTPRequest(t *testing.T) {
type args struct {
r *http.Request
}
tests := []struct {
name string
args args
req func(body []byte) *http.Request
want []byte
wantErr bool
wantErr error
}{
// TODO: Add test cases.
{
wantErr: nil,
name: "All good",
req: func(body []byte) *http.Request {
req := httptest.NewRequest(http.MethodPost, "/", bytes.NewReader(body))
req.Header.Set("Content-Type", "application/json")
return req
},
want: []byte("{}"),
},
{
wantErr: ErrMissingBody,
name: "Nil body",
req: func(_ []byte) *http.Request {
req := httptest.NewRequest(http.MethodGet, "/", nil)
req.Header.Set("Content-Type", "application/json")
req.Body = nil

return req
},
want: nil,
},
{
wantErr: ErrBodyTooLarge,
name: "Too lengthy/Content-Length",
req: func(_ []byte) *http.Request {
req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader(""))
req.Header.Set("Content-Type", "application/json")
req.ContentLength = math.MaxInt64
return req
},
want: nil,
},
{
wantErr: ErrBodyTooLarge,
name: "Too lengthy/Body",
req: func(_ []byte) *http.Request {
body := strings.Repeat("a", int(MaxBodySize+1))
req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader(body))
req.Header.Set("Content-Type", "application/json")
req.ContentLength = int64(len(body) - 1)

return req
},
want: nil,
},
{
wantErr: ErrUnsupportedContentType,
name: "Content-Type/Missing",
req: func(_ []byte) *http.Request {
req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader(""))
req.Header.Del("Content-Type")
return req
},
want: nil,
},
{
wantErr: ErrUnsupportedContentType,
name: "Content-Type/Wrong",
req: func(_ []byte) *http.Request {
req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader(""))
req.Header.Set("Content-Type", "plain/text")
return req
},
want: nil,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := GetBodyFromHTTPRequest(tt.args.r)
if (err != nil) != tt.wantErr {
t.Errorf("GetBodyFromHTTPRequest() error = %v, wantErr %v", err, tt.wantErr)
req := tt.req(tt.want)
got, err := GetBodyFromHTTPRequest(req)

if err != tt.wantErr {
t.Errorf("GetBodyFromHTTPRequest() error = %v, wantErr %q", err, tt.wantErr)
return
}
if !reflect.DeepEqual(got, tt.want) {
Expand Down
65 changes: 51 additions & 14 deletions cmd/web/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"net/http"
"time"

"github.com/Dynom/ERI/cmd/web/hitlist"
"github.com/Dynom/ERI/validator"

"github.com/Dynom/ERI/cmd/web/erihttp/handlers"
Expand All @@ -19,62 +20,98 @@ import (
"github.com/sirupsen/logrus"
)

func NewAutoCompleteHandler(logger logrus.FieldLogger, myFinder *finder.Finder) http.HandlerFunc {
func NewAutoCompleteHandler(logger logrus.FieldLogger, myFinder *finder.Finder, hitList *hitlist.HitList, recipientThreshold uint64) http.HandlerFunc {

const (
maxSuggestions = 5

FailedRequestError = "Request failed, unable to parse request body. Expected JSON."
DomainLookupFailedError = "Request failed, unable to lookup by domain."
FailedResponseError = "Generating response failed."
)

log := logger.WithField("handler", "auto complete")
return func(w http.ResponseWriter, r *http.Request) {
var err error
var req erihttp.AutoCompleteRequest

log := log.WithField(handlers.RequestID.String(), r.Context().Value(handlers.RequestID))
log = log.WithField(handlers.RequestID.String(), r.Context().Value(handlers.RequestID))

defer deferClose(r.Body, log)

body, err := erihttp.GetBodyFromHTTPRequest(r)
if err != nil {
log.WithError(err).Errorf("Error handling request %s", err)
log.WithFields(logrus.Fields{
"error": err,
"content_length": r.ContentLength,
}).Errorf("Error handling request %s", err)

w.WriteHeader(http.StatusBadRequest)
_, _ = w.Write([]byte("Request failed"))

// err is expected to be safe to expose to the client
writeErrorJSONResponse(logger, w, &erihttp.AutoCompleteResponse{Error: err.Error()})
return
}

err = json.Unmarshal(body, &req)
if err != nil {
log.WithError(err).Errorf("Error handling request body %s", err)

w.WriteHeader(http.StatusBadRequest)
_, _ = w.Write([]byte("Request failed, unable to parse request body. Did you send JSON?"))
writeErrorJSONResponse(log, w, &erihttp.AutoCompleteResponse{Error: FailedRequestError})
return
}

ctx, cancel := context.WithTimeout(r.Context(), time.Millisecond*500)
defer cancel()

if len(req.Domain) == 0 {
log.Error("Empty argument")
if req.Domain == "" {
log.Debug("Empty argument")
w.WriteHeader(http.StatusBadRequest)
_, _ = w.Write([]byte("Request failed, unable to lookup by domain"))
writeErrorJSONResponse(log, w, &erihttp.AutoCompleteResponse{Error: DomainLookupFailedError})
return
}

list, err := myFinder.GetMatchingPrefix(ctx, req.Domain, 10)
list, err := myFinder.GetMatchingPrefix(ctx, req.Domain, maxSuggestions*2)
if err != nil {
log.WithError(err).Errorf("Error during lookup %s", err)
w.WriteHeader(http.StatusInternalServerError)
_, _ = w.Write([]byte("Request failed, unable to lookup by domain"))
log.WithError(err).Warn("Error during lookup")
w.WriteHeader(http.StatusBadRequest)
writeErrorJSONResponse(log, w, &erihttp.AutoCompleteResponse{Error: DomainLookupFailedError})
return
}

// Filter the list so that we don't leak rarely used domain names. Exposing those could cause privacy problems,
// for example with personal domain names.
var filteredList = make([]string, 0, maxSuggestions)
for _, domain := range list {
if ctx.Err() != nil {
w.WriteHeader(http.StatusBadRequest)

// @todo Is this a safe error to "leak" ?
writeErrorJSONResponse(log, w, &erihttp.AutoCompleteResponse{Error: ctx.Err().Error()})
return
}

if cnt := hitList.GetRecipientCount(hitlist.Domain(domain)); cnt >= recipientThreshold {
filteredList = append(filteredList, domain)
if len(filteredList) >= maxSuggestions {
break
}
}
}

response, err := json.Marshal(erihttp.AutoCompleteResponse{
Suggestions: list,
Suggestions: filteredList,
})

if err != nil {
log.WithFields(logrus.Fields{
"response": response,
"error": err,
}).Error("Failed to marshal the response")

w.WriteHeader(http.StatusInternalServerError)
_, _ = w.Write([]byte("Unable to produce a response"))
writeErrorJSONResponse(log, w, &erihttp.AutoCompleteResponse{Error: FailedResponseError})
return
}

Expand Down
Loading

0 comments on commit 95b41f9

Please sign in to comment.