From 4de8ad392c5f1b5639f215ddad5c16bb7af83bc8 Mon Sep 17 00:00:00 2001 From: Mark van der Velden Date: Tue, 18 Aug 2020 13:59:16 +0200 Subject: [PATCH] Adding preferred domains (#16) --- cmd/web/config.toml | 9 ++++ cmd/web/config/config.go | 38 +++++++++++++++++ cmd/web/handlers_test.go | 4 +- cmd/web/main.go | 5 ++- cmd/web/preferrer/preferrer.go | 29 +++++++++++++ cmd/web/preferrer/preferrer_test.go | 58 +++++++++++++++++++++++++ cmd/web/services/suggest.go | 66 ++++++++++++++++++++++------- cmd/web/services/suggest_test.go | 24 ++++++++++- 8 files changed, 213 insertions(+), 20 deletions(-) create mode 100644 cmd/web/preferrer/preferrer.go create mode 100644 cmd/web/preferrer/preferrer_test.go diff --git a/cmd/web/config.toml b/cmd/web/config.toml index a6f5676..8180bf1 100644 --- a/cmd/web/config.toml +++ b/cmd/web/config.toml @@ -117,3 +117,12 @@ # The maximum number of suggestions to return maxSuggestions = 5 + + + [services.suggest] + + # Prefer allows mapping correct domains to favor alternative domains. A common example could be to map frequently + # occurring typos e.g.: example.com to point to example.org. The result is that when a mapping is found for a given + # domain, the preferred variant is prepended in the list of alternatives. The left-hand-side must be unique. + [services.suggest.prefer] + # The syntax is: "" = "" diff --git a/cmd/web/config/config.go b/cmd/web/config/config.go index b56a257..0488d77 100644 --- a/cmd/web/config/config.go +++ b/cmd/web/config/config.go @@ -2,6 +2,7 @@ package config import ( "encoding" + "errors" "fmt" "io/ioutil" "strings" @@ -73,6 +74,9 @@ type Config struct { RecipientThreshold uint64 `toml:"recipientThreshold" usage:"Define the minimum amount of recipients a domain needs before allowed in the autocomplete"` MaxSuggestions uint64 `toml:"maxSuggestions" usage:"The maximum number of suggestions to return"` } `toml:"autocomplete"` + Suggest struct { + Prefer Preferred `toml:"prefer" env:"-" usage:"A repeatable flag to create a preference list for common alternatives, example.com=example.org"` + } `toml:"suggest"` } `toml:"services"` Backend struct { Driver string `toml:"driver" usage:"List a driver to use, currently supporting: 'memory' or 'postgres'"` @@ -114,6 +118,40 @@ func (c *Config) String() string { return fmt.Sprintf("%+v", c.GetSensored()) } +type Preferred map[string]string + +func (p Preferred) String() string { + var v string + for header, value := range p { + v += `"` + header + `" -> "` + value + `",` + } + + if len(v) > 0 { + v = v[0 : len(v)-1] + } + + return v +} + +func (p *Preferred) Set(v string) error { + s := strings.SplitN(v, `=`, 2) + if len(s) != 2 { + return fmt.Errorf("invalid Preferred alternative argument %q, expecting =", v) + } + + if *p == nil { + *p = make(map[string]string, 1) + } + + if _, exists := (*p)[s[0]]; exists { + return errors.New("duplicate preferred mapping specified") + } + + (*p)[s[0]] = s[1] + + return nil +} + type Headers map[string]string func (h Headers) String() string { diff --git a/cmd/web/handlers_test.go b/cmd/web/handlers_test.go index 1008c18..03b6f2f 100644 --- a/cmd/web/handlers_test.go +++ b/cmd/web/handlers_test.go @@ -305,7 +305,7 @@ func TestNewSuggestHandler(t *testing.T) { } } - svc := services.NewSuggestService(myFinder, val, logger) + svc := services.NewSuggestService(myFinder, val, nil, logger) for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -358,7 +358,7 @@ func TestNewSuggestHandler(t *testing.T) { } // Building the service - svc := services.NewSuggestService(myFinder, val, logger) + svc := services.NewSuggestService(myFinder, val, nil, logger) handlerFunc := NewSuggestHandler(logger, svc, maxBodySize) // Setting up the request diff --git a/cmd/web/main.go b/cmd/web/main.go index a1dadf4..7dbeb71 100644 --- a/cmd/web/main.go +++ b/cmd/web/main.go @@ -6,6 +6,7 @@ import ( "os" "time" + "github.com/Dynom/ERI/cmd/web/preferrer" "github.com/Dynom/ERI/cmd/web/pubsub/gcp" "github.com/Dynom/ERI/runtimer" "github.com/rs/cors" @@ -110,8 +111,10 @@ func main() { os.Exit(1) } + prefer := preferrer.New(preferrer.Mapping(conf.Services.Suggest.Prefer)) + validatorFn := createProxiedValidator(conf, logger, hitList, myFinder, pubSubSvc, persister) - suggestSvc := services.NewSuggestService(myFinder, validatorFn, logger) + suggestSvc := services.NewSuggestService(myFinder, validatorFn, prefer, logger) autocompleteSvc := services.NewAutocompleteService(myFinder, hitList, conf.Services.Autocomplete.RecipientThreshold, logger) mux := http.NewServeMux() diff --git a/cmd/web/preferrer/preferrer.go b/cmd/web/preferrer/preferrer.go new file mode 100644 index 0000000..fd44e84 --- /dev/null +++ b/cmd/web/preferrer/preferrer.go @@ -0,0 +1,29 @@ +package preferrer + +import "github.com/Dynom/ERI/types" + +type HasPreferred interface { + HasPreferred(parts types.EmailParts) (string, bool) +} + +type Mapping map[string]string + +func New(mapping Mapping) *Preferrer { + return &Preferrer{ + m: mapping, + } +} + +type Preferrer struct { + m Mapping +} + +// HasPreferred returns the input when there isn't a match or a preferred result if it has. The second return argument +// should be used to discriminate between the two. +func (p *Preferrer) HasPreferred(parts types.EmailParts) (string, bool) { + if l, ok := p.m[parts.Domain]; ok { + return l, true + } + + return parts.Domain, false +} diff --git a/cmd/web/preferrer/preferrer_test.go b/cmd/web/preferrer/preferrer_test.go new file mode 100644 index 0000000..1bdafcb --- /dev/null +++ b/cmd/web/preferrer/preferrer_test.go @@ -0,0 +1,58 @@ +package preferrer + +import ( + "reflect" + "testing" + + "github.com/Dynom/ERI/types" +) + +func TestNew(t *testing.T) { + type args struct { + mapping Mapping + } + + tests := []struct { + name string + args args + want *Preferrer + }{ + {name: "nil map", args: args{mapping: nil}, want: &Preferrer{}}, + {name: "populated 1", args: args{mapping: Mapping{"a": "b"}}, want: &Preferrer{m: Mapping{"a": "b"}}}, + {name: "populated N", args: args{mapping: Mapping{"a": "b", "b": "c"}}, want: &Preferrer{m: Mapping{"a": "b", "b": "c"}}}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := New(tt.args.mapping); !reflect.DeepEqual(got, tt.want) { + t.Errorf("New() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestPreferrer_HasPreferred(t *testing.T) { + tests := []struct { + name string + m Mapping + parts types.EmailParts + want string + has bool + }{ + + {name: "nil map", m: nil, parts: types.NewEmailFromParts("john.doe", "example.org"), want: "example.org", has: false}, + {name: "match", m: Mapping{"example.com": "example.org"}, parts: types.NewEmailFromParts("john.doe", "example.com"), want: "example.org", has: true}, + {name: "no match", m: Mapping{"a": "b"}, parts: types.NewEmailFromParts("john.doe", "example.org"), want: "example.org", has: false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + p := &Preferrer{ + m: tt.m, + } + + got, has := p.HasPreferred(tt.parts) + if got != tt.want || has != tt.has { + t.Errorf("HasPreferred() got = %v, %t; want %v, %t", got, has, tt.want, tt.has) + } + }) + } +} diff --git a/cmd/web/services/suggest.go b/cmd/web/services/suggest.go index 09f6c0f..23a3e04 100644 --- a/cmd/web/services/suggest.go +++ b/cmd/web/services/suggest.go @@ -5,6 +5,7 @@ import ( "strings" "github.com/Dynom/ERI/cmd/web/erihttp/handlers" + "github.com/Dynom/ERI/cmd/web/preferrer" "github.com/Dynom/ERI/validator" @@ -14,11 +15,16 @@ import ( "github.com/Dynom/TySug/finder" ) -func NewSuggestService(f *finder.Finder, val validator.CheckFn, logger logrus.FieldLogger) *SuggestSvc { +func NewSuggestService(f *finder.Finder, val validator.CheckFn, prefer preferrer.HasPreferred, logger logrus.FieldLogger) *SuggestSvc { + if prefer == nil { + prefer = preferrer.New(nil) + } + return &SuggestSvc{ finder: f, validator: val, logger: logger.WithField("svc", "suggest"), + prefer: prefer, } } @@ -26,15 +32,17 @@ type SuggestSvc struct { finder *finder.Finder validator validator.CheckFn logger *logrus.Entry + prefer preferrer.HasPreferred } type SuggestResult struct { Alternatives []string } +// @todo make this configurable and Algorithm dependent +const finderThreshold = 0.8 + func (c *SuggestSvc) Suggest(ctx context.Context, email string) (SuggestResult, error) { - // @todo make this configurable and Algorithm dependent - const finderThreshold = 0.8 var emailStrLower = strings.ToLower(email) var sr = SuggestResult{ @@ -67,28 +75,54 @@ func (c *SuggestSvc) Suggest(ctx context.Context, email string) (SuggestResult, err = validator.ErrEmailAddressSyntax } - if vr.Validations.IsValid() { - return sr, err + if !vr.Validations.IsValid() { + + // No result so far, proceeding with finding domain alternatives + alts := c.getAlternatives(ctx, parts) + if len(alts) > 0 { + sr.Alternatives = alts + } } - // No result so far, proceeding with finding domains alternatives + var alts = make([]string, 0, len(sr.Alternatives)) + for _, alt := range sr.Alternatives { + parts, err := types.NewEmailParts(alt) + if err != nil { + log.WithError(err).Error("Input doesn't have valid structure") + continue + } + + if preferred, exists := c.prefer.HasPreferred(parts); exists { + parts := types.NewEmailFromParts(parts.Local, preferred) + alts = append(alts, parts.Address, alt) + } else { + alts = append(alts, alt) + } + } + + sr.Alternatives = alts + + return sr, err +} + +func (c *SuggestSvc) getAlternatives(ctx context.Context, parts types.EmailParts) []string { + alt, score, exact := c.finder.FindCtx(ctx, parts.Domain) - log.WithFields(logrus.Fields{ - "alt": alt, - "score": score, - "exact": exact, - "ctx_expired": didDeadlineExpire(ctx), + c.logger.WithFields(logrus.Fields{ + handlers.RequestID.String(): ctx.Value(handlers.RequestID), + "alt": alt, + "score": score, + "threshold_met": score > finderThreshold, + "exact": exact, + "ctx_expired": didDeadlineExpire(ctx), }).Debug("Used Finder") if score > finderThreshold { - parts := types.NewEmailFromParts(parts.Local, alt) - return SuggestResult{ - Alternatives: []string{parts.Address}, - }, err + parts = types.NewEmailFromParts(parts.Local, alt) } - return sr, err + return []string{parts.Address} } func didDeadlineExpire(ctx context.Context) bool { diff --git a/cmd/web/services/suggest_test.go b/cmd/web/services/suggest_test.go index 7375d98..8c5e698 100644 --- a/cmd/web/services/suggest_test.go +++ b/cmd/web/services/suggest_test.go @@ -7,6 +7,7 @@ import ( "testing" "time" + "github.com/Dynom/ERI/cmd/web/preferrer" "github.com/sirupsen/logrus" "github.com/sirupsen/logrus/hooks/test" @@ -44,6 +45,7 @@ func TestSuggestSvc_Suggest(t *testing.T) { validator validator.CheckFn finderList []string logContains string + preferMap preferrer.Mapping }{ { name: "All good", @@ -53,6 +55,15 @@ func TestSuggestSvc_Suggest(t *testing.T) { validator: createMockValidator(validations.FSyntax|validations.FValid, validations.FSyntax|validations.FValid), finderList: []string{}, }, + { + name: "Including preferred", + email: "john.doe@example.com", + want: SuggestResult{Alternatives: []string{"john.doe@example.org", "john.doe@example.com"}}, + wantErr: false, + validator: createMockValidator(validations.FSyntax|validations.FValid, validations.FSyntax|validations.FValid), + finderList: []string{"example.com", "example.org"}, + preferMap: preferrer.Mapping{"example.com": "example.org"}, + }, { name: "Invalid domain, should fall back on finder", email: "john.doe@example.or", @@ -61,6 +72,15 @@ func TestSuggestSvc_Suggest(t *testing.T) { validator: createMockValidator(validations.FSyntax, validations.FSyntax), finderList: []string{"example.org"}, }, + { + name: "Invalid domain, should fall back on finder and be corrected by preferrer", + email: "john.doe@example.cm", + want: SuggestResult{Alternatives: []string{"john.doe@example.org"}}, + wantErr: false, + validator: createMockValidator(validations.FSyntax, validations.FSyntax), + finderList: []string{"example.org"}, + preferMap: preferrer.Mapping{"example.com": "example.org"}, + }, { name: "Invalid domain, finder has no alternative", email: "john.doe@example.or", @@ -99,7 +119,9 @@ func TestSuggestSvc_Suggest(t *testing.T) { return } - svc := NewSuggestService(f, tt.validator, logger) + p := preferrer.New(tt.preferMap) + + svc := NewSuggestService(f, tt.validator, p, logger) got, err := svc.Suggest(context.Background(), tt.email) if (err != nil) != tt.wantErr { t.Errorf("Suggest() error = %v, wantErr %v", err, tt.wantErr)