Skip to content

Commit

Permalink
Adding preferred domains (#16)
Browse files Browse the repository at this point in the history
  • Loading branch information
Dynom authored Aug 18, 2020
1 parent ead6740 commit 4de8ad3
Show file tree
Hide file tree
Showing 8 changed files with 213 additions and 20 deletions.
9 changes: 9 additions & 0 deletions cmd/web/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,12 @@

# The maximum number of suggestions to return
maxSuggestions = 5


[services.suggest]

# Prefer allows mapping correct domains to favor alternative domains. A common example could be to map frequently
# occurring typos e.g.: example.com to point to example.org. The result is that when a mapping is found for a given
# domain, the preferred variant is prepended in the list of alternatives. The left-hand-side must be unique.
[services.suggest.prefer]
# The syntax is: "<domain>" = "<preferred domain>"
38 changes: 38 additions & 0 deletions cmd/web/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package config

import (
"encoding"
"errors"
"fmt"
"io/ioutil"
"strings"
Expand Down Expand Up @@ -73,6 +74,9 @@ type Config struct {
RecipientThreshold uint64 `toml:"recipientThreshold" usage:"Define the minimum amount of recipients a domain needs before allowed in the autocomplete"`
MaxSuggestions uint64 `toml:"maxSuggestions" usage:"The maximum number of suggestions to return"`
} `toml:"autocomplete"`
Suggest struct {
Prefer Preferred `toml:"prefer" env:"-" usage:"A repeatable flag to create a preference list for common alternatives, example.com=example.org"`
} `toml:"suggest"`
} `toml:"services"`
Backend struct {
Driver string `toml:"driver" usage:"List a driver to use, currently supporting: 'memory' or 'postgres'"`
Expand Down Expand Up @@ -114,6 +118,40 @@ func (c *Config) String() string {
return fmt.Sprintf("%+v", c.GetSensored())
}

type Preferred map[string]string

func (p Preferred) String() string {
var v string
for header, value := range p {
v += `"` + header + `" -> "` + value + `",`
}

if len(v) > 0 {
v = v[0 : len(v)-1]
}

return v
}

func (p *Preferred) Set(v string) error {
s := strings.SplitN(v, `=`, 2)
if len(s) != 2 {
return fmt.Errorf("invalid Preferred alternative argument %q, expecting <domain>=<preferred domain>", v)
}

if *p == nil {
*p = make(map[string]string, 1)
}

if _, exists := (*p)[s[0]]; exists {
return errors.New("duplicate preferred mapping specified")
}

(*p)[s[0]] = s[1]

return nil
}

type Headers map[string]string

func (h Headers) String() string {
Expand Down
4 changes: 2 additions & 2 deletions cmd/web/handlers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ func TestNewSuggestHandler(t *testing.T) {
}
}

svc := services.NewSuggestService(myFinder, val, logger)
svc := services.NewSuggestService(myFinder, val, nil, logger)

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down Expand Up @@ -358,7 +358,7 @@ func TestNewSuggestHandler(t *testing.T) {
}

// Building the service
svc := services.NewSuggestService(myFinder, val, logger)
svc := services.NewSuggestService(myFinder, val, nil, logger)
handlerFunc := NewSuggestHandler(logger, svc, maxBodySize)

// Setting up the request
Expand Down
5 changes: 4 additions & 1 deletion cmd/web/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"os"
"time"

"github.com/Dynom/ERI/cmd/web/preferrer"
"github.com/Dynom/ERI/cmd/web/pubsub/gcp"
"github.com/Dynom/ERI/runtimer"
"github.com/rs/cors"
Expand Down Expand Up @@ -110,8 +111,10 @@ func main() {
os.Exit(1)
}

prefer := preferrer.New(preferrer.Mapping(conf.Services.Suggest.Prefer))

validatorFn := createProxiedValidator(conf, logger, hitList, myFinder, pubSubSvc, persister)
suggestSvc := services.NewSuggestService(myFinder, validatorFn, logger)
suggestSvc := services.NewSuggestService(myFinder, validatorFn, prefer, logger)
autocompleteSvc := services.NewAutocompleteService(myFinder, hitList, conf.Services.Autocomplete.RecipientThreshold, logger)

mux := http.NewServeMux()
Expand Down
29 changes: 29 additions & 0 deletions cmd/web/preferrer/preferrer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package preferrer

import "github.com/Dynom/ERI/types"

type HasPreferred interface {
HasPreferred(parts types.EmailParts) (string, bool)
}

type Mapping map[string]string

func New(mapping Mapping) *Preferrer {
return &Preferrer{
m: mapping,
}
}

type Preferrer struct {
m Mapping
}

// HasPreferred returns the input when there isn't a match or a preferred result if it has. The second return argument
// should be used to discriminate between the two.
func (p *Preferrer) HasPreferred(parts types.EmailParts) (string, bool) {
if l, ok := p.m[parts.Domain]; ok {
return l, true
}

return parts.Domain, false
}
58 changes: 58 additions & 0 deletions cmd/web/preferrer/preferrer_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package preferrer

import (
"reflect"
"testing"

"github.com/Dynom/ERI/types"
)

func TestNew(t *testing.T) {
type args struct {
mapping Mapping
}

tests := []struct {
name string
args args
want *Preferrer
}{
{name: "nil map", args: args{mapping: nil}, want: &Preferrer{}},
{name: "populated 1", args: args{mapping: Mapping{"a": "b"}}, want: &Preferrer{m: Mapping{"a": "b"}}},
{name: "populated N", args: args{mapping: Mapping{"a": "b", "b": "c"}}, want: &Preferrer{m: Mapping{"a": "b", "b": "c"}}},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := New(tt.args.mapping); !reflect.DeepEqual(got, tt.want) {
t.Errorf("New() = %v, want %v", got, tt.want)
}
})
}
}

func TestPreferrer_HasPreferred(t *testing.T) {
tests := []struct {
name string
m Mapping
parts types.EmailParts
want string
has bool
}{

{name: "nil map", m: nil, parts: types.NewEmailFromParts("john.doe", "example.org"), want: "example.org", has: false},
{name: "match", m: Mapping{"example.com": "example.org"}, parts: types.NewEmailFromParts("john.doe", "example.com"), want: "example.org", has: true},
{name: "no match", m: Mapping{"a": "b"}, parts: types.NewEmailFromParts("john.doe", "example.org"), want: "example.org", has: false},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
p := &Preferrer{
m: tt.m,
}

got, has := p.HasPreferred(tt.parts)
if got != tt.want || has != tt.has {
t.Errorf("HasPreferred() got = %v, %t; want %v, %t", got, has, tt.want, tt.has)
}
})
}
}
66 changes: 50 additions & 16 deletions cmd/web/services/suggest.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"strings"

"github.com/Dynom/ERI/cmd/web/erihttp/handlers"
"github.com/Dynom/ERI/cmd/web/preferrer"

"github.com/Dynom/ERI/validator"

Expand All @@ -14,27 +15,34 @@ import (
"github.com/Dynom/TySug/finder"
)

func NewSuggestService(f *finder.Finder, val validator.CheckFn, logger logrus.FieldLogger) *SuggestSvc {
func NewSuggestService(f *finder.Finder, val validator.CheckFn, prefer preferrer.HasPreferred, logger logrus.FieldLogger) *SuggestSvc {
if prefer == nil {
prefer = preferrer.New(nil)
}

return &SuggestSvc{
finder: f,
validator: val,
logger: logger.WithField("svc", "suggest"),
prefer: prefer,
}
}

type SuggestSvc struct {
finder *finder.Finder
validator validator.CheckFn
logger *logrus.Entry
prefer preferrer.HasPreferred
}

type SuggestResult struct {
Alternatives []string
}

// @todo make this configurable and Algorithm dependent
const finderThreshold = 0.8

func (c *SuggestSvc) Suggest(ctx context.Context, email string) (SuggestResult, error) {
// @todo make this configurable and Algorithm dependent
const finderThreshold = 0.8

var emailStrLower = strings.ToLower(email)
var sr = SuggestResult{
Expand Down Expand Up @@ -67,28 +75,54 @@ func (c *SuggestSvc) Suggest(ctx context.Context, email string) (SuggestResult,
err = validator.ErrEmailAddressSyntax
}

if vr.Validations.IsValid() {
return sr, err
if !vr.Validations.IsValid() {

// No result so far, proceeding with finding domain alternatives
alts := c.getAlternatives(ctx, parts)
if len(alts) > 0 {
sr.Alternatives = alts
}
}

// No result so far, proceeding with finding domains alternatives
var alts = make([]string, 0, len(sr.Alternatives))
for _, alt := range sr.Alternatives {
parts, err := types.NewEmailParts(alt)
if err != nil {
log.WithError(err).Error("Input doesn't have valid structure")
continue
}

if preferred, exists := c.prefer.HasPreferred(parts); exists {
parts := types.NewEmailFromParts(parts.Local, preferred)
alts = append(alts, parts.Address, alt)
} else {
alts = append(alts, alt)
}
}

sr.Alternatives = alts

return sr, err
}

func (c *SuggestSvc) getAlternatives(ctx context.Context, parts types.EmailParts) []string {

alt, score, exact := c.finder.FindCtx(ctx, parts.Domain)

log.WithFields(logrus.Fields{
"alt": alt,
"score": score,
"exact": exact,
"ctx_expired": didDeadlineExpire(ctx),
c.logger.WithFields(logrus.Fields{
handlers.RequestID.String(): ctx.Value(handlers.RequestID),
"alt": alt,
"score": score,
"threshold_met": score > finderThreshold,
"exact": exact,
"ctx_expired": didDeadlineExpire(ctx),
}).Debug("Used Finder")

if score > finderThreshold {
parts := types.NewEmailFromParts(parts.Local, alt)
return SuggestResult{
Alternatives: []string{parts.Address},
}, err
parts = types.NewEmailFromParts(parts.Local, alt)
}

return sr, err
return []string{parts.Address}
}

func didDeadlineExpire(ctx context.Context) bool {
Expand Down
24 changes: 23 additions & 1 deletion cmd/web/services/suggest_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"testing"
"time"

"github.com/Dynom/ERI/cmd/web/preferrer"
"github.com/sirupsen/logrus"

"github.com/sirupsen/logrus/hooks/test"
Expand Down Expand Up @@ -44,6 +45,7 @@ func TestSuggestSvc_Suggest(t *testing.T) {
validator validator.CheckFn
finderList []string
logContains string
preferMap preferrer.Mapping
}{
{
name: "All good",
Expand All @@ -53,6 +55,15 @@ func TestSuggestSvc_Suggest(t *testing.T) {
validator: createMockValidator(validations.FSyntax|validations.FValid, validations.FSyntax|validations.FValid),
finderList: []string{},
},
{
name: "Including preferred",
email: "[email protected]",
want: SuggestResult{Alternatives: []string{"[email protected]", "[email protected]"}},
wantErr: false,
validator: createMockValidator(validations.FSyntax|validations.FValid, validations.FSyntax|validations.FValid),
finderList: []string{"example.com", "example.org"},
preferMap: preferrer.Mapping{"example.com": "example.org"},
},
{
name: "Invalid domain, should fall back on finder",
email: "[email protected]",
Expand All @@ -61,6 +72,15 @@ func TestSuggestSvc_Suggest(t *testing.T) {
validator: createMockValidator(validations.FSyntax, validations.FSyntax),
finderList: []string{"example.org"},
},
{
name: "Invalid domain, should fall back on finder and be corrected by preferrer",
email: "[email protected]",
want: SuggestResult{Alternatives: []string{"[email protected]"}},
wantErr: false,
validator: createMockValidator(validations.FSyntax, validations.FSyntax),
finderList: []string{"example.org"},
preferMap: preferrer.Mapping{"example.com": "example.org"},
},
{
name: "Invalid domain, finder has no alternative",
email: "[email protected]",
Expand Down Expand Up @@ -99,7 +119,9 @@ func TestSuggestSvc_Suggest(t *testing.T) {
return
}

svc := NewSuggestService(f, tt.validator, logger)
p := preferrer.New(tt.preferMap)

svc := NewSuggestService(f, tt.validator, p, logger)
got, err := svc.Suggest(context.Background(), tt.email)
if (err != nil) != tt.wantErr {
t.Errorf("Suggest() error = %v, wantErr %v", err, tt.wantErr)
Expand Down

0 comments on commit 4de8ad3

Please sign in to comment.