-
Notifications
You must be signed in to change notification settings - Fork 7
/
finders.go
79 lines (71 loc) · 1.76 KB
/
finders.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
package anno
import "bytes"
// Punctuation holds the punctuation and quotation characters that
// TrimPunctuation strips from the end of a field.
var Punctuation = `.?!'",;`

// TrimPunctuation returns s with any trailing run of Punctuation characters
// sliced off. Leading and interior characters are left untouched, and the
// result is a subslice of s rather than a copy.
func TrimPunctuation(s []byte) []byte {
	cutset := Punctuation
	return bytes.TrimRight(s, cutset)
}
// tlds is the set of domain suffixes (each including its leading dot) that
// the finders in this file compare against the end of a field.
var tlds = [][]byte{
	[]byte(".com"),
	[]byte(".dev"),
	[]byte(".net"),
	[]byte(".org"),
	[]byte(".edu"),

	[]byte(".co"),
	[]byte(".io"),
	[]byte(".uk"),
	[]byte(".us"),
	[]byte(".ca"),
	[]byte(".de"),
	[]byte(".jp"),
	[]byte(".fr"),
	[]byte(".au"),
	[]byte(".ru"),
	[]byte(".ch"),
	[]byte(".it"),
	[]byte(".nl"),
	[]byte(".se"),
	[]byte(".no"),
	[]byte(".es"),

	[]byte(".mil"),
}
// Emails finds email addresses.
//
// A field matches when it contains an "@" and, once trailing punctuation has
// been trimmed, ends with one of the suffixes in tlds. On a match the trimmed
// field is returned; otherwise the field is returned unchanged.
var Emails = FieldFunc("email", func(s []byte) (bool, []byte) {
	if !bytes.Contains(s, []byte("@")) { // not an email address
		return false, s
	}
	// Compare the suffix against the trimmed field, not the raw one, so an
	// address followed by punctuation (e.g. "bob@example.com.") still matches.
	trimmedS := TrimPunctuation(s)
	for _, tld := range tlds {
		if bytes.HasSuffix(trimmedS, tld) {
			return true, trimmedS
		}
	}
	return false, s
})
// URLs finds web addresses.
//
// Fields containing an "@" are rejected so they are not mistaken for URLs.
// Otherwise a field matches when, after trailing punctuation is trimmed, it
// starts with "http" or "www" or ends with one of the suffixes in tlds. On a
// match the trimmed field is returned; otherwise the field is returned
// unchanged.
var URLs = FieldFunc("url", func(s []byte) (bool, []byte) {
	if bytes.Contains(s, []byte("@")) { // email address
		return false, s
	}
	candidate := TrimPunctuation(s)
	switch {
	case bytes.HasPrefix(candidate, []byte("http")),
		bytes.HasPrefix(candidate, []byte("www")):
		return true, candidate
	}
	for i := range tlds {
		if bytes.HasSuffix(candidate, tlds[i]) {
			return true, candidate
		}
	}
	return false, s
})
// Mentions finds @twitter style mentions.
//
// A field matches when, after trailing punctuation is trimmed, it begins with
// "@". The trimmed field is returned whether or not it matched.
var Mentions = FieldFunc("mention", func(s []byte) (bool, []byte) {
	token := TrimPunctuation(s)
	isMention := bytes.HasPrefix(token, []byte("@"))
	return isMention, token
})
// Hashtags finds #hashtags.
//
// A field matches when, after trailing punctuation is trimmed, it begins with
// "#". The trimmed field is returned whether or not it matched.
var Hashtags = FieldFunc("hashtag", func(s []byte) (bool, []byte) {
	token := TrimPunctuation(s)
	isHashtag := bytes.HasPrefix(token, []byte("#"))
	return isHashtag, token
})