-
Notifications
You must be signed in to change notification settings - Fork 44
/
lang.go
92 lines (74 loc) · 1.66 KB
/
lang.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
package main
import (
"strings"
)
// Special words that NewLang registers in every Lang before any other word.
const (
// SosToken is the literal word "SOS" (presumably the start-of-sentence marker).
SosToken = "SOS"
// EosToken is the literal word "EOS" (presumably the end-of-sentence marker).
EosToken = "EOS"
)
// IndexCount pairs the integer index assigned to a word with the number of
// times that word has been added to a Lang.
type IndexCount struct {
// Index is the position given to the word the first time it was added.
Index int
// Count is how many times the word has been added so far.
Count int
}
// Lang is a named vocabulary: it maps words to their index and occurrence
// count, and maps indices back to words.
type Lang struct {
// Name is the label supplied when the Lang was created.
Name string
// WordToIndexAndCount looks up a word's index and add count.
WordToIndexAndCount map[string]IndexCount
// IndexToWord is the reverse lookup from index to word.
IndexToWord map[int]string
}
// NewLang builds a vocabulary called name with both lookup maps allocated,
// then registers SosToken followed by EosToken so that the special tokens
// are always present before any user word is added.
func NewLang(name string) (retVal Lang) {
	retVal = Lang{
		Name:                name,
		WordToIndexAndCount: map[string]IndexCount{},
		IndexToWord:         map[int]string{},
	}
	// Order matters: AddWord hands out indices in insertion order.
	for _, special := range [...]string{SosToken, EosToken} {
		retVal.AddWord(special)
	}
	return retVal
}
// AddWord records one occurrence of word in the vocabulary.
//
// An empty word is ignored. A word seen before has its count incremented and
// keeps its index. A new word is assigned the next free index (the number of
// words currently stored) with a count of 1, and is entered in both maps.
func (l *Lang) AddWord(word string) {
	if word == "" {
		return
	}

	if entry, known := l.WordToIndexAndCount[word]; known {
		entry.Count++
		// Map values are copies, so the updated entry must be stored back.
		l.WordToIndexAndCount[word] = entry
		return
	}

	next := len(l.WordToIndexAndCount)
	l.WordToIndexAndCount[word] = IndexCount{Index: next, Count: 1}
	l.IndexToWord[next] = word
}
// AddSentence splits sentence on single space characters and passes every
// resulting piece to AddWord, in order. Pieces that are empty (for example
// from consecutive spaces) are handed to AddWord unchanged.
func (l *Lang) AddSentence(sentence string) {
	for _, token := range strings.Split(sentence, " ") {
		l.AddWord(token)
	}
}
// Len reports how many entries the index-to-word map holds.
func (l *Lang) Len() (retVal int) {
	retVal = len(l.IndexToWord)
	return retVal
}
// SosToken returns the index stored for the SosToken word. If that word is
// not in the vocabulary the map lookup yields the zero IndexCount, so the
// result is 0.
func (l *Lang) SosToken() (retVal int) {
	entry := l.WordToIndexAndCount[SosToken]
	return entry.Index
}
// EosToken returns the index stored for the EosToken word. If that word is
// not in the vocabulary the map lookup yields the zero IndexCount, so the
// result is 0.
func (l *Lang) EosToken() (retVal int) {
	entry := l.WordToIndexAndCount[EosToken]
	return entry.Index
}
// GetName returns the Name field of the vocabulary.
func (l *Lang) GetName() (retVal string) {
	retVal = l.Name
	return retVal
}
// GetIndex returns the index recorded for word, or -1 when word is not in
// the vocabulary.
func (l *Lang) GetIndex(word string) (retVal int) {
	if entry, ok := l.WordToIndexAndCount[word]; ok {
		return entry.Index
	}
	return -1 // word does not exist in Lang
}
// SeqToString converts a sequence of word indices into a sentence by looking
// each index up in IndexToWord and joining the results with single spaces.
//
// An index with no entry in IndexToWord contributes an empty string, so the
// output keeps one slot per input index (unknown indices show up as
// consecutive separators). An empty or nil seq yields "".
func (l *Lang) SeqToString(seq []int) (retVal string) {
	// The result has exactly one word per index, so size the slice once
	// instead of growing it with repeated appends.
	words := make([]string, len(seq))
	for i, idx := range seq {
		words[i] = l.IndexToWord[idx]
	}
	return strings.Join(words, " ")
}