-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsample.py
56 lines (37 loc) · 1.12 KB
/
sample.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from lexpp import Lexpp
# Module-level Lexpp instance, created once and shared by the demo functions below.
pp = Lexpp()
def replace_with_synonynms():
    """Print a sample sentence together with its synonym-substituted variants.

    The leading two characters of the sentence are treated as the target
    word. The word is looked up with ``pp.lookup``, the first returned entry
    is handed to ``pp.get_synset``, and one variant of the sentence is built
    per synset member by substituting that member's ``surface`` for the word.
    """
    sentence = "宛先はこちら"
    # Real code should tokenize the text into words; this demo simply slices
    # off the leading two characters instead.
    word = sentence[:2]
    first_entry = list(pp.lookup(word))[0]
    variants = [
        sentence.replace(word, member.surface)
        for member in pp.get_synset(first_entry)
    ]
    print(sentence, "->", variants)
def normalize_hyokiyure():
    """Print spelling variants of one word and their representative forms.

    Each variant is looked up with ``pp.lookup``; the first returned entry is
    passed to ``pp.get_representative_form`` and the result is asserted to be
    equal to the reference spelling before it is collected.

    The reference and the variants are taken from these synonyms.txt rows:

        000027,1,0,1,0,0,0,(店),漫画喫茶,,
        000027,1,0,1,0,0,2,(店),まんが喫茶,,
        000027,1,0,1,0,0,2,(店),マンガ喫茶,,
        000027,1,0,1,0,2,0,(店),漫喫,,
        000027,1,0,1,0,2,2,(店),まん喫,,
        000027,1,0,1,0,2,2,(店),マン喫,,
    """
    reference = "漫画喫茶"
    variants = ["まんが喫茶", "マンガ喫茶", "漫喫", "まん喫", "マン喫"]
    print("original:", variants)

    normalized = []
    for variant in variants:
        first_entry = list(pp.lookup(variant))[0]
        representative = pp.get_representative_form(first_entry)
        # Every variant is expected to map back to the reference spelling.
        assert reference == representative
        normalized.append(representative)
    print("normalized:", normalized)
def main():
    """Run both demos: synonym replacement, then spelling normalization."""
    for demo in (replace_with_synonynms, normalize_hyokiyure):
        demo()
# Run the demos only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()