This repository has been archived by the owner on Oct 8, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
wsd.py
executable file
·51 lines (37 loc) · 1.7 KB
/
wsd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/env python3
import argparse
import concurrent.futures
import sys
import mnogoznal
# Command-line interface. --inventory and --mystem are mandatory files
# (text and binary mode respectively); --mode picks the disambiguator and
# defaults to 'sparse'.
parser = argparse.ArgumentParser(description='WSD.')
parser.add_argument(
    '--inventory',
    type=argparse.FileType('r', encoding='UTF-8'),
    required=True,
)
parser.add_argument(
    '--mystem',
    type=argparse.FileType('rb'),
    required=True,
)
parser.add_argument(
    '--mode',
    type=str,
    choices=('sparse', 'dense'),
    default='sparse',
)

# --w2v and --pyro are alternative vector sources, so at most one of them
# may be supplied on the command line.
group = parser.add_mutually_exclusive_group()
group.add_argument('--w2v', type=argparse.FileType('rb'), default=None)
group.add_argument('--pyro', type=str, default=None)

args = parser.parse_args()
# Only the path of --inventory is handed to mnogoznal; argparse has already
# opened the file, which doubles as an early readability check.
inventory = mnogoznal.Inventory(inventory_path=args.inventory.name)

# Build the disambiguator selected by --mode.
if args.mode == 'sparse':
    wsd = mnogoznal.SparseWSD(inventory=inventory)
elif args.mode == 'dense':
    # Dense mode needs word vectors from either --w2v or --pyro.
    if args.w2v:
        # Imported lazily so gensim is only required when --w2v is used.
        from gensim.models import KeyedVectors
        w2v = KeyedVectors.load_word2vec_format(args.w2v, binary=True, unicode_errors='ignore')
        w2v.init_sims(replace=True)
    elif args.pyro:
        from mnogoznal.pyro_vectors import PyroVectors
        w2v = PyroVectors(args.pyro)
    else:
        print('Please set the --w2v or --pyro option to engage the dense mode.', file=sys.stderr)
        # sys.exit rather than the bare exit(): the latter is injected by the
        # site module for interactive use and is not guaranteed to exist.
        sys.exit(1)
    wsd = mnogoznal.DenseWSD(inventory=inventory, wv=w2v)
# input() consumes a single line from standard input; that text is handed to
# mnogoznal.mystem, and the result is later indexed one sentence at a time.
raw_text = input()
sentences = mnogoznal.mystem(raw_text)
def disambiguate(index):
    """Disambiguate the sentence stored at position ``index``.

    Takes an index rather than the sentence itself: the sentence is looked up
    in the module-level ``sentences`` and passed to ``wsd.disambiguate``,
    whose result is returned unchanged.
    """
    sentence = sentences[index]
    return wsd.disambiguate(sentence)
# Fan the sentences out over worker processes. executor.map yields results in
# input order, so the output follows the order of `sentences`.
# NOTE(review): disambiguate reads the module-level `wsd` and `sentences`, so
# workers must inherit this module's state (e.g. fork start method) — confirm
# behaviour on spawn-based platforms.
total = len(sentences)
with concurrent.futures.ProcessPoolExecutor() as executor:
    for i, result in enumerate(executor.map(disambiguate, range(total))):
        # `sense_id` rather than `id`, which would shadow the builtin.
        for (word, lemma, pos, _), sense_id in result.items():
            print('\t'.join((word, lemma, pos, sense_id if sense_id is not None else '')))

        # Blank line between sentences, but not after the last one.
        if i + 1 < total:
            print()