-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathunused_readings.py
79 lines (61 loc) · 2.24 KB
/
unused_readings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
from models.anki import AnkiModel
from models.kanji import Kanji
from models.kanji_word import KanjiWord
from utf8_helper import force_UTF8
import kana
from collections import defaultdict
from itertools import islice
import settings
if __name__ == '__main__':
    # Interactive script: for every kanji reading that no kanji word uses,
    # ask on stdin whether it should be added to the ignore list.
    force_UTF8()

    # Step 1: collect, per base character, every reading that appears in the
    # kanji words (base -> list of readings, duplicates kept).
    mapping = defaultdict(list)
    for word in KanjiWord.all():
        for reading in word.kanji_readings:
            base = reading['base']
            mapping[base].append(reading['reading'])

            # Disabled consistency check, kept for reference:
            # Now remove any that are used
            # try:
            #     kanji = Kanji.find(reading['base'])
            # except KeyError:
            #     if kana.is_kana(reading['base']):
            #         raise AnkiModel.Error(u"Kana mismatch: %s word(%s) reading(%s)" % (
            #             reading['base'], word.kanji, word.reading
            #         ))
            #     else:
            #         raise AnkiModel.Error(u"Kanji not found, but in use: %s word(%s)" % (
            #             reading['base'], word.kanji
            #         ))
            # if reading['reading'] not in kanji.readings and kanji.kanji != '々':
            #     print '%s(%s) word(%s)' % (
            #         kanji.kanji,
            #         reading['reading'],
            #         word.kanji,
            #     )

    # Step 2: go through each reading of each kanji and prompt for the ones
    # that are neither used by a word nor already ignored.
    for kanji in Kanji.all():
        for reading in kanji._readings:
            # All forms of this reading, converted with unicode() so they can
            # be compared against the readings collected in step 1.
            candidates = map(unicode, reading.get_all())

            # A reading counts as used as soon as one of the readings seen
            # for this kanji matches any of its forms.
            if any(seen in candidates for seen in mapping[kanji.kanji]):
                continue

            # Entries are identified by this exact formatted text, both when
            # looked up in Kanji.ignored and when handed to
            # Kanji.add_unused_reading below.
            label = "(kanji: %s) %s" % (kanji.kanji, reading)
            if label in Kanji.ignored:
                continue

            # Only the exact answer "y" registers the reading; any other
            # input just moves on to the next reading.
            answer = raw_input("Ignore? %s: " % label)
            if answer == "y":
                Kanji.add_unused_reading(label)

        # print ', '.join(map(unicode, kanji._readings))

    # Disabled debugging output, kept for reference:
    # The leftovers are the unused readings
    # for kanji, readings in islice(mapping.iteritems(), 0, 10):
    #     print '%s(%s)' % (
    #         kanji,
    #         ', '.join(readings),
    #     )