-
Notifications
You must be signed in to change notification settings - Fork 25
/
Copy pathfind_similar_lines.py
66 lines (45 loc) · 1.53 KB
/
find_similar_lines.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
"""Remove repeated sentence pairs from an Amharic/English parallel corpus.

Reads ``am.txt`` and ``en.txt`` (line N of one file is the translation of
line N of the other), drops every line pair that is an exact repeat of an
earlier pair, and writes the survivors to ``new-am.txt`` and ``new-en.txt``.
The first occurrence of each pair is always kept, and the relative order of
the remaining lines is unchanged.
"""


def find_duplicate_pair_lines(english_lines, amharic_lines):
    """Return the 1-based line numbers of repeated (english, amharic) pairs.

    A line number is reported when both its English text and its Amharic
    text are identical to those of some earlier line. The first occurrence
    of a pair is never reported.

    Args:
        english_lines: English lines, with or without trailing newlines.
        amharic_lines: Amharic lines aligned index-by-index with
            ``english_lines``.

    Returns:
        A list of line numbers in ascending order. If the inputs differ in
        length, only the common prefix is examined, so unpaired trailing
        lines are never reported.
    """
    seen_pairs = set()
    duplicates = []
    paired = zip(english_lines, amharic_lines)
    for line_number, (english, amharic) in enumerate(paired, start=1):
        # Compare without the line terminator so that a final line lacking
        # a trailing newline still matches an identical earlier line.
        pair = (english.rstrip('\n'), amharic.rstrip('\n'))
        if pair in seen_pairs:
            duplicates.append(line_number)
        else:
            seen_pairs.add(pair)
    return duplicates


def _read_lines(path):
    """Read a UTF-8 text file and return its lines, newlines included."""
    with open(path, 'r', encoding='utf-8') as handle:
        return handle.readlines()


def _write_lines(path, lines):
    """Write the given lines verbatim to a UTF-8 text file."""
    with open(path, 'w', encoding='utf-8') as handle:
        handle.writelines(lines)


def _without_lines(lines, line_numbers):
    """Return ``lines`` minus the entries at the given 1-based numbers."""
    return [
        line
        for number, line in enumerate(lines, start=1)
        if number not in line_numbers
    ]


def main():
    """De-duplicate ``am.txt``/``en.txt`` into ``new-am.txt``/``new-en.txt``."""
    amharic_lines = _read_lines('am.txt')
    english_lines = _read_lines('en.txt')

    if len(amharic_lines) != len(english_lines):
        # Misaligned inputs are still processed on a best-effort basis:
        # the unpaired tail of the longer file is copied through untouched.
        print('warning: am.txt has {} lines but en.txt has {}'.format(
            len(amharic_lines), len(english_lines)))

    lines_to_remove = find_duplicate_pair_lines(english_lines, amharic_lines)
    print(lines_to_remove)

    # Set membership keeps the filtering pass linear in the file size.
    removal_set = set(lines_to_remove)
    kept_amharic = _without_lines(amharic_lines, removal_set)
    kept_english = _without_lines(english_lines, removal_set)

    _write_lines('new-am.txt', kept_amharic)
    _write_lines('new-en.txt', kept_english)

    print(len(kept_amharic))
    print(len(kept_english))


if __name__ == '__main__':
    main()