-
Notifications
You must be signed in to change notification settings - Fork 1
/
icdar15.py
120 lines (91 loc) · 4.68 KB
/
icdar15.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# Functions default_evaluation_params and transcription_match come
# from official evaluation file script.py hosted on the rrc server:
# https://rrc.cvc.uab.es/?ch=4&com=mymethods&task=4
# and are subject to the original license terms of that work.
# Function transcription_score is a derivative work of function
# transcription_match.
from typing import Any
from pyeditdistance.distance import normalized_levenshtein # type: ignore
def default_evaluation_params() -> dict[str,Any]:
    """
    default_evaluation_params: Build the default parameter set for validation and evaluation.

    A fresh dictionary is created on every call.
    """
    params: dict[str, Any] = dict(
        IOU_CONSTRAINT=0.5,
        AREA_PRECISION_CONSTRAINT=0.5,
        WORD_SPOTTING=False,
        MIN_LENGTH_CARE_WORD=3,
        # File-name patterns; the captured digits give the sample id.
        GT_SAMPLE_NAME_2_ID='gt_img_([0-9]+).txt',
        DET_SAMPLE_NAME_2_ID='res_img_([0-9]+).txt',
        # LTRB: 2 points (left,top,right,bottom) or 4 points (x1,y1,x2,y2,x3,y3,x4,y4)
        LTRB=False,
        # Lines are delimited by Windows CRLF format
        CRLF=False,
        # Detections must include confidence value. AP will be calculated.
        CONFIDENCES=False,
        SPECIAL_CHARACTERS='!?.:,*"()·[]/\'',
        ONLY_REMOVE_FIRST_LAST_CHARACTER=True,
    )
    return params
# Note optional arguments have the same values as the default_evaluation_params
# used above
def transcription_match(transGt: str, transDet: str,
                        specialCharacters: str='!?.:,*"()·[]/\'',
                        onlyRemoveFirstLastCharacterGT: bool=True) -> bool:
    """Decide whether a detected transcription matches the ground truth.

    Both strings are upper-cased first, so the comparison ignores case.

    Args:
        transGt: Ground-truth transcription.
        transDet: Detected transcription.
        specialCharacters: Characters that may be ignored at the ends of a word.
        onlyRemoveFirstLastCharacterGT: When True, the detection is compared
            unchanged against the ground truth and against the ground truth
            with a single special character dropped from its start, its end,
            or both. When False, every leading and trailing special character
            is stripped from both strings before they are compared.

    Returns:
        True if the two transcriptions match under the selected rule.
    """
    gt = transGt.upper()
    det = transDet.upper()

    if not onlyRemoveFirstLastCharacterGT:
        # Special characters are removed from the beginning and the end of
        # both Detection and GroundTruth.
        return gt.strip(specialCharacters) == det.strip(specialCharacters)

    # Special characters in GT are allowed only at initial or final position.
    if gt == det:
        return True
    if not gt:
        # An empty ground truth has no first/last character to drop; indexing
        # it would raise IndexError, so a non-empty detection is a mismatch.
        return False

    starts_special = gt[0] in specialCharacters
    ends_special = gt[-1] in specialCharacters

    if starts_special and gt[1:] == det:
        return True
    if ends_special and gt[:-1] == det:
        return True
    return starts_special and ends_special and gt[1:-1] == det
def transcription_score(transGt: str, transDet: str,
                        specialCharacters: str='!?.:,*"()·[]/\'',
                        onlyRemoveFirstLastCharacterGT: bool=True) -> float:
    """Calculate 1-NED with appropriate penalty-free transformations.

    Both strings are upper-cased first. If transcription_match accepts the
    pair under the given options, the score is 1.0. Otherwise special
    characters are removed according to the selected rule and the result is
    one minus the normalized Levenshtein distance of what remains.

    Args:
        transGt: Ground-truth transcription.
        transDet: Detected transcription.
        specialCharacters: Characters that may be ignored at the ends of a word.
        onlyRemoveFirstLastCharacterGT: When True, at most one special
            character is dropped from each end of the ground truth and the
            detection is left untouched. When False, all leading and trailing
            special characters are stripped from both strings.

    Returns:
        1.0 for a match, otherwise 1 - normalized_levenshtein of the
        transformed strings.
    """
    gt = transGt.upper()
    det = transDet.upper()

    def str_score(p: str, d: str) -> float:
        return 1 - normalized_levenshtein(p, d)

    if gt == det:
        return 1.0

    if onlyRemoveFirstLastCharacterGT and not gt:
        # Empty ground truth, non-empty detection: there is no first/last
        # character to inspect (indexing would raise IndexError), so score
        # the pair as-is.
        return str_score(gt, det)

    # Forward the caller's options so the match shortcut applies the same
    # rule and character set as the scoring below.
    if transcription_match(gt, det, specialCharacters,
                           onlyRemoveFirstLastCharacterGT):
        return 1.0

    if onlyRemoveFirstLastCharacterGT:
        # Special characters in GT are allowed only at initial or final
        # position: drop one from whichever end(s) carry one.
        start = 1 if gt[0] in specialCharacters else 0
        stop = len(gt) - 1 if gt[-1] in specialCharacters else len(gt)
        return str_score(gt[start:stop], det)

    # Special characters are removed from the beginning and the end of both
    # Detection and GroundTruth.
    return str_score(gt.strip(specialCharacters), det.strip(specialCharacters))