Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix typos (found by typos and codespell) and improve help text #16

Merged
merged 2 commits into from
Apr 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,13 @@ Parameters:
information. Defaults to the model bundled with ocrd_froc.
"fast_cocr" [boolean - true]
Whether to use optimization steps on the COCR strategy
"adaptive_treshold" [number - 95]
Treshold of certitude needed to use SelOCR when using the adaptive
"adaptive_threshold" [number - 95]
Threshold of certitude needed to use SelOCR when using the adaptive
strategy
"font_class_priors" [array - []]
List of font classes which are known to be present on the data when
using the adaptive/SelOCR strategies. When this option is specified,
every font classes not included will be ignored. If 'other' is
included in the list, font classification will not be outputted and
a generic model will be used for transcriptions.
using the adaptive/SelOCR strategies. If this option is specified,
any font classes not included are ignored. If 'other' is
included in the list, no font classification is output and
a generic model is used for transcriptions.
```
10 changes: 5 additions & 5 deletions ocrd_froc/froc.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def save(self, output):
Parameters
----------
output: string or file
File or path to the file to which the instane has to
File or path to the file to which the instance has to
be stored.
"""

Expand All @@ -108,7 +108,7 @@ def save(self, output):
self.selocr.to(self.dev)
self.cocr.to(self.dev)

def run(self, pil_image, method='adaptive', fast_cocr=True, adaptive_treshold=95, classification_result=None):
def run(self, pil_image, method='adaptive', fast_cocr=True, adaptive_threshold=95, classification_result=None):

if method in ('SelOCR', 'adaptive') and not classification_result:
raise ValueError(f"Froc.run(): if method is SelOCR or adaptive, classification_result is required")
Expand All @@ -120,7 +120,7 @@ def run(self, pil_image, method='adaptive', fast_cocr=True, adaptive_treshold=95
elif method == 'COCR':
out = self.run_cocr(tns, fast_cocr)
else:
out = self.run_adaptive(tns, classification_result, fast_cocr, adaptive_treshold)
out = self.run_adaptive(tns, classification_result, fast_cocr, adaptive_threshold)

# constrain to image width, expand to batch format (batch size 1)
base_width = [tns.shape[2]]
Expand Down Expand Up @@ -200,8 +200,8 @@ def run_cocr(self, tns, fast_cocr):
return out


def run_adaptive(self, tns, classification_result, fast_cocr, adaptive_treshold):
if max(classification_result.values()) > adaptive_treshold / 100:
def run_adaptive(self, tns, classification_result, fast_cocr, adaptive_threshold):
if max(classification_result.values()) > adaptive_threshold / 100:
return self.run_selocr(tns, classification_result)
else:
return self.run_cocr(tns, fast_cocr)
10 changes: 5 additions & 5 deletions ocrd_froc/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ def convert_widths(self, w, max_width):
def __init_length_map(self):
"""
Initializes the map conversion system for convert_widths(). Note
that it tries to cache the resuts in dat/length_map.json.
that it tries to cache the results in dat/length_map.json.
"""
max_length = 2000
try:
Expand Down Expand Up @@ -272,7 +272,7 @@ def forward(self, x):
Processes an input batch.

:param x: input batch
:return: the network's output, ready to be convered to a string
:return: the network's output, ready to be converted to a string
"""
x = self.backbone(x)
x = self.act(x)
Expand Down Expand Up @@ -400,7 +400,7 @@ def forward(self, x, model_idx=None):
single text line (because of the branching).

:param x: input batch
:return: the network's output, ready to be convered to a string
:return: the network's output, ready to be converted to a string
"""
if x.shape[0] != 1:
raise ValueError('SelOCR cannot work on batches containing multiple inputs, sorry')
Expand Down Expand Up @@ -540,7 +540,7 @@ def forward(self, x, fast_cocr=True):
Processes an input batch

:param x: input batch
:return: the network's output, ready to be convered to a string
:return: the network's output, ready to be converted to a string
"""
scores = F.softmax(self.classifier(x), dim=2)
res = 0
Expand Down Expand Up @@ -704,7 +704,7 @@ def forward(self, x):
Processes an input batch

:param x: input batch
:return: the network's output, ready to be convered to a string
:return: the network's output, ready to be converted to a string
"""
scores = F.softmax(self.classifier(x), dim=2)
txt = 0
Expand Down
6 changes: 3 additions & 3 deletions ocrd_froc/ocrd-tool.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,14 @@
"type": "boolean",
"default": true
},
"adaptive_treshold": {
"description": "Treshold of certitude needed to use SelOCR when using the adaptive strategy",
"adaptive_threshold": {
"description": "Threshold of certitude needed to use SelOCR when using the adaptive strategy",
"type": "number",
"format": "integer",
"default": 95
},
"font_class_priors": {
"description": "List of font classes which are known to be present on the data when using the adaptive/SelOCR strategies. When this option is specified, every font classes not included will be ignored. If 'other' is included in the list, font classification will not be outputted and a generic model will be used for transcriptions.",
"description": "List of font classes which are known to be present on the data when using the adaptive/SelOCR strategies. If this option is specified, any font classes not included are ignored. If 'other' is included in the list, no font classification is output and a generic model is used for transcriptions.",
"type": "array",
"items": {
"type": "string",
Expand Down
4 changes: 2 additions & 2 deletions ocrd_froc/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,12 +108,12 @@ def _process_segment(self, segment, image):
classification_result=result)
else:
fast_cocr = self.parameter['fast_cocr']
adaptive_treshold = self.parameter['adaptive_treshold']
adaptive_threshold = self.parameter['adaptive_threshold']
transcription, score = self.froc.run(image,
method=ocr_method,
classification_result=result,
fast_cocr=fast_cocr,
adaptive_treshold=adaptive_treshold)
adaptive_threshold=adaptive_threshold)

if self.parameter['overwrite_text']:
segment.set_TextEquiv([TextEquivType(Unicode=transcription, conf=score)])
Expand Down