Skip to content

Commit

Permalink
Merge pull request #84 from manykarim/remove-threadpoolexecutor
Browse files: browse the repository at this point in the history
Remove threadpoolexecutor
  • Loading branch information
manykarim authored Aug 2, 2023
2 parents 68ca29a + b1234b3 commit 6bdf640
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 67 deletions.
18 changes: 10 additions & 8 deletions DocTest/CompareImage.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,9 @@ def __init__(self, image, **kwargs):
self.barcodes = []
self.rerendered_for_ocr = False
self.mupdfdoc= None
self.ocr_performed = False
self.load_image_into_array()
self.load_text_content_and_identify_masks()


toc = time.perf_counter()
print(f"Compare Image Object created in {toc - tic:0.4f} seconds")

Expand Down Expand Up @@ -129,6 +128,7 @@ def get_ocr_text_data(self, ocr_config: str='--psm 11', ocr_lang: str='eng', inc
height_list.append(d['height'][j])
conf_list.append(d['conf'][j])
self.text_content.append({'text': text_list, 'left': left_list, 'top': top_list, 'width': width_list, 'height': height_list, 'conf': conf_list})
self.ocr_performed = True

def increase_resolution_for_ocr(self):
# experimental: IF OCR is used and DPI is lower than self.MINIMUM_OCR_RESOLUTION DPI, re-render with self.MINIMUM_OCR_RESOLUTION DPI
Expand Down Expand Up @@ -161,6 +161,7 @@ def get_text_content_with_east(self, increase_resolution: bool=True):
for frame in self.opencv_images:
text = self.east_text_extractor.get_image_text(frame)
self.text_content.append(text)
self.ocr_performed = True

def identify_placeholders(self):
placeholders = None
Expand Down Expand Up @@ -212,12 +213,13 @@ def identify_placeholders(self):
# print(pattern)

if self.mupdfdoc is None or self.force_ocr is True:
if self.ocr_engine == 'tesseract':
self.get_ocr_text_data()
elif self.ocr_engine == 'east':
self.get_text_content_with_east()
else:
self.get_ocr_text_data()
if self.ocr_performed is False:
if self.ocr_engine == 'tesseract':
self.get_ocr_text_data()
elif self.ocr_engine == 'east':
self.get_text_content_with_east()
else:
self.get_ocr_text_data()
for i in range(len(self.opencv_images)):
d = self.text_content[i]
keys = list(d.keys())
Expand Down
88 changes: 33 additions & 55 deletions DocTest/VisualTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import json
import math
from DocTest.Downloader import is_url, download_file_from_url
import logging


@library
Expand Down Expand Up @@ -179,24 +180,16 @@ def compare_images(self, reference_image: str, test_image: str, placeholder_file
raise AssertionError(
'The candidate file does not exist: {}'.format(test_image))

with futures.ThreadPoolExecutor(max_workers=2) as parallel_executor:
reference_future = parallel_executor.submit(CompareImage, reference_image, placeholder_file=placeholder_file, contains_barcodes=contains_barcodes,
get_pdf_content=get_pdf_content, DPI=self.DPI, force_ocr=force_ocr, mask=mask, ocr_engine=ocr_engine)
candidate_future = parallel_executor.submit(
CompareImage, test_image, contains_barcodes=contains_barcodes, get_pdf_content=get_pdf_content, DPI=self.DPI)
reference_compare_image = reference_future.result()
candidate_compare_image = candidate_future.result()
reference_compare_image = CompareImage(reference_image, placeholder_file=placeholder_file, contains_barcodes=contains_barcodes, get_pdf_content=get_pdf_content, DPI=self.DPI, force_ocr=force_ocr, mask=mask, ocr_engine=ocr_engine)
candidate_compare_image = CompareImage(test_image, contains_barcodes=contains_barcodes, get_pdf_content=get_pdf_content, DPI=self.DPI)


tic = time.perf_counter()
if reference_compare_image.placeholders != []:
candidate_compare_image.placeholders = reference_compare_image.placeholders
with futures.ThreadPoolExecutor(max_workers=2) as parallel_executor:
reference_collection_future = parallel_executor.submit(
reference_compare_image.get_image_with_placeholders)
compare_collection_future = parallel_executor.submit(
candidate_compare_image.get_image_with_placeholders)
reference_collection = reference_collection_future.result()
compare_collection = compare_collection_future.result()
reference_collection = reference_compare_image.get_image_with_placeholders()
compare_collection = candidate_compare_image.get_image_with_placeholders()
logging.debug("OCR Data: {}".format(reference_compare_image.text_content))
else:
reference_collection = reference_compare_image.opencv_images
compare_collection = candidate_compare_image.opencv_images
Expand All @@ -216,25 +209,19 @@ def compare_images(self, reference_image: str, test_image: str, placeholder_file
compare_collection[i], "_candidate_page_" + str(i+1))
raise AssertionError(
'Reference File and Candidate File have different number of pages')

check_difference_results = []
with futures.ThreadPoolExecutor(max_workers=8) as parallel_executor:
for i, (reference, candidate) in enumerate(zip(reference_collection, compare_collection)):
if get_pdf_content:
try:
reference_pdf_content = reference_compare_image.mupdfdoc[i]
candidate_pdf_content = candidate_compare_image.mupdfdoc[i]
except:
reference_pdf_content = reference_compare_image.mupdfdoc[0]
candidate_pdf_content = candidate_compare_image.mupdfdoc[0]
else:
reference_pdf_content = None
candidate_pdf_content = None
check_difference_results.append(parallel_executor.submit(
self.check_for_differences, reference, candidate, i, detected_differences, compare_options, reference_pdf_content, candidate_pdf_content))
for result in check_difference_results:
if result.exception() is not None:
raise result.exception()

for i, (reference, candidate) in enumerate(zip(reference_collection, compare_collection)):
if get_pdf_content:
try:
reference_pdf_content = reference_compare_image.mupdfdoc[i]
candidate_pdf_content = candidate_compare_image.mupdfdoc[i]
except:
reference_pdf_content = reference_compare_image.mupdfdoc[0]
candidate_pdf_content = candidate_compare_image.mupdfdoc[0]
else:
reference_pdf_content = None
candidate_pdf_content = None
self.check_for_differences(reference, candidate, i, detected_differences, compare_options, reference_pdf_content, candidate_pdf_content)
if reference_compare_image.barcodes != []:
if reference_compare_image.barcodes != candidate_compare_image.barcodes:
detected_differences.append(True)
Expand Down Expand Up @@ -482,13 +469,8 @@ def check_for_differences(self, reference, candidate, i, detected_differences, c
raise AssertionError(
f'The compared images have different dimensions:\nreference:{reference.shape}\ncandidate:{candidate.shape}')

with futures.ThreadPoolExecutor(max_workers=2) as parallel_executor:
grayA_future = parallel_executor.submit(
cv2.cvtColor, reference, cv2.COLOR_BGR2GRAY)
grayB_future = parallel_executor.submit(
cv2.cvtColor, candidate, cv2.COLOR_BGR2GRAY)
grayA = grayA_future.result()
grayB = grayB_future.result()
grayA = cv2.cvtColor(reference, cv2.COLOR_BGR2GRAY)
grayB = cv2.cvtColor(candidate, cv2.COLOR_BGR2GRAY)

# Blur images if blur=True
if compare_options['blur']:
Expand Down Expand Up @@ -621,10 +603,10 @@ def check_for_differences(self, reference, candidate, i, detected_differences, c
print(text_reference)
else:
images_are_equal = False
detected_differences.append(True)
print("Partial text content is different")
print(text_reference +
" is not equal to " + text_candidate)
raise AssertionError('The compared images are different.')
elif compare_options["get_pdf_content"] is True:

images_are_equal = True
Expand All @@ -651,17 +633,17 @@ def check_for_differences(self, reference, candidate, i, detected_differences, c

if len(diff_area_ref_words) != len(diff_area_cand_words):
images_are_equal = False
detected_differences.append(True)
print("The identified pdf layout elements are different",
diff_area_ref_words, diff_area_cand_words)
raise AssertionError('The compared images are different.')
else:

if diff_area_ref_words.strip() != diff_area_cand_words.strip():
images_are_equal = False
detected_differences.append(True)
print("Partial text content is different")
print(diff_area_ref_words.strip(
), " is not equal to ", diff_area_cand_words.strip())
raise AssertionError('The compared images are different.')
if images_are_equal:
print("Partial text content of area is the same")
print(diff_area_ref_words)
Expand Down Expand Up @@ -697,20 +679,17 @@ def check_for_differences(self, reference, candidate, i, detected_differences, c
except:
print("Error in finding position in compare image")
images_are_equal = False
detected_differences.append(True)
continue
raise AssertionError('The compared images are different.')
#positions_in_compare_image = self.find_partial_image_position(candidate, diff_area_reference)
if (np.mean(diff_area_reference) == 255) or (np.mean(diff_area_candidate) == 255):
images_are_equal = False
detected_differences.append(True)

print("Image section contains only white background")

self.add_screenshot_to_log(np.concatenate((cv2.copyMakeBorder(diff_area_reference, top=2, bottom=2, left=2, right=2, borderType=cv2.BORDER_CONSTANT, value=[
0, 0, 0]), cv2.copyMakeBorder(diff_area_candidate, top=2, bottom=2, left=2, right=2, borderType=cv2.BORDER_CONSTANT, value=[0, 0, 0])), axis=1), "_diff_area_concat")

#self.add_screenshot_to_log(np.concatenate((diff_area_reference, diff_area_candidate), axis=1), "_diff_area_concat")

raise AssertionError('The compared images are different.')
else:
if positions_in_compare_image:
# if positions_in_compare_image contains a key 'distance'
Expand All @@ -723,9 +702,9 @@ def check_for_differences(self, reference, candidate, i, detected_differences, c
print(
"This is outside of the allowed range of ", move_tolerance, " pixels")
images_are_equal = False
detected_differences.append(True)
self.add_screenshot_to_log(self.overlay_two_images(
search_area_reference, search_area_candidate), "_diff_area_blended")
raise AssertionError('The compared images are different.')
else:
print("Image section moved ",
move_distance, " pixels")
Expand All @@ -749,9 +728,9 @@ def check_for_differences(self, reference, candidate, i, detected_differences, c
print(
"This is outside of the allowed range of ", move_tolerance, " pixels")
images_are_equal = False
detected_differences.append(True)
self.add_screenshot_to_log(self.overlay_two_images(
search_area_reference, search_area_candidate), "_diff_area_blended")
raise AssertionError('The compared images are different.')

else:
print("Image section moved ",
Expand All @@ -763,11 +742,11 @@ def check_for_differences(self, reference, candidate, i, detected_differences, c

else:
images_are_equal = False
detected_differences.append(True)
print(
"The reference image section was not found in test image (or vice versa)")
self.add_screenshot_to_log(np.concatenate((cv2.copyMakeBorder(diff_area_reference, top=2, bottom=2, left=2, right=2, borderType=cv2.BORDER_CONSTANT, value=[
0, 0, 0]), cv2.copyMakeBorder(diff_area_candidate, top=2, bottom=2, left=2, right=2, borderType=cv2.BORDER_CONSTANT, value=[0, 0, 0])), axis=1), "_diff_area_concat")
raise AssertionError('The compared images are different.')

elif compare_options["get_pdf_content"] is True:
images_are_equal = True
Expand All @@ -793,9 +772,9 @@ def check_for_differences(self, reference, candidate, i, detected_differences, c

if len(diff_area_ref_words) != len(diff_area_cand_words):
images_are_equal = False
detected_differences.append(True)
print("The identified pdf layout elements are different",
diff_area_ref_words, diff_area_cand_words)
raise AssertionError('The compared images are different.')
else:
for ref_Item, cand_Item in zip(diff_area_ref_words, diff_area_cand_words):
if ref_Item == cand_Item:
Expand All @@ -819,10 +798,9 @@ def check_for_differences(self, reference, candidate, i, detected_differences, c
print(
"This is outside of the allowed range of ", move_tolerance, " pixels")
images_are_equal = False
detected_differences.append(True)
self.add_screenshot_to_log(self.overlay_two_images(
diff_area_reference, diff_area_candidate), "_diff_area_blended")

raise AssertionError('The compared images are different.')
else:
print("Image section moved ", left_moved,
top_moved, right_moved, bottom_moved, " pixels")
Expand All @@ -831,7 +809,7 @@ def check_for_differences(self, reference, candidate, i, detected_differences, c
self.add_screenshot_to_log(self.overlay_two_images(
diff_area_reference, diff_area_candidate), "_diff_area_blended")
if images_are_equal is not True:
detected_differences.append(True)
raise AssertionError('The compared images are different.')

@keyword
def get_text_from_document(self, image: str, ocr_engine: str="tesseract", ocr_config: str='--psm 11', ocr_lang: str='eng', increase_resolution: bool=True, ocr_confidence: int=20):
Expand Down
Loading

0 comments on commit 6bdf640

Please sign in to comment.