This repository has been archived by the owner on Aug 27, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 16
/
script.py
83 lines (61 loc) · 2.43 KB
/
script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/python
# -*- coding: utf-8 -*-
__author__ = 'paulo.rodenas'
import logging
import os, sys, time
from tempfile import NamedTemporaryFile
from service.ocr import *
from service.postprocessing import TaxReceiptFuzzyRegex
from service.aws import TaxReceiptSimpleQueueServiceIntegration, \
SimpleStorageServiceIntegration, BaseSimpleQueueServiceIntegration
import json
def handle_process_message_function(queue_name_in, message_body):
    """
    Handle one incoming queue message: fetch the referenced image from S3,
    run OCR on it and fuzzy-match the fields of interest.

    Expected message body (JSON):

    {"transaction_id": 1,"object": "IMG_2943_SMALL.jpeg"}

    :param queue_name_in: name of the queue the message came from
        (not used by this handler).
    :param message_body: JSON string carrying the ``object`` (S3 key of the
        image) and ``transaction_id`` entries.
    :return: the value returned by
        ``TaxReceiptFuzzyRegex.identify_needed_fields()``, updated in place
        with ``transaction_id`` (copied from the message) and ``elapsedTime``
        (wall-clock seconds spent in this handler).
    """
    logger = logging.getLogger(__name__)
    start = time.time()

    # Receive message from SQS
    json_message = json.loads(message_body)
    object_key = json_message.get('object')

    # Create a temp file that keeps the original extension of the object.
    file_suffix = os.path.splitext(object_key)[1]

    # The ``with`` block guarantees the temp file is closed (and therefore
    # deleted, because delete=True) even when the download or the OCR step
    # raises; previously it leaked on any failure.
    with NamedTemporaryFile(suffix=file_suffix, delete=True) as image_file:
        # Retrieve file from Amazon S3
        s3 = SimpleStorageServiceIntegration()
        s3.download_file(object_key, image_file.name)

        # Perform OCR on it
        ocr_tool = PyOCRIntegration('eng')
        results = ocr_tool.image_to_string(image_file.name)

    # Use the module logger (not the root logger) like the rest of the
    # function, so logger-specific configuration applies uniformly.
    logger.debug('Result: ')
    logger.debug(results)

    # Start looking for meaningful values
    logger.debug('Start - Fuzzy Matching')
    tax_receipt_fuzzy_regex = TaxReceiptFuzzyRegex(results)
    ret_value = tax_receipt_fuzzy_regex.identify_needed_fields()
    logger.debug(ret_value)
    logger.debug('End - Fuzzy Matching')

    # Calculate the time it took to perform all those steps
    elapsed = time.time() - start
    logger.debug('Execution took %f seconds', elapsed)

    ret_value.update({
        'transaction_id': json_message.get('transaction_id'),
        'elapsedTime': elapsed,
    })
    return ret_value
def handle_queue_out_message_function(queue_name_out, response_body):
    """
    Publish a processing result on the outgoing queue.

    :param queue_name_out: name of the queue the response is sent to.
    :param response_body: JSON-serializable object to send.
    """
    # Serialize first so a non-serializable body fails before any queue
    # client is created.
    payload = json.dumps(response_body)
    BaseSimpleQueueServiceIntegration().send_message(queue_name_out, payload)
if __name__ == "__main__":
    # ``import logging`` alone does not load the ``logging.config``
    # submodule, so ``logging.config.fileConfig`` would raise AttributeError
    # unless some other module happened to import it. Import it explicitly.
    import logging.config

    logging.config.fileConfig('/etc/ocr-process-service/logging.ini')

    # Check the OCR prerequisites before starting to consume messages.
    PyOCRIntegration.check_required_software()

    # Wire the two handlers defined above into the SQS integration.
    aws_sqs = TaxReceiptSimpleQueueServiceIntegration(
        handle_process_message_function,
        handle_queue_out_message_function
    )

    try:
        # The return value is not needed here.
        # NOTE(review): presumably this returns right after starting a
        # background listener, which is why the main thread idles below;
        # confirm against the integration class.
        aws_sqs.start_listening()
        while True:
            time.sleep(1)
    except (KeyboardInterrupt, SystemExit):
        sys.exit()