diff --git a/any_parser/any_parser.py b/any_parser/any_parser.py index 52e2076..01e407b 100644 --- a/any_parser/any_parser.py +++ b/any_parser/any_parser.py @@ -343,7 +343,6 @@ def async_fetch( if result_type == "json": return response.json()["json"] else: - print(response.json()) markdown_list = response.json()["markdown"] return "\n".join(markdown_list) if response.status_code == 202: diff --git a/examples/sample_data/test_w2.docx b/examples/sample_data/test_w2.docx new file mode 100644 index 0000000..2839c1e Binary files /dev/null and b/examples/sample_data/test_w2.docx differ diff --git a/examples/sample_data/test_w2.png b/examples/sample_data/test_w2.png new file mode 100644 index 0000000..7c2c08d Binary files /dev/null and b/examples/sample_data/test_w2.png differ diff --git a/examples/sample_data/test_w2.pptx b/examples/sample_data/test_w2.pptx new file mode 100644 index 0000000..42f3a4d Binary files /dev/null and b/examples/sample_data/test_w2.pptx differ diff --git a/run_tests.sh b/run_tests.sh index 45a03e8..2b559a3 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -1,2 +1,2 @@ #!/bin/sh -python -m unittest discover tests +python -m unittest discover tests -v diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..e7f4166 --- /dev/null +++ b/tests/README.md @@ -0,0 +1,27 @@ +# Testing +Overview of running tests for AnyParser sdk. These should be run before submitting any pull request. + +These tests are written using the unittest framework in Python. The tests are located in the `tests/test.py` file. Test data is located in the `tests/test_data.py` file. + +## Setup +1. Install the required packages by running the following command: +```bash +pip install Levenshtein +``` + +## Running Tests +1. Make sure you are in the root folder. +2. 
Run the following command: +```bash +./run_tests.sh +``` + +If you just want to run an individual test within the test.py file, you can run the following command, replacing `<test_name>` with the name of the test: +```bash +python -m unittest -k <test_name> +``` + +For example, if you want to run `test_pdf_sync_extract`, you can run the following command: +```bash +python -m unittest -k test_pdf_sync_extract +``` diff --git a/tests/test.py b/tests/test.py index 35fe3ff..bd554be 100755 --- a/tests/test.py +++ b/tests/test.py @@ -2,11 +2,14 @@ import os import sys +import time import unittest import Levenshtein from dotenv import load_dotenv +from tests.test_data import EXTRACT_JSON_TEST_DATA + sys.path.append(".") load_dotenv(override=True) from any_parser import AnyParser # noqa: E402 @@ -174,6 +177,37 @@ def test_image_async_extract_and_fetch(self): percentage, 90, f"Output similarity too low: {percentage:.2f}%" ) + def test_sync_extract_json(self): + """Synchronous JSON Extraction with subtests for different file formats""" + for data in EXTRACT_JSON_TEST_DATA: + with self.subTest(working_file=data["working_file"]): + # extract + key_value_result, elapsed_time = self.ap.extract_json( + data["working_file"], data["extract_instruction"] + ) + + # assertions + self.assertEqual(key_value_result, data["correct_output"]) + self.assertIn("Time Elapsed", elapsed_time) + + def test_async_extract_json_and_fetch(self): + """Asynchronous JSON Extraction with subtests for different file formats""" + for data in EXTRACT_JSON_TEST_DATA: + with self.subTest(working_file=data["working_file"]): + # extract + file_id = self.ap.async_extract_json( + data["working_file"], data["extract_instruction"] + ) + self.assertFalse(file_id.startswith("Error:"), file_id) + # fetch + key_value_result = self.ap.async_fetch( + file_id=file_id, result_type="json" + ) + # assertions + self.assertEqual(key_value_result, data["correct_output"]) + # wait 1 s between requests + time.sleep(1) + if __name__ == "__main__": - unittest.main() + 
unittest.main(verbosity=2) diff --git a/tests/test_data.py b/tests/test_data.py new file mode 100644 index 0000000..7675d65 --- /dev/null +++ b/tests/test_data.py @@ -0,0 +1,78 @@ +EXTRACT_JSON_TEST_DATA = [ + { + "working_file": "./examples/sample_data/test1.pdf", + "extract_instruction": { + "social_security_number": "the social security number of the employee", + "ein": "the employer identification number", + "first_name": "the first name of the employee", + "last_name": "the last name of the employee", + }, + "correct_output": [ + [ + { + "social_security_number": "758-58-5787", + "ein": "78-8778788", + "first_name": "Jesan", + "last_name": "Rahaman", + } + ] + ], + }, + { + "working_file": "./examples/sample_data/test_w2.pptx", + "extract_instruction": { + "social_security_number": "the social security number of the employee", + "ein": "the employer identification number", + "first_name": "the first name of the employee", + "last_name": "the last name of the employee", + }, + "correct_output": [ + [ + { + "social_security_number": "758-58-5787", + "ein": "78-8778788", + "first_name": "Jesan", + "last_name": "Rahaman", + } + ] + ], + }, + { + "working_file": "./examples/sample_data/test_w2.docx", + "extract_instruction": { + "social_security_number": "the social security number of the employee", + "ein": "the employer identification number", + "first_name": "the first name of the employee", + "last_name": "the last name of the employee", + }, + "correct_output": [ + [ + { + "social_security_number": "758-58-5787", + "ein": "78-8778788", + "first_name": "Jesan", + "last_name": "Rahaman", + } + ] + ], + }, + { + "working_file": "./examples/sample_data/test_w2.png", + "extract_instruction": { + "social_security_number": "the social security number of the employee", + "ein": "the employer identification number", + "first_name": "the first name of the employee", + "last_name": "the last name of the employee", + }, + "correct_output": [ + [ + { + 
"social_security_number": "758-58-5787", + "ein": "78-8778788", + "first_name": "Jesan", + "last_name": "Rahaman", + } + ] + ], + }, +]