Skip to content

Commit

Permalink
add sync and async extract-json tests, add README
Browse files Browse the repository at this point in the history
  • Loading branch information
jojortz committed Oct 21, 2024
1 parent 582ddf6 commit 6cb2fdb
Show file tree
Hide file tree
Showing 8 changed files with 141 additions and 3 deletions.
1 change: 0 additions & 1 deletion any_parser/any_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,6 @@ def async_fetch(
if result_type == "json":
return response.json()["json"]
else:
print(response.json())
markdown_list = response.json()["markdown"]
return "\n".join(markdown_list)
if response.status_code == 202:
Expand Down
Binary file added examples/sample_data/test_w2.docx
Binary file not shown.
Binary file added examples/sample_data/test_w2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added examples/sample_data/test_w2.pptx
Binary file not shown.
2 changes: 1 addition & 1 deletion run_tests.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
#!/bin/sh
# Discover and run every test under tests/ once, with verbose per-test output.
python -m unittest discover tests -v
27 changes: 27 additions & 0 deletions tests/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Testing
Overview of running tests for the AnyParser SDK. These should be run before submitting any pull request.

These tests are written using the unittest framework in Python. The tests are located in the `tests/test.py` file. Test data is located in the `tests/test_data.py` file.

## Setup
1. Install the required packages by running the following command:
```bash
pip install Levenshtein python-dotenv
```

## Running Tests
1. Make sure you are in the root folder.
2. Run the following command:
```bash
./run_tests.sh
```

If you just want to run an individual test within the test.py file, you can run the following command:
```bash
python -m unittest -k <test_name>
```

For example, if you want to run `test_pdf_sync_extract`, you can run the following command:
```bash
python -m unittest -k test_pdf_sync_extract
```
36 changes: 35 additions & 1 deletion tests/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@

import os
import sys
import time
import unittest

import Levenshtein
from dotenv import load_dotenv

# Add the current directory (expected to be the repository root) to sys.path
# BEFORE importing any project module. When this file is launched directly
# (python tests/test.py), sys.path[0] is the tests/ directory, so neither
# `any_parser` nor `tests.test_data` would resolve without this.
sys.path.append(".")
load_dotenv(override=True)
from any_parser import AnyParser  # noqa: E402
from tests.test_data import EXTRACT_JSON_TEST_DATA  # noqa: E402
Expand Down Expand Up @@ -174,6 +177,37 @@ def test_image_async_extract_and_fetch(self):
percentage, 90, f"Output similarity too low: {percentage:.2f}%"
)

def test_sync_extract_json(self):
    """Check synchronous JSON extraction against every sample file.

    Each entry of EXTRACT_JSON_TEST_DATA runs as its own subtest, labelled
    with the file path, so one failing format does not hide the others.
    """
    for case in EXTRACT_JSON_TEST_DATA:
        with self.subTest(working_file=case["working_file"]):
            # extract_json yields a (result, timing message) pair.
            extracted, timing = self.ap.extract_json(
                case["working_file"],
                case["extract_instruction"],
            )

            self.assertEqual(extracted, case["correct_output"])
            self.assertIn("Time Elapsed", timing)

def test_async_extract_json_and_fetch(self):
    """Check asynchronous JSON extraction plus fetch for every sample file.

    Each entry of EXTRACT_JSON_TEST_DATA runs as its own subtest: the file
    is submitted, the returned id is checked for an error marker, and the
    fetched JSON result is compared with the expected output.
    """
    for case in EXTRACT_JSON_TEST_DATA:
        with self.subTest(working_file=case["working_file"]):
            # Submit the extraction job.
            job_id = self.ap.async_extract_json(
                case["working_file"],
                case["extract_instruction"],
            )
            # A returned string starting with "Error:" is treated as a
            # failed submission; the string itself is the failure message.
            self.assertFalse(job_id.startswith("Error:"), job_id)

            # Retrieve the result and compare it with the expectation.
            fetched = self.ap.async_fetch(file_id=job_id, result_type="json")
            self.assertEqual(fetched, case["correct_output"])

            # Pause for one second before the next request.
            time.sleep(1)


if __name__ == "__main__":
    # unittest.main() exits the interpreter once the run finishes, so only a
    # single call can ever take effect; run once with verbose per-test output.
    unittest.main(verbosity=2)
78 changes: 78 additions & 0 deletions tests/test_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Sample documents exercised by the JSON-extraction tests, in run order.
_SAMPLE_FILES = (
    "./examples/sample_data/test1.pdf",
    "./examples/sample_data/test_w2.pptx",
    "./examples/sample_data/test_w2.docx",
    "./examples/sample_data/test_w2.png",
)

# Field name -> natural-language description sent as the extract instruction.
# Every sample currently uses the same instruction.
_FIELD_DESCRIPTIONS = {
    "social_security_number": "the social security number of the employee",
    "ein": "the employer identification number",
    "first_name": "the first name of the employee",
    "last_name": "the last name of the employee",
}

# Values expected back for those fields; identical for every sample.
_EXPECTED_FIELDS = {
    "social_security_number": "758-58-5787",
    "ein": "78-8778788",
    "first_name": "Jesan",
    "last_name": "Rahaman",
}

# One test case per sample file. Each case receives its own copies of the
# dicts so that mutating one case can never leak into another.
EXTRACT_JSON_TEST_DATA = [
    {
        "working_file": sample_path,
        "extract_instruction": dict(_FIELD_DESCRIPTIONS),
        "correct_output": [[dict(_EXPECTED_FIELDS)]],
    }
    for sample_path in _SAMPLE_FILES
]

0 comments on commit 6cb2fdb

Please sign in to comment.