Skip to content

Commit

Permalink
Merge pull request #109 from tjmlabs/base64-bug-fix
Browse files Browse the repository at this point in the history
Enforce valid base64 or url for document upsertion
  • Loading branch information
Jonathan-Adly authored Dec 4, 2024
2 parents 0a7c52f + 2dc5c86 commit 78123d6
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 0 deletions.
52 changes: 52 additions & 0 deletions web/api/tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,58 @@ async def test_create_document_pdf_url_await(async_client, user):
}


async def test_create_document_invalid_url(async_client, user):
    """Posting an upsert whose 'url' is the string "Hello" must yield a 422.

    The response body is compared in full against the expected validation
    error payload, so the message text itself is part of the contract.
    """
    reason = "Provided 'url' is not valid. Please provide a valid URL."
    request_body = {
        "name": "Test Document Fixture",
        "url": "Hello",
        "wait": True,
    }
    auth_headers = {"Authorization": f"Bearer {user.token}"}
    expected_body = {
        "detail": [
            {
                "type": "value_error",
                "loc": ["body", "payload"],
                # The same reason appears twice: prefixed in "msg", raw in "ctx".
                "msg": f"Value error, {reason}",
                "ctx": {"error": reason},
            }
        ]
    }

    response = await async_client.post(
        "/documents/upsert-document/",
        json=request_body,
        headers=auth_headers,
    )

    assert response.status_code == 422
    assert response.json() == expected_body


async def test_create_document_invalid_base64(async_client, user):
    """Posting an upsert whose 'base64' is the string "Hello" must yield a 422.

    The response body is compared in full against the expected validation
    error payload, so the message text itself is part of the contract.
    """
    reason = "Provided 'base64' is not valid. Please provide a valid base64 string."
    request_body = {
        "name": "Test Document Fixture",
        "base64": "Hello",
        "wait": True,
    }
    auth_headers = {"Authorization": f"Bearer {user.token}"}
    expected_body = {
        "detail": [
            {
                "type": "value_error",
                "loc": ["body", "payload"],
                # The same reason appears twice: prefixed in "msg", raw in "ctx".
                "msg": f"Value error, {reason}",
                "ctx": {"error": reason},
            }
        ]
    }

    response = await async_client.post(
        "/documents/upsert-document/",
        json=request_body,
        headers=auth_headers,
    )

    assert response.status_code == 422
    assert response.json() == expected_body


async def test_create_document_pdf_url_async(async_client, user):
response = await async_client.post(
"/documents/upsert-document/",
Expand Down
22 changes: 22 additions & 0 deletions web/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,28 @@ def base64_or_url(self) -> Self:
raise ValueError("Either 'url' or 'base64' must be provided.")
if self.url and self.base64:
raise ValueError("Only one of 'url' or 'base64' should be provided.")

# Validate base64
if self.base64:
base64_pattern = r"^[A-Za-z0-9+/]+={0,2}$"
is_base64 = (
re.match(base64_pattern, self.base64) and len(self.base64) % 4 == 0
)

if not is_base64:
raise ValueError(
"Provided 'base64' is not valid. Please provide a valid base64 string."
)

# Validate URL
if self.url:
parsed = urlparse(self.url)
is_url = all([parsed.scheme, parsed.netloc])

if not is_url:
raise ValueError(
"Provided 'url' is not valid. Please provide a valid URL."
)
return self


Expand Down

0 comments on commit 78123d6

Please sign in to comment.