Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure fields are lists before combining in DocDetails class #801

Merged
merged 20 commits into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
67686c1
Ensure fields are lists before combining in DocDetails class
harryvu-futurehouse Jan 9, 2025
1505d5d
Update .mailmap to include new author entries for Harry Vu
harryvu-futurehouse Jan 9, 2025
73d86a2
Add test for merging DocDetails to ensure list format consistency
harryvu-futurehouse Jan 10, 2025
c957124
add more test for `DocDetails` `__add__`
harryvu-futurehouse Jan 10, 2025
b3505a5
add more test for `DocDetails` `__add__`
harryvu-futurehouse Jan 10, 2025
8d07844
rename test for better explanation
harryvu-futurehouse Jan 10, 2025
c0213f1
merge main to current branch
harryvu-futurehouse Jan 10, 2025
0dfa222
move tests for docdetails merge to `test_paper.py`
harryvu-futurehouse Jan 10, 2025
b5f7166
move tests for merging DocDetails to `test_paperqa.py`
harryvu-futurehouse Jan 10, 2025
bda482c
adding comments to explain test cases
harryvu-futurehouse Jan 10, 2025
983da66
clarifying comments
harryvu-futurehouse Jan 10, 2025
11a1ede
Merge remote-tracking branch 'origin/main' into fix-DocDetails
harryvu-futurehouse Jan 10, 2025
d27da1a
make code more self-documenting
harryvu-futurehouse Jan 10, 2025
a8ea231
make code more self-documented
harryvu-futurehouse Jan 10, 2025
9acb48b
Merge remote-tracking branch 'origin/main' into fix-DocDetails
harryvu-futurehouse Jan 10, 2025
69128f9
making the tests more documented
harryvu-futurehouse Jan 10, 2025
eff9268
Merge remote-tracking branch 'origin/main' into fix-DocDetails
harryvu-futurehouse Jan 10, 2025
17d8c70
remove unnecessary `@pytest.mark.asyncio`
harryvu-futurehouse Jan 10, 2025
f600954
document the tests
harryvu-futurehouse Jan 10, 2025
7c4d21c
correct docstring
harryvu-futurehouse Jan 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .mailmap
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,5 @@ Odhran O'Donoghue <[email protected]> <[email protected]
Samantha Cox <[email protected]> <[email protected]>
Anush008 <[email protected]> Anush <[email protected]>
Mayk Caldas <[email protected]> maykcaldas <[email protected]>
Harry Vu <[email protected]> <[email protected]>
Harry Vu <[email protected]> harryvu-futurehouse
10 changes: 10 additions & 0 deletions paperqa/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -686,6 +686,16 @@ def __add__(self, other: DocDetails | int) -> DocDetails: # noqa: PLR0912
merged_data[field] = {**self.other, **other.other}
# handle the bibtex / sources as special fields
for field_to_combine in ("bibtex_source", "client_source"):
# Ensure the fields are lists before combining
harryvu-futurehouse marked this conversation as resolved.
Show resolved Hide resolved
if self.other.get(field_to_combine) and not isinstance(
self.other[field_to_combine], list
jamesbraza marked this conversation as resolved.
Show resolved Hide resolved
):
self.other[field_to_combine] = [self.other[field_to_combine]]
if other.other.get(field_to_combine) and not isinstance(
harryvu-futurehouse marked this conversation as resolved.
Show resolved Hide resolved
other.other[field_to_combine], list
):
other.other[field_to_combine] = [other.other[field_to_combine]]

if self.other.get(field_to_combine) and other.other.get(
field_to_combine
):
Expand Down
65 changes: 65 additions & 0 deletions tests/test_paperqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import textwrap
from collections.abc import AsyncIterable, Sequence
from copy import deepcopy
from datetime import datetime, timedelta
from io import BytesIO
from pathlib import Path
from typing import cast
Expand Down Expand Up @@ -1239,6 +1240,70 @@ def test_dois_resolve_to_correct_journals(doi_journals):
assert details.journal == doi_journals["journal"]


def test_docdetails_merge_with_non_list_fields():
# Test merging a DocDetail that was republished
harryvu-futurehouse marked this conversation as resolved.
Show resolved Hide resolved
initial_date = datetime(2023, 1, 1)
doc1 = DocDetails(
citation="Citation 1",
publication_date=initial_date,
docname="Document 1",
dockey="key1",
other={"bibtex_source": "source1", "client_source": ["client1"]},
)

later_publication_date = initial_date + timedelta(weeks=13)
doc2 = DocDetails(
citation=doc1.citation,
publication_date=later_publication_date,
docname=doc1.docname,
dockey=doc1.dockey,
other={"bibtex_source": ["source2"], "client_source": "client2"},
harryvu-futurehouse marked this conversation as resolved.
Show resolved Hide resolved
)

# Merge the two DocDetails instances
merged_doc = doc1 + doc2

assert {"source1", "source2"}.issubset(
merged_doc.other["bibtex_source"]
), "Expected merge to keep both bibtex sources"
assert {"client1", "client2"}.issubset(
merged_doc.other["client_source"]
), "Expected merge to keep both client sources"
assert isinstance(merged_doc, DocDetails), "Merged doc should also be DocDetails"


def test_docdetails_merge_with_list_fields():
# Test merging a DocDetail that was republished
harryvu-futurehouse marked this conversation as resolved.
Show resolved Hide resolved
initial_date = datetime(2023, 1, 1)
doc1 = DocDetails(
citation="Citation 1",
publication_date=initial_date,
docname="Document 1",
dockey="key1",
other={"bibtex_source": ["source1"], "client_source": ["client1"]},
)

later_publication_date = initial_date + timedelta(weeks=13)
doc2 = DocDetails(
citation=doc1.citation,
publication_date=later_publication_date,
docname=doc1.docname,
dockey=doc1.dockey,
other={"bibtex_source": ["source2"], "client_source": ["client2"]},
)

# Merge the two DocDetails instances
merged_doc = doc1 + doc2

assert {"source1", "source2"}.issubset(
merged_doc.other["bibtex_source"]
), "Expected merge to keep both bibtex sources"
assert {"client1", "client2"}.issubset(
merged_doc.other["client_source"]
), "Expected merge to keep both client sources"
assert isinstance(merged_doc, DocDetails), "Merged doc should also be DocDetails"


@pytest.mark.vcr
@pytest.mark.parametrize("use_partition", [True, False])
@pytest.mark.asyncio
Expand Down
Loading