Skip to content

Commit

Permalink
Dandelion io (#483)
Browse files Browse the repository at this point in the history
* incorporate dandelion awkward array changes

* update docstrings

* merge master changes, update dandelion test fixture

* fix formatting

* require sc-dandelion>=0.3.5 in pyproject.toml

* fix faulty reference to dandelion function in docstring
  • Loading branch information
amoschoomy authored Feb 1, 2024
1 parent d776d01 commit acc228e
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 34 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ test = [
'black',
]
dandelion = [
'sc-dandelion>=0.2.3',
'sc-dandelion>=0.3.5',
]
diversity = [
'scikit-bio>=0.5.7'
Expand Down
42 changes: 10 additions & 32 deletions src/scirpy/io/_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,47 +581,32 @@ def write_airr(adata: DataHandler.TYPE, filename: Union[str, Path], **kwargs) ->
writer.close()


def to_dandelion(adata: DataHandler.TYPE, **kwargs):
def to_dandelion(adata: DataHandler.TYPE):
"""Export data to `Dandelion <https://github.com/zktuong/dandelion>`_ (:cite:`Stephenson2021`).
Parameters
----------
adata
annotated data matrix with :term:`IR` annotations.
**kwargs
additional arguments passed to :func:`~scirpy.io.to_airr_cells`
Returns
-------
`Dandelion` object.
"""
try:
import dandelion as ddl
from dandelion import from_scirpy
except ImportError:
raise ImportError("Please install dandelion: pip install sc-dandelion.") from None
airr_cells = to_airr_cells(adata, **kwargs)

contig_dicts = {}
for tmp_cell in airr_cells:
for i, chain in enumerate(tmp_cell.to_airr_records(), start=1):
# dandelion-specific modifications
chain.update(
{
"sequence_id": f"{tmp_cell.cell_id}_contig_{i}",
}
)
contig_dicts[chain["sequence_id"]] = chain

data = pd.DataFrame.from_dict(contig_dicts, orient="index")
return ddl.Dandelion(ddl.load_data(data))
return from_scirpy(adata)


@_doc_params(doc_working_model=doc_working_model)
def from_dandelion(dandelion, transfer: bool = False, **kwargs) -> AnnData:
def from_dandelion(dandelion, transfer: bool = False, to_mudata: bool = False, **kwargs) -> AnnData:
"""\
Import data from `Dandelion <https://github.com/zktuong/dandelion>`_ (:cite:`Stephenson2021`).
Internally calls :func:`scirpy.io.read_airr`.
Internally calls `dandelion.to_scirpy`.
{doc_working_model}
Expand All @@ -632,29 +617,22 @@ def from_dandelion(dandelion, transfer: bool = False, **kwargs) -> AnnData:
transfer
Whether to execute `dandelion.tl.transfer` to transfer all data
to the :class:`anndata.AnnData` instance.
to_mudata
Return MuData object instead of AnnData object.
**kwargs
Additional arguments passed to :func:`scirpy.io.read_airr`.
Additional arguments passed to `dandelion.to_scirpy`.
Returns
-------
AnnData object with :term:`AIRR` data in `obsm["airr"]` for each cell, or a MuData object
if `to_mudata` is `True`. For more details see :ref:`data-structure`.
"""
try:
import dandelion as ddl
from dandelion import to_scirpy
except ImportError:
raise ImportError("Please install dandelion: pip install sc-dandelion.") from None

dandelion_df = dandelion.data.copy()
# replace "unassigned" with None
for col in dandelion_df.columns:
dandelion_df.loc[dandelion_df[col] == "unassigned", col] = None

adata = read_airr(dandelion_df, **kwargs)

if transfer:
ddl.tl.transfer(adata, dandelion) # need to make a version that is not so verbose?
return adata
return to_scirpy(dandelion, transfer=transfer, to_mudata=to_mudata, **kwargs)


@_doc_params(doc_working_model=doc_working_model)
Expand Down
2 changes: 1 addition & 1 deletion src/scirpy/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ def test_write_airr_none_field_issue_454(tmp_path):
write_airr(adata, tmp_path / "test.airr.tsv")


@pytest.mark.xfail(reason="Dandelion still uses `duplicate_count` instead of `umi_count`", raises=AssertionError)
# @pytest.mark.xfail(reason="Dandelion still uses `duplicate_count` instead of `umi_count`", raises=AssertionError)
@pytest.mark.extra
@pytest.mark.parametrize(
"anndata_from_10x_sample",
Expand Down

0 comments on commit acc228e

Please sign in to comment.