Skip to content

Commit

Permalink
Merging staging branch into prod branch
Browse files Browse the repository at this point in the history
  • Loading branch information
Bento007 committed Jun 14, 2024
2 parents 8d24c9a + e8123c6 commit 3b35405
Show file tree
Hide file tree
Showing 56 changed files with 1,227 additions and 611 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,12 @@
ParsedAsctbTableEntry,
Reference,
)
from backend.cellguide.pipeline.canonical_marker_genes.utils import (
clean_doi,
get_title_and_citation_from_doi,
)
from backend.cellguide.pipeline.constants import ASCTB_MASTER_SHEET_URL, CELLGUIDE_PIPELINE_NUM_CPUS
from backend.common.census_cube.data.ontology_labels import ontology_term_label
from backend.common.census_cube.utils import setup_retry_session
from backend.common.doi import clean_doi
from backend.common.marker_genes.marker_gene_files.gene_metadata import get_gene_id_to_name_and_symbol
from backend.common.providers.crossref_provider import CrossrefProvider

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -53,6 +51,7 @@ def __init__(self, *, wmg_tissues: list[str], wmg_human_genes: list[str]):

gene_metadata = get_gene_id_to_name_and_symbol()
self.gene_id_to_name = gene_metadata.gene_id_to_name
self.crossref_provider = CrossrefProvider()

def _get_tissue_id(self, anatomical_structures: list[AnatomicalStructure]) -> str:
"""
Expand Down Expand Up @@ -132,7 +131,7 @@ def fetch_doi_info(ref):
doi = clean_doi(ref.doi)
if doi:
if doi not in doi_to_citation:
title = get_title_and_citation_from_doi(doi)
title = self.crossref_provider.get_title_and_citation_from_doi(doi)
doi_to_citation[doi] = title
else:
title = doi_to_citation[doi]
Expand Down
131 changes: 0 additions & 131 deletions backend/cellguide/pipeline/canonical_marker_genes/utils.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from backend.cellguide.pipeline.canonical_marker_genes.utils import format_citation_dp
from backend.cellguide.pipeline.source_collections.types import SourceCollectionsData
from backend.common.census_cube.utils import (
descendants,
get_collections_from_discover_api,
get_datasets_from_discover_api,
)
from backend.common.citation import format_citation_dp


def generate_source_collections_data(all_cell_type_ids_in_corpus: list[str]) -> dict[str, list[SourceCollectionsData]]:
Expand Down
52 changes: 52 additions & 0 deletions backend/common/citation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
def format_citation_dp(message: dict) -> str:
    """
    Build a short "<first author> [et al.] (<year>) <journal>" citation.

    Parameters
    ----------
    message : dict
        The message containing publisher_metadata from the /collections API.
        The keys read are "authors", "journal" and "published_year".

    Returns
    -------
    str
        The formatted citation string. The journal part is empty when
        the "journal" value is falsy.
    """
    authors = message["authors"]
    # Only the first author is spelled out; any further authors collapse to "et al."
    et_al = " et al." if len(authors) > 1 else ""

    lead = authors[0]
    # Authors without a "family" key are expected to carry a single "name" instead.
    lead_name = lead["family"] if "family" in lead else lead["name"]

    journal = message["journal"] or ""
    year = message["published_year"]

    return f"{lead_name}{et_al} ({year}) {journal}"


def format_citation_crossref(message: dict) -> str:
    """
    Build a short "<first author> [et al.] (<year>) <journal>" citation.

    Parameters
    ----------
    message : dict
        The message containing citation details output from CrossRef.
        The keys read are "author", "container-title" and "created".

    Returns
    -------
    str
        The formatted citation string. The journal part is empty when
        "container-title" has no entries.
    """
    authors = message["author"]
    # Only the first author is spelled out; any further authors collapse to "et al."
    et_al = " et al." if len(authors) > 1 else ""

    lead = authors[0]
    # Authors without a "family" key are expected to carry a single "name" instead.
    lead_name = lead["family"] if "family" in lead else lead["name"]

    titles = message["container-title"]
    journal = ""
    if len(titles) > 0:
        journal = titles[0]

    # NOTE(review): the year is the first element of the first "created" date-parts
    # entry; presumably this is the record-creation date rather than the
    # publication date -- confirm this is the intended year.
    year = message["created"]["date-parts"][0][0]

    return f"{lead_name}{et_al} ({year}) {journal}"
26 changes: 26 additions & 0 deletions backend/layers/common/doi.py → backend/common/doi.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,29 @@ def portal_get_normalized_doi_url(doi_node: dict, errors: list) -> Optional[str]
return None
doi_url = f"https://doi.org/{parsed_doi}"
return doi_url


def clean_doi(doi: str) -> str:
    """
    Cleans the DOI string.

    Surrounding whitespace and a single trailing period are removed, the
    "No DOI" placeholder maps to an empty string, and when the value has
    several whitespace-separated tokens (e.g. "DOI: {doi}") the second
    token is taken as the DOI.

    Parameters
    ----------
    doi : str
        The DOI string to be cleaned. ``None`` and the empty string are
        accepted and yield an empty string.

    Returns
    -------
    str
        The cleaned DOI string, or "" when no DOI is present.
    """
    if not doi:
        # Nothing to clean; avoids an AttributeError on None.
        return ""

    doi = doi.strip()
    if doi == "No DOI":
        return ""

    if doi.endswith("."):
        doi = doi[:-1]

    # Split on any run of whitespace so that repeated spaces or tabs after a
    # "DOI:" prefix do not produce an empty token, and so that whitespace left
    # behind by the trailing-period removal is discarded.
    tokens = doi.split()
    if not tokens:
        return ""
    if len(tokens) > 1:
        return tokens[1]  # this handles cases where the DOI string is "DOI: {doi}"
    return tokens[0]
Loading

0 comments on commit 3b35405

Please sign in to comment.