Skip to content

Commit

Permalink
fix read_protein_set to use source_path=self.dataDir
Browse files Browse the repository at this point in the history
  • Loading branch information
David-Araripe committed Jan 8, 2024
1 parent 0d4c498 commit 9132cc1
Showing 1 changed file with 11 additions and 9 deletions.
20 changes: 11 additions & 9 deletions qsprpred/data/sources/papyrus/papyrus_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class Papyrus(DataSource):
Attributes:
DEFAULT_DIR (str): default directory for Papyrus database and the extracted data
dataDir (str): storage directory for Papyrus database and the extracted data
_papyrusDir (str): directory where the Papyrus database is located, i.e. os.path.join(dataDir, "papyrus")
version (list): Papyrus database version
descriptors (list, str, None): descriptors to download if not already present
stereo (bool): use version with stereochemistry
Expand Down Expand Up @@ -60,6 +61,7 @@ def __init__(
use only plusplus version, only high quality data
"""
self.dataDir = data_dir
self._papyrusDir = os.path.join(self.dataDir, "papyrus")
self.version = version
self.descriptors = descriptors
self.stereo = stereo
Expand All @@ -73,8 +75,8 @@ def download(self):
Only newly requested data is downloaded. Remove the files if you want to
reload the data completely.
"""
os.makedirs(self.dataDir, exist_ok=True)
if not os.path.exists(os.path.join(self.dataDir, "papyrus")):
if not os.path.exists(self._papyrusDir):
os.makedirs(self.dataDir, exist_ok=True)
logger.info("Downloading Papyrus database...")
download_papyrus(
outdir=self.dataDir,
Expand All @@ -87,10 +89,8 @@ def download(self):
)
else:
logger.info(
f"Papyrus database already"
f" downloaded. Using existing data. "
f"Delete the following folder to reload the data: "
f"{os.path.join(self.dataDir, 'papyrus')}"
"Papyrus database already downloaded. Using existing data. "
f"Delete the following folder to reload the data: {self._papyrusDir}"
)

def getData(
Expand Down Expand Up @@ -121,7 +121,7 @@ def getData(
Returns:
MoleculeTable: the filtered data set
"""
logger.debug(f"Getting data from Papyrus data source...")
logger.debug("Getting data from Papyrus data source...")
assert acc_keys is not None, "Please provide a list of accession keys."
name = name or "papyrus"
self.download()
Expand All @@ -143,7 +143,7 @@ def getData(
plusplus=self.plusplus,
papyrus_dir=self.dataDir,
)
logger.debug(f"Finished filtering Papyrus data set.")
logger.debug("Finished filtering Papyrus data set.")
logger.debug(f"Creating MoleculeTable from '{path}'.")
ret = MoleculeTable.fromTableFile(name, path, store_dir=output_dir, **kwargs)
logger.debug(f"Finished creating MoleculeTable from '{path}'.")
Expand Down Expand Up @@ -175,7 +175,9 @@ def getProteinData(
if os.path.exists(path) and use_existing:
return pd.read_table(path)
else:
protein_data = papyrus_scripts.read_protein_set(version=self.version)
protein_data = papyrus_scripts.read_protein_set(
source_path=self.dataDir, version=self.version
)
protein_data["accession"] = protein_data["target_id"].apply(
lambda x: x.split("_")[0]
)
Expand Down

0 comments on commit 9132cc1

Please sign in to comment.