From cce4fe98c81e9b59902a8466343fbb509fe3e2a4 Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Wed, 13 Nov 2024 15:12:43 +0100 Subject: [PATCH 1/5] rename .progress folder to quant --- alphadia/planning.py | 10 +++++----- alphadia/workflow/base.py | 13 +++++++------ alphadia/workflow/peptidecentric.py | 4 ++-- tests/unit_tests/test_outputaccumulator.py | 7 ++++--- tests/unit_tests/test_outputtransform.py | 7 ++++--- tests/unit_tests/test_workflow.py | 2 +- 6 files changed, 23 insertions(+), 20 deletions(-) diff --git a/alphadia/planning.py b/alphadia/planning.py index 8518bab3..5dcb7ace 100644 --- a/alphadia/planning.py +++ b/alphadia/planning.py @@ -40,7 +40,7 @@ def __init__( fasta_path_list: list[str] | None = None, config: dict | None = None, config_base_path: str | None = None, - custom_quant_dir: str | None = None, + quant_path: str | None = None, ) -> None: """Highest level class to plan a DIA Search. Owns the input file list, speclib and the config. @@ -60,8 +60,8 @@ def __init__( config_update : dict, optional dict to update the default config. Can be used for debugging purposes etc. - custom_quant_dir : str, optional - directory to save the quantification results (psm & frag parquet files). If not provided, the results are saved in the usual workflow folder + quant_path : str, optional + path to directory to save the quantification results (psm & frag parquet files). If not provided, the results are saved in the usual workflow folder """ if config is None: @@ -84,7 +84,7 @@ def __init__( self.raw_path_list = raw_path_list self.library_path = library_path self.fasta_path_list = fasta_path_list - self.custom_quant_dir = custom_quant_dir + self.quant_path = quant_path logger.progress(f"version: {alphadia.__version__}") @@ -323,7 +323,7 @@ def run( workflow = peptidecentric.PeptideCentricWorkflow( raw_name, self.config, - custom_temp_folder=self.custom_quant_dir, + quant_path=self.quant_path, ) # check if the raw file is already processed diff --git a/alphadia/workflow/base.py b/alphadia/workflow/base.py index 565e4a90..09530ea6 100644 --- a/alphadia/workflow/base.py +++ b/alphadia/workflow/base.py @@ -14,7 +14,8 @@ logger = logging.getLogger() -TEMP_FOLDER = ".progress" +QUANT_FOLDER_NAME = "quant" + class WorkflowBase: @@ -32,7 +33,7 @@ def __init__( self, instance_name: str, config: dict, - custom_temp_folder: str = None, + quant_path: str = None, ) -> None: """ Parameters @@ -44,13 +45,13 @@ def __init__( config: dict Configuration for the workflow. This will be used to initialize the calibration manager and fdr manager - custom_temp_folder: str - custom parent_path for workflow folders, relevant for distributed searches + quant_path: str + path to directory holding quant folders, relevant for distributed searches """ self._instance_name: str = instance_name - self._parent_path: str = custom_temp_folder or os.path.join( - config["output"], TEMP_FOLDER + self._parent_path: str = quant_path or os.path.join( + config["output"], QUANT_FOLDER_NAME ) self._config: dict = config self.reporter: reporting.Pipeline | None = None diff --git a/alphadia/workflow/peptidecentric.py b/alphadia/workflow/peptidecentric.py index b36858c8..4b00e476 100644 --- a/alphadia/workflow/peptidecentric.py +++ b/alphadia/workflow/peptidecentric.py @@ -103,12 +103,12 @@ def __init__( self, instance_name: str, config: dict, - custom_temp_folder: str = None, + quant_path: str = None, ) -> None: super().__init__( instance_name, config, - custom_temp_folder, + quant_path, ) self.optlock = None diff --git a/tests/unit_tests/test_outputaccumulator.py b/tests/unit_tests/test_outputaccumulator.py index 5b4c683e..896aa06d 100644 --- a/tests/unit_tests/test_outputaccumulator.py +++ b/tests/unit_tests/test_outputaccumulator.py @@ -8,6 +8,7 @@ from conftest import mock_fragment_df, mock_precursor_df from alphadia import outputtransform +from alphadia.workflow.base import QUANT_FOLDER_NAME def prepare_input_data(): @@ -64,11 +65,11 @@ def prepare_input_data(): temp_folder = os.path.join(tempfile.gettempdir(), "alphadia") os.makedirs(temp_folder, exist_ok=True) - progress_folder = os.path.join(temp_folder, "progress") - os.makedirs(progress_folder, exist_ok=True) + quant_path = os.path.join(temp_folder, QUANT_FOLDER_NAME) + os.makedirs(quant_path, exist_ok=True) # setup raw folders - raw_folders = [os.path.join(progress_folder, run) for run in run_columns] + raw_folders = [os.path.join(quant_path, run) for run in run_columns] psm_base_df = mock_precursor_df(n_precursor=100, with_decoy=True) fragment_base_df = mock_fragment_df(n_precursor=200, n_fragments=10) diff --git a/tests/unit_tests/test_outputtransform.py b/tests/unit_tests/test_outputtransform.py index 23f5da92..947fedbb 100644 --- a/tests/unit_tests/test_outputtransform.py +++ b/tests/unit_tests/test_outputtransform.py @@ -8,6 +8,7 @@ from alphadia import outputtransform from alphadia.workflow import manager, peptidecentric +from alphadia.workflow.base import QUANT_FOLDER_NAME def test_output_transform(): @@ -54,11 +55,11 @@ def test_output_transform(): temp_folder = os.path.join(tempfile.gettempdir(), "alphadia") os.makedirs(temp_folder, exist_ok=True) - progress_folder = os.path.join(temp_folder, "progress") - os.makedirs(progress_folder, exist_ok=True) + quant_path = os.path.join(temp_folder, QUANT_FOLDER_NAME) + os.makedirs(quant_path, exist_ok=True) # setup raw folders - raw_folders = [os.path.join(progress_folder, run) for run in run_columns] + raw_folders = [os.path.join(quant_path, run) for run in run_columns] psm_base_df = mock_precursor_df(n_precursor=100) fragment_base_df = mock_fragment_df(n_precursor=200) diff --git a/tests/unit_tests/test_workflow.py b/tests/unit_tests/test_workflow.py index 1e289b13..2ca58666 100644 --- a/tests/unit_tests/test_workflow.py +++ b/tests/unit_tests/test_workflow.py @@ -320,7 +320,7 @@ def test_workflow_base(): assert my_workflow.config["output"] == config["output"] assert my_workflow.instance_name == workflow_name assert my_workflow.parent_path == os.path.join( - config["output"], base.TEMP_FOLDER + config["output"], base.QUANT_FOLDER_NAME ) assert my_workflow.path == os.path.join( my_workflow.parent_path, workflow_name From bb2c682105020d98e07261ef55ad34488130b247 Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Wed, 13 Nov 2024 15:13:24 +0100 Subject: [PATCH 2/5] rename input parameter to quant-dir --- alphadia/cli.py | 49 ++++++++++++++------------------ misc/distributed_search/inner.sh | 4 +-- misc/distributed_search/outer.sh | 8 +++--- 3 files changed, 28 insertions(+), 33 deletions(-) diff --git a/alphadia/cli.py b/alphadia/cli.py index 0b16a881..368e8367 100644 --- a/alphadia/cli.py +++ b/alphadia/cli.py @@ -96,7 +96,7 @@ default="{}", ) parser.add_argument( - "--custom-quant-dir", + "--quant-dir", type=str, help="Directory to save the quantification results (psm & frag parquet files) to be reused in a distributed search", nargs="?", @@ -172,10 +172,10 @@ def parse_output_directory(args: argparse.Namespace, config: dict) -> str: return output_directory -def parse_custom_quant_dir(args: argparse.Namespace, config: dict) -> str: - """Parse custom quant directory. - 1. Use custom quant directory from config file if specified. - 2. Use custom quant directory from command line if specified. +def parse_quant_dir(args: argparse.Namespace, config: dict) -> str: + """Parse custom quant path. + 1. Use custom quant path from config file if specified. + 2. Use custom quant path from command line if specified. Parameters ---------- @@ -189,26 +189,23 @@ def parse_custom_quant_dir(args: argparse.Namespace, config: dict) -> str: Returns ------- - custom_quant_dir : str - Custom quant directory. + quant_dir : str + path to quant directory. """ - custom_quant_dir = None - if "custom_quant_dir" in config: - custom_quant_dir = ( - utils.windows_to_wsl(config["custom_quant_dir"]) + quant_dir = None + if "quant_dir" in config: + quant_dir = ( + utils.windows_to_wsl(config["quant_dir"]) if args.wsl - else config["custom_quant_dir"] + else config["quant_dir"] ) - if args.custom_quant_dir is not None: - custom_quant_dir = ( - utils.windows_to_wsl(args.custom_quant_dir) - if args.wsl - else args.custom_quant_dir - ) + if args.quant_dir is not None: + quant_dir = utils.windows_to_wsl(args.quant_dir) if args.wsl else args.quant_dir + + return quant_dir - return custom_quant_dir def parse_raw_path_list(args: argparse.Namespace, config: dict) -> list: @@ -349,7 +346,7 @@ def run(*args, **kwargs): print("No output directory specified.") return - custom_quant_dir = parse_custom_quant_dir(args, config) + quant_dir = parse_quant_dir(args, config) reporting.init_logging(output_directory) raw_path_list = parse_raw_path_list(args, config) @@ -368,10 +365,8 @@ def run(*args, **kwargs): logger.progress(f" {f}") logger.progress(f"Saving output to: {output_directory}") - if custom_quant_dir is not None: - logger.progress( - f"Saving quantification output to 'custom_quant_dir': {custom_quant_dir}" - ) + if quant_dir is not None: + logger.progress(f"Saving quantification output to {quant_dir=}") try: import matplotlib @@ -387,7 +382,7 @@ def run(*args, **kwargs): library_path=library_path, fasta_path_list=fasta_path_list, config=config, - custom_quant_dir=custom_quant_dir, + quant_path=quant_dir, ) plan.run() @@ -406,5 +401,5 @@ def run(*args, **kwargs): # uncomment for debugging: -# if __name__ == "__main__": -# run() +if __name__ == "__main__": + run() diff --git a/misc/distributed_search/inner.sh b/misc/distributed_search/inner.sh index f31c96e3..ad2c1c08 100755 --- a/misc/distributed_search/inner.sh +++ b/misc/distributed_search/inner.sh @@ -16,10 +16,10 @@ cd $chunk_directory || exit config_filename="config.yaml" # run with or without custom quant_dir -if [ -z "${custom_quant_dir}" ]; then +if [ -z "${quant_dir}" ]; then alphadia --config ${config_filename} else - alphadia --config ${config_filename} --custom-quant-dir ${custom_quant_dir} + alphadia --config ${config_filename} --quant-dir ${quant_dir} fi echo "AlphaDIA completed successfully" diff --git a/misc/distributed_search/outer.sh b/misc/distributed_search/outer.sh index 188f96aa..b7894063 100755 --- a/misc/distributed_search/outer.sh +++ b/misc/distributed_search/outer.sh @@ -76,7 +76,7 @@ mkdir -p ${first_search_directory} mbr_library_directory="${target_directory}/3_mbr_library" mkdir -p ${mbr_library_directory} -mbr_progress_directory="${target_directory}/3_mbr_library/chunk_0/.progress" +mbr_progress_directory="${target_directory}/3_mbr_library/chunk_0/quant" mkdir -p ${mbr_progress_directory} second_search_directory="${target_directory}/4_second_search" @@ -85,7 +85,7 @@ mkdir -p ${second_search_directory} lfq_directory="${target_directory}/5_lfq" mkdir -p ${lfq_directory} -lfq_progress_directory="${target_directory}/5_lfq/chunk_0/.progress" +lfq_progress_directory="${target_directory}/5_lfq/chunk_0/quant" mkdir -p ${lfq_progress_directory} ### PREDICT LIBRARY ### @@ -150,7 +150,7 @@ if [[ "$first_search" -eq 1 ]]; then --ntasks-per-node=${ntasks_per_node} \ --cpus-per-task=${cpus} \ --mem=${mem} \ - --export=ALL,target_directory=${first_search_directory},custom_quant_dir=${mbr_progress_directory} ./inner.sh + --export=ALL,target_directory=${first_search_directory},quant_dir=${mbr_progress_directory} ./inner.sh else echo "Skipping first search" fi @@ -211,7 +211,7 @@ if [[ "$second_search" -eq 1 ]]; then --ntasks-per-node=${ntasks_per_node} \ --cpus-per-task=${cpus} \ --mem=${mem} \ - --export=ALL,target_directory=${second_search_directory},custom_quant_dir=${lfq_progress_directory} ./inner.sh + --export=ALL,target_directory=${second_search_directory},quant_dir=${lfq_progress_directory} ./inner.sh else echo "Skipping second search" fi From f835043e1994b54208b0fefdcd0a9920031ddba0 Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Wed, 13 Nov 2024 15:14:42 +0100 Subject: [PATCH 3/5] add a TODO --- alphadia/cli.py | 1 + 1 file changed, 1 insertion(+) diff --git a/alphadia/cli.py b/alphadia/cli.py index 368e8367..2842b1e6 100644 --- a/alphadia/cli.py +++ b/alphadia/cli.py @@ -364,6 +364,7 @@ def run(*args, **kwargs): for f in fasta_path_list: logger.progress(f" {f}") + # TODO rename all output_directory, output_folder => output_path, quant_dir->quant_path (except cli parameter) logger.progress(f"Saving output to: {output_directory}") if quant_dir is not None: logger.progress(f"Saving quantification output to {quant_dir=}") From 06aa48901a858777add91a7f75c10727043d6e03 Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Wed, 13 Nov 2024 15:18:20 +0100 Subject: [PATCH 4/5] revert accidental commit --- alphadia/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/alphadia/cli.py b/alphadia/cli.py index 2842b1e6..b35f27e6 100644 --- a/alphadia/cli.py +++ b/alphadia/cli.py @@ -402,5 +402,5 @@ def run(*args, **kwargs): # uncomment for debugging: -if __name__ == "__main__": - run() +# if __name__ == "__main__": +# run() From 1b0a1387d4e782b70f19ba0ecaade937ef483dd0 Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Thu, 14 Nov 2024 14:06:14 +0100 Subject: [PATCH 5/5] formatting --- alphadia/cli.py | 1 - alphadia/workflow/base.py | 1 - 2 files changed, 2 deletions(-) diff --git a/alphadia/cli.py b/alphadia/cli.py index b35f27e6..4702d555 100644 --- a/alphadia/cli.py +++ b/alphadia/cli.py @@ -207,7 +207,6 @@ def parse_quant_dir(args: argparse.Namespace, config: dict) -> str: return quant_dir - def parse_raw_path_list(args: argparse.Namespace, config: dict) -> list: """Parse raw file list. 1. Use raw file list from config file if specified. diff --git a/alphadia/workflow/base.py b/alphadia/workflow/base.py index 09530ea6..9821f5a6 100644 --- a/alphadia/workflow/base.py +++ b/alphadia/workflow/base.py @@ -17,7 +17,6 @@ QUANT_FOLDER_NAME = "quant" - class WorkflowBase: """Base class for all workflows. This class is responsible for creating the workflow folder. It also initializes the calibration_manager and fdr_manager for the workflow.