From eb9c3a005a8460925ef8b9596e363a8a2b46f3d8 Mon Sep 17 00:00:00 2001 From: Henry Webel Date: Fri, 8 Nov 2024 17:44:48 +0100 Subject: [PATCH] :art: make data work independent of location, label plots better --- project/04_1_train_pimms_models.ipynb | 17 +++++++++-------- project/04_1_train_pimms_models.py | 11 ++++++----- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/project/04_1_train_pimms_models.ipynb b/project/04_1_train_pimms_models.ipynb index 5cd609add..7fb5aefa4 100644 --- a/project/04_1_train_pimms_models.ipynb +++ b/project/04_1_train_pimms_models.ipynb @@ -55,12 +55,11 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", - "\n", - "IN_COLAB = 'COLAB_GPU' in os.environ\n", + "from pathlib import Path\n", "\n", - "fn_intensities = 'data/dev_datasets/HeLa_6070/protein_groups_wide_N50.csv'\n", - "if IN_COLAB:\n", + "fn_intensities = Path('data/dev_datasets/HeLa_6070/protein_groups_wide_N50.csv')\n", + "if not fn_intensities.exists():\n", + " print(\"Use example data from GitHub.\")\n", " fn_intensities = ('https://raw.githubusercontent.com/RasmussenLab/pimms/main/'\n", " 'project/data/dev_datasets/HeLa_6070/protein_groups_wide_N50.csv')" ] @@ -651,7 +650,8 @@ " feat_medians=splits.train_X.median(),\n", " ax=ax,\n", " metric_name='MAE',\n", - " palette=color_model_mapping)" + " palette=color_model_mapping)\n", + " ax.set_ylabel('Mean absolute error (MAE)')" ] }, { @@ -724,7 +724,8 @@ " ),\n", " color=color_model_mapping[model_selected],\n", " alpha=1)\n", - "_ = ax.legend()" + "_ = ax.legend()\n", + "ax.set_xlabel('log2 intensity bin')" ] } ], @@ -749,7 +750,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.17" + "version": "3.11.9" }, "mystnb": { "execution_raise_on_error": true, diff --git a/project/04_1_train_pimms_models.py b/project/04_1_train_pimms_models.py index a89c656f5..d193686eb 100644 --- a/project/04_1_train_pimms_models.py +++ b/project/04_1_train_pimms_models.py 
@@ -30,12 +30,11 @@ # Specify example data: # %% -import os - -IN_COLAB = 'COLAB_GPU' in os.environ +from pathlib import Path -fn_intensities = 'data/dev_datasets/HeLa_6070/protein_groups_wide_N50.csv' -if IN_COLAB: +fn_intensities = Path('data/dev_datasets/HeLa_6070/protein_groups_wide_N50.csv') +if not fn_intensities.exists(): + print("Use example data from GitHub.") fn_intensities = ('https://raw.githubusercontent.com/RasmussenLab/pimms/main/' 'project/data/dev_datasets/HeLa_6070/protein_groups_wide_N50.csv') @@ -308,6 +307,7 @@ ax=ax, metric_name='MAE', palette=color_model_mapping) + ax.set_ylabel('Mean absolute error (MAE)') # %% [markdown] # replace predicted values with validation data values @@ -349,3 +349,4 @@ color=color_model_mapping[model_selected], alpha=1) _ = ax.legend() +ax.set_xlabel('log2 intensity bin')