Skip to content

Commit

Permalink
Notebooks
Browse files Browse the repository at this point in the history
  • Loading branch information
straussmaximilian committed Jul 28, 2023
1 parent dbc09a7 commit 7321be6
Show file tree
Hide file tree
Showing 17 changed files with 2,064 additions and 0 deletions.
438 changes: 438 additions & 0 deletions sandbox/Figure_Notebooks/P_06_B Quant.ipynb

Large diffs are not rendered by default.

937 changes: 937 additions & 0 deletions sandbox/Figure_Notebooks/P_08B.ipynb

Large diffs are not rendered by default.

223 changes: 223 additions & 0 deletions sandbox/Figure_Notebooks/SI_01_TOP_N_Test.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "7f2b7cdf",
"metadata": {},
"source": [
"# Score Benchmarking"
]
},
{
"cell_type": "markdown",
"id": "6a63ee21",
"metadata": {},
"source": [
"For the first search, compare the performance of searching with different `top_n` values and scoring methods."
]
},
{
"cell_type": "markdown",
"id": "adef4dc0",
"metadata": {},
"source": [
"We don't want to rerun feature finding and file conversion again and again, so we create a copy and continue runs."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "af15f7ca",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"import alphapept.interface\n",
"import alphapept.io\n",
"from alphapept.paths import DEFAULT_SETTINGS_PATH\n",
"from alphapept.settings import load_settings\n",
" \n",
"BASE_PATH = 'F:/AP_Paper_Benchmark/PXD028735/top_n_optimization'"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4b3de785",
"metadata": {},
"outputs": [],
"source": [
"settings = load_settings(DEFAULT_SETTINGS_PATH)\n",
"\n",
"# List the benchmark folder once and derive both inputs from BASE_PATH\n",
"# instead of repeating the absolute path literal.\n",
"benchmark_files = os.listdir(BASE_PATH)\n",
"settings['experiment']['file_paths'] = [os.path.join(BASE_PATH, name) for name in benchmark_files if name.endswith('.raw')]\n",
"settings['experiment']['fasta_paths'] = [os.path.join(BASE_PATH, name) for name in benchmark_files if name.endswith('.fasta')]\n",
"\n",
"# Import the raw data and run feature finding once; the benchmark runs below\n",
"# continue from this state instead of repeating these steps.\n",
"settings = alphapept.interface.import_raw_data(settings)\n",
"settings = alphapept.interface.feature_finding(settings)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "34a2e864",
"metadata": {},
"outputs": [],
"source": [
"import shutil\n",
"\n",
"# Derive the MS data file path of the first raw file and the path of its backup.\n",
"first_raw_path = settings['experiment']['file_paths'][0]\n",
"ms_file_path = os.path.splitext(first_raw_path)[0] + '.ms_data.hdf'\n",
"ms_file_path_bkup = ms_file_path + '.bkup'\n",
"\n",
"# Drop a stale backup first, then move the current MS data file aside as the new backup.\n",
"if os.path.isfile(ms_file_path_bkup):\n",
"    os.remove(ms_file_path_bkup)\n",
"\n",
"os.rename(ms_file_path, ms_file_path_bkup)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2a935628",
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"from tqdm import tqdm as tqdm\n",
"import pandas as pd\n",
"\n",
"def set_settings(top_n, method, ini_score):\n",
"    \"\"\"Build the settings for one benchmark run.\n",
"\n",
"    Starts from the default settings, enables `continue_runs`, points the\n",
"    experiment at the `.raw` and `.fasta` files found in `BASE_PATH` and\n",
"    applies the search / scoring parameters under test.\n",
"\n",
"    Args:\n",
"        top_n: Value stored in `settings['search']['top_n']`.\n",
"        method: Value stored in `settings['score']['method']`.\n",
"        ini_score: Value stored in `settings['score']['ml_ini_score']`.\n",
"\n",
"    Returns:\n",
"        The modified settings object.\n",
"    \"\"\"\n",
"    settings = load_settings(DEFAULT_SETTINGS_PATH)\n",
"\n",
"    settings['workflow']['continue_runs'] = True\n",
"\n",
"    # List the benchmark folder once and reuse BASE_PATH instead of the path literal.\n",
"    benchmark_files = os.listdir(BASE_PATH)\n",
"    settings['experiment']['file_paths'] = [os.path.join(BASE_PATH, name) for name in benchmark_files if name.endswith('.raw')]\n",
"    settings['experiment']['fasta_paths'] = [os.path.join(BASE_PATH, name) for name in benchmark_files if name.endswith('.fasta')]\n",
"    settings['search']['top_n'] = top_n\n",
"    settings['score']['method'] = method\n",
"    settings['score']['ml_ini_score'] = ini_score\n",
"\n",
"    return settings\n",
"\n",
"# Parameter grid of the benchmark.\n",
"TOP_N_VALUES = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,16,18,20,25,30,35,40]\n",
"SCORE_METHODS = ['x_tandem','random_forest','generic_score','morpheus']\n",
"RANDOM_FOREST_INI_SCORES = ['x_tandem','generic_score','hits']\n",
"\n",
"# Only 'random_forest' is combined with every initial score; all other methods use 'hits'.\n",
"settings_list = [\n",
"    set_settings(top_n, method, ini_score)\n",
"    for top_n in TOP_N_VALUES\n",
"    for method in SCORE_METHODS\n",
"    for ini_score in (RANDOM_FOREST_INI_SCORES if method == 'random_forest' else ['hits'])\n",
"]\n",
" \n",
"benchmark = []\n",
"\n",
"for settings in tqdm(settings_list):\n",
"    # Restore the backed-up MS data file before every run; shutil.copyfile\n",
"    # replaces an existing destination, so no explicit delete is needed.\n",
"    shutil.copyfile(ms_file_path_bkup, ms_file_path)\n",
"\n",
"    # Read results and timing from the settings returned by the workflow rather\n",
"    # than from the input object, so the lookups do not rely on in-place mutation.\n",
"    run_settings = alphapept.interface.run_complete_workflow(settings)\n",
"    results_path = run_settings['experiment']['results_path']\n",
"\n",
"    protein_fdr = pd.read_hdf(results_path, 'protein_fdr')\n",
"\n",
"    # NOTE(review): no figure is created in this cell; presumably this shows plots\n",
"    # left open by the workflow - confirm whether it is still needed.\n",
"    plt.show()\n",
"\n",
"    benchmark.append((\n",
"        settings['search']['top_n'],\n",
"        settings['score']['method'],\n",
"        settings['score']['ml_ini_score'],\n",
"        protein_fdr['target'].sum(),\n",
"        protein_fdr['decoy'].sum(),\n",
"        run_settings['summary']['timing']['total (min)'],\n",
"    ))\n",
"\n",
"    # Remove the results file so the next configuration does not find old results.\n",
"    if os.path.isfile(results_path):\n",
"        os.remove(results_path)\n",
"\n",
"benchmark_df = pd.DataFrame(benchmark, columns=['top_n', 'method', 'ini_score', 'target', 'decoy', 'time'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0ff9185e",
"metadata": {},
"outputs": [],
"source": [
"# Label random forest runs with their initial score; all other methods keep their plain name.\n",
"is_random_forest = benchmark_df['method'] == 'random_forest'\n",
"random_forest_label = benchmark_df['method'] + ' with ' + benchmark_df['ini_score'].astype(str)\n",
"benchmark_df['method_'] = benchmark_df['method'].where(~is_random_forest, random_forest_label)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7c6ecbec",
"metadata": {},
"outputs": [],
"source": [
"# Hex color codes of the figure palette.\n",
"dark_blue, light_blue, teal = '#17212b', '#3dc5ef', '#42dee1'\n",
"green, yellow = '#6eecb9', '#eef5b3'\n",
"colors = [dark_blue, light_blue, teal, green, yellow]\n",
"\n",
"# Font keyword arguments passed to the tick label calls of the figure.\n",
"hfont = dict(fontname='Arial', size=10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "11cf303b",
"metadata": {},
"outputs": [],
"source": [
"fig, ax = plt.subplots(figsize=(7, 7))\n",
"\n",
"# Runs with top_n == 1 are excluded from the figure.\n",
"sns.scatterplot(data=benchmark_df[benchmark_df['top_n'] > 1], x='top_n', y='target', hue='method_', alpha=0.5, ax=ax)\n",
"\n",
"ax.set_title('Top N vs number of identified precursors after FDR')\n",
"ax.set_xlabel('Top N')\n",
"# The y-axis shows the summed `target` column, not Top N.\n",
"ax.set_ylabel('Number of identified precursors')\n",
"plt.xticks(**hfont)\n",
"plt.yticks(**hfont)\n",
"ax.set_ylim([0, 35000])\n",
"ax.legend(loc='lower right')\n",
"\n",
"# Lay out the figure only after labels, ticks and legend are in place.\n",
"fig.tight_layout()\n",
"\n",
"# Make sure the output folder exists before saving.\n",
"os.makedirs('figures', exist_ok=True)\n",
"fig.savefig('figures/SI_01.pdf')\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
297 changes: 297 additions & 0 deletions sandbox/Figure_Notebooks/SI_02 Score Benchmarking.ipynb

Large diffs are not rendered by default.

169 changes: 169 additions & 0 deletions sandbox/Figure_Notebooks/SI_Cloud_Timings.ipynb

Large diffs are not rendered by default.

Binary file modified sandbox/Figure_Notebooks/figures/08B_PXD028735_Bruker.pdf
Binary file not shown.
Binary file modified sandbox/Figure_Notebooks/figures/08B_PXD028735_Bruker_time.pdf
Binary file not shown.
Binary file modified sandbox/Figure_Notebooks/figures/08B_PXD028735_Bruker_venn.pdf
Binary file not shown.
Binary file modified sandbox/Figure_Notebooks/figures/08B_PXD028735_Thermo.pdf
Binary file not shown.
Binary file modified sandbox/Figure_Notebooks/figures/08B_PXD028735_Thermo_time.pdf
Binary file not shown.
Binary file modified sandbox/Figure_Notebooks/figures/08B_PXD028735_Thermo_venn.pdf
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified sandbox/Figure_Notebooks/figures/SI_02B_Bruker.pdf
Binary file not shown.
Binary file modified sandbox/Figure_Notebooks/figures/SI_02B_Thermo.pdf
Binary file not shown.

0 comments on commit 7321be6

Please sign in to comment.