generated from fastai/nbdev_template
-
Notifications
You must be signed in to change notification settings - Fork 31
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
dbc09a7
commit 7321be6
Showing
17 changed files
with
2,064 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,223 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"id": "7f2b7cdf", | ||
"metadata": {}, | ||
"source": [ | ||
"# Score Benchmarking" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "6a63ee21", | ||
"metadata": {}, | ||
"source": [ | ||
"For the first search, compare the performance of the scoring methods across different `top_n` settings." | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "adef4dc0", | ||
"metadata": {}, | ||
"source": [ | ||
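"We don't want to rerun file conversion and feature finding for every configuration, so we run them once, keep a backup copy of the resulting `.ms_data.hdf` file, and restore it before each run with `continue_runs` enabled." | ||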
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "af15f7ca", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import os\n", | ||
"\n", | ||
"import matplotlib.pyplot as plt\n", | ||
"import seaborn as sns\n", | ||
"\n", | ||
"import alphapept.interface\n", | ||
"import alphapept.io\n", | ||
"from alphapept.paths import DEFAULT_SETTINGS_PATH\n", | ||
"from alphapept.settings import load_settings\n", | ||
" \n", | ||
"BASE_PATH = 'F:/AP_Paper_Benchmark/PXD028735/top_n_optimization'" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "4b3de785", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"settings = load_settings(DEFAULT_SETTINGS_PATH)\n", | ||
"\n", | ||
"settings['experiment']['file_paths'] = [os.path.join(BASE_PATH, _) for _ in os.listdir('F:/AP_Paper_Benchmark/PXD028735/top_n_optimization') if _.endswith('.raw')]\n", | ||
"settings['experiment']['fasta_paths'] = [os.path.join(BASE_PATH, _) for _ in os.listdir('F:/AP_Paper_Benchmark/PXD028735/top_n_optimization') if _.endswith('.fasta')]\n", | ||
"\n", | ||
"settings = alphapept.interface.import_raw_data(settings)\n", | ||
"settings = alphapept.interface.feature_finding(settings)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "34a2e864", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import shutil\n", | ||
"\n", | ||
"_ = settings['experiment']['file_paths'][0]\n", | ||
"base, ext = os.path.splitext(_)\n", | ||
"ms_file_path = base+'.ms_data.hdf'\n", | ||
"ms_file_path_bkup = ms_file_path+'.bkup'\n", | ||
"\n", | ||
"if os.path.isfile(ms_file_path_bkup):\n", | ||
" os.remove(ms_file_path_bkup)\n", | ||
"\n", | ||
"os.rename(ms_file_path, ms_file_path_bkup)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "2a935628", | ||
"metadata": { | ||
"scrolled": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"from tqdm import tqdm as tqdm\n", | ||
"import pandas as pd\n", | ||
"\n", | ||
"def set_settings(top_n, method, ini_score):\n", | ||
" settings = load_settings(DEFAULT_SETTINGS_PATH)\n", | ||
"\n", | ||
" settings['workflow']['continue_runs'] = True\n", | ||
"\n", | ||
" settings['experiment']['file_paths'] = [os.path.join(BASE_PATH, _) for _ in os.listdir('F:/AP_Paper_Benchmark/PXD028735/top_n_optimization') if _.endswith('.raw')]\n", | ||
" settings['experiment']['fasta_paths'] = [os.path.join(BASE_PATH, _) for _ in os.listdir('F:/AP_Paper_Benchmark/PXD028735/top_n_optimization') if _.endswith('.fasta')]\n", | ||
" settings['search']['top_n'] = top_n\n", | ||
" settings['score']['method'] = method\n", | ||
" settings['score']['ml_ini_score'] = ini_score\n", | ||
" \n", | ||
" return settings\n", | ||
"\n", | ||
"settings_list = []\n", | ||
"for top_n in [1,2,3,4,5,6,7,8,9,10,11,12,13,14,16,18,20,25,30,35,40]:\n", | ||
" for method in ['x_tandem','random_forest','generic_score','morpheus']:\n", | ||
" if method == 'random_forest':\n", | ||
" for ini_score in ['x_tandem','generic_score','hits']:\n", | ||
" settings = set_settings(top_n, method, ini_score)\n", | ||
" settings_list.append(settings)\n", | ||
" else:\n", | ||
" settings = set_settings(top_n, method, 'hits')\n", | ||
" settings_list.append(settings)\n", | ||
" \n", | ||
"benchmark = []\n", | ||
"\n", | ||
"for settings in tqdm(settings_list):\n", | ||
" \n", | ||
" if os.path.isfile(ms_file_path):\n", | ||
" os.remove(ms_file_path)\n", | ||
" \n", | ||
" shutil.copyfile(ms_file_path_bkup, ms_file_path)\n", | ||
" \n", | ||
" settings_ = alphapept.interface.run_complete_workflow(settings)\n", | ||
"\n", | ||
" _ = settings['experiment']['file_paths'][0]\n", | ||
" base, ext = os.path.splitext(_)\n", | ||
"\n", | ||
" ms_file = alphapept.io.MS_Data_File(base+'.ms_data.hdf')\n", | ||
"\n", | ||
" df = pd.read_hdf(settings['experiment']['results_path'], 'protein_fdr')\n", | ||
" \n", | ||
" time = settings['summary']['timing']['total (min)']\n", | ||
" decoy = df['decoy'].sum()\n", | ||
" target = df['target'].sum()\n", | ||
" top_n = settings['search']['top_n']\n", | ||
" method = settings['score']['method']\n", | ||
" ini_score = settings['score']['ml_ini_score']\n", | ||
"\n", | ||
" plt.show()\n", | ||
" \n", | ||
" benchmark.append((top_n, method, ini_score, target, decoy, time))\n", | ||
" \n", | ||
" if os.path.isfile(settings_['experiment']['results_path']):\n", | ||
" os.remove(settings_['experiment']['results_path'])\n", | ||
"\n", | ||
"benchmark_df = pd.DataFrame(benchmark, columns = ['top_n','method','ini_score','target','decoy','time'])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "0ff9185e", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"benchmark_df['method_'] = benchmark_df.apply(lambda row: row['method'] + ' with ' + str(row['ini_score']) if row['method'] == 'random_forest' else row['method'], axis=1)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "7c6ecbec", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"dark_blue = '#17212b'\n", | ||
"light_blue = '#3dc5ef'\n", | ||
"teal= '#42dee1'\n", | ||
"green = '#6eecb9'\n", | ||
"yellow = '#eef5b3'\n", | ||
"hfont = {'fontname':'Arial', 'size':10}\n", | ||
"\n", | ||
"colors = [dark_blue, light_blue, teal, green, yellow]\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "11cf303b", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"plt.figure(figsize=(7,7))\n", | ||
"\n", | ||
"sns.scatterplot(data=benchmark_df[benchmark_df['top_n'] > 1], x='top_n', y='target', hue='method_', alpha=0.5)\n", | ||
"\n", | ||
"plt.title('Top N vs number of identified precursors after FDR')\n", | ||
"plt.xlabel('Top N')\n", | ||
"plt.ylabel('Top N')\n", | ||
"plt.tight_layout()\n", | ||
"plt.xticks(**hfont)\n", | ||
"plt.yticks(**hfont)\n", | ||
"plt.ylim([0, 35000])\n", | ||
"plt.legend(loc='lower right')\n", | ||
"plt.savefig('figures/SI_01.pdf') \n", | ||
"plt.show()" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8.16" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
297 changes: 297 additions & 0 deletions
297
sandbox/Figure_Notebooks/SI_02 Score Benchmarking.ipynb
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Binary file not shown.
Binary file modified
BIN
-565 Bytes
(96%)
sandbox/Figure_Notebooks/figures/08B_PXD028735_Bruker_time.pdf
Binary file not shown.
Binary file modified
BIN
-360 Bytes
(98%)
sandbox/Figure_Notebooks/figures/08B_PXD028735_Bruker_venn.pdf
Binary file not shown.
Binary file not shown.
Binary file modified
BIN
-570 Bytes
(96%)
sandbox/Figure_Notebooks/figures/08B_PXD028735_Thermo_time.pdf
Binary file not shown.
Binary file modified
BIN
-390 Bytes
(97%)
sandbox/Figure_Notebooks/figures/08B_PXD028735_Thermo_venn.pdf
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.