Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

20240801 complete fred docking #18

Open
wants to merge 23 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
Bootstraps,StructureChoice,StructureChoice_Choose_N,Score,Score_Choose_N,EvaluationMetric,EvaluationMetric_Cutoff,Split,N_Per_Split,PoseSelection,PoseSelection_Choose_N,Min,Max,CI_Upper,CI_Lower,Total,Fraction
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,1,Default,1,0.0,0.6666666666666666,0.6323529411764706,0.014705882352941176,204,0.4617009803921568
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,5,Default,1,0.5853658536585366,0.8585365853658536,0.824390243902439,0.6536585365853659,205,0.7545268292682926
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,10,Default,1,0.5804878048780487,0.8926829268292683,0.8634146341463415,0.7268292682926829,205,0.8134634146341465
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,15,Default,1,0.6780487804878049,0.9073170731707317,0.8829268292682927,0.7609756097560976,205,0.8386731707317074
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,20,Default,1,0.7463414634146341,0.9073170731707317,0.8926829268292683,0.7804878048780488,205,0.8525707317073172
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,25,Default,1,0.751219512195122,0.9170731707317074,0.8975609756097561,0.7951219512195122,205,0.8604097560975611
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,30,Default,1,0.7804878048780488,0.9219512195121952,0.9073170731707317,0.8048780487804879,205,0.866487804878049
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,35,Default,1,0.7707317073170732,0.9219512195121952,0.9073170731707317,0.8097560975609757,205,0.8716682926829269
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,40,Default,1,0.7658536585365854,0.926829268292683,0.9121951219512195,0.8195121951219512,205,0.8756439024390245
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,45,Default,1,0.7804878048780488,0.9219512195121952,0.9121951219512195,0.824390243902439,205,0.8790829268292685
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,50,Default,1,0.7951219512195122,0.9317073170731708,0.9121951219512195,0.8292682926829268,205,0.8809853658536588
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,55,Default,1,0.8146341463414634,0.9317073170731708,0.9170731707317074,0.8439024390243902,205,0.884429268292683
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,60,Default,1,0.8146341463414634,0.9317073170731708,0.9170731707317074,0.8439024390243902,205,0.8852878048780489
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,65,Default,1,0.8097560975609757,0.9365853658536586,0.9219512195121952,0.8439024390243902,205,0.887639024390244
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,70,Default,1,0.8146341463414634,0.9317073170731708,0.9170731707317074,0.8536585365853658,205,0.8892243902439027
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,75,Default,1,0.824390243902439,0.9365853658536586,0.9219512195121952,0.8536585365853658,205,0.8916146341463416
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,80,Default,1,0.8390243902439024,0.9317073170731708,0.9219512195121952,0.8536585365853658,205,0.8916195121951221
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,85,Default,1,0.8292682926829268,0.9365853658536586,0.9219512195121952,0.8585365853658536,205,0.8937756097560977
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,90,Default,1,0.8390243902439024,0.9414634146341463,0.926829268292683,0.8585365853658536,205,0.8949609756097564
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,95,Default,1,0.848780487804878,0.9463414634146341,0.9219512195121952,0.8634146341463415,205,0.894907317073171
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,100,Default,1,0.848780487804878,0.9414634146341463,0.9219512195121952,0.8634146341463415,205,0.8969317073170733
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,105,Default,1,0.848780487804878,0.9317073170731708,0.9219512195121952,0.8682926829268293,205,0.8976585365853662
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,110,Default,1,0.8439024390243902,0.9414634146341463,0.926829268292683,0.8682926829268293,205,0.8995707317073173
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,115,Default,1,0.848780487804878,0.9414634146341463,0.926829268292683,0.8731707317073171,205,0.8990439024390245
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,120,Default,1,0.8536585365853658,0.9463414634146341,0.9219512195121952,0.8731707317073171,205,0.9007121951219514
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,125,Default,1,0.8585365853658536,0.9365853658536586,0.926829268292683,0.8731707317073171,205,0.9010439024390247
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,130,Default,1,0.8585365853658536,0.9414634146341463,0.926829268292683,0.8731707317073171,205,0.9012634146341466
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,135,Default,1,0.8585365853658536,0.9414634146341463,0.926829268292683,0.8780487804878049,205,0.9027024390243905
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,140,Default,1,0.8634146341463415,0.9414634146341463,0.926829268292683,0.8780487804878049,205,0.9025902439024391
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,145,Default,1,0.8536585365853658,0.9365853658536586,0.926829268292683,0.8829268292682927,205,0.9041219512195123
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,150,Default,1,0.8682926829268293,0.9317073170731708,0.926829268292683,0.8829268292682927,205,0.9052000000000002
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,155,Default,1,0.8634146341463415,0.9317073170731708,0.926829268292683,0.8829268292682927,205,0.9050926829268294
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,160,Default,1,0.8682926829268293,0.9365853658536586,0.9219512195121952,0.8829268292682927,205,0.9064878048780489
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,165,Default,1,0.8682926829268293,0.9365853658536586,0.926829268292683,0.8878048780487805,205,0.9069463414634149
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,170,Default,1,0.8731707317073171,0.9365853658536586,0.926829268292683,0.8878048780487805,205,0.9079219512195122
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,175,Default,1,0.8731707317073171,0.9365853658536586,0.926829268292683,0.8926829268292683,205,0.9083804878048782
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,180,Default,1,0.8829268292682927,0.9365853658536586,0.926829268292683,0.8926829268292683,205,0.9088390243902441
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,185,Default,1,0.8780487804878049,0.9365853658536586,0.926829268292683,0.8926829268292683,205,0.9096585365853662
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,190,Default,1,0.8829268292682927,0.9317073170731708,0.926829268292683,0.8975609756097561,205,0.9103268292682929
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,195,Default,1,0.8829268292682927,0.9317073170731708,0.926829268292683,0.8975609756097561,205,0.9116390243902441
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,200,Default,1,0.8926829268292683,0.9317073170731708,0.9219512195121952,0.8975609756097561,205,0.9116536585365856
1000,Dock_to_All,All,POSIT_Probability,1,RMSD,2.0,RandomSplit,205,Default,1,0.9170731707317074,0.9170731707317074,0.9170731707317074,0.9170731707317074,205,0.9170731707317077
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
date_dict_path: /data/chodera/paynea/asapdiscovery-sars-retrospective/science/20240403_multi_pose_docking_v2/20240430_analyze_cross_docking_results/20240503_inputs_analysis/date_dict.json
n_bootstraps: 1000
n_per_split:
- 1
- 5
- 10
- 15
- 20
- 25
- 30
- 35
- 40
- 45
- 50
- 55
- 60
- 65
- 70
- 75
- 80
- 85
- 90
- 95
- 100
- 105
- 110
- 115
- 120
- 125
- 130
- 135
- 140
- 145
- 150
- 155
- 160
- 165
- 170
- 175
- 180
- 185
- 190
- 195
- 200
- 205
n_poses:
- 1
- 2
- 5
- 10
- 20
- 50
n_structures:
- 1
- 2
- 5
- 10
pose_id_column: Pose_ID
query_ligand_column: Query_Ligand
reference_ligand_column: Reference_Ligand
reference_structure_column: Reference_Structure
rmsd_cutoff: 2.0
9 changes: 9 additions & 0 deletions science/20240801_complete_fred_docking/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
To compare the results we get from reference-based docking with a more typical docking approach, I'd like to run complete cross-docking with FRED.


Notes:
I had to re-prep the Fragalysis download because only 157 of the structures were being recognized. Even after re-prepping, it still won't recognize these three structures as being part of the cache:
`{'Mpro-P0053_0A', 'Mpro-P0091_0A', 'Mpro-P2007_0A'}`
See `cluster_scripts/analyze_missing_structures.ipynb` for more details.

Nothing about these three structures stands out to me as an obvious cause, so a deeper investigation would be needed. For now, I'm pushing ahead with the 202 structures that we have.
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "initial_id",
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"outputs": [],
"source": [
"df = pd.read_csv(\"/Users/alexpayne/Scientific_Projects/mers-drug-discovery/sars2-retrospective-analysis/1/docking_results_final.csv\")"
],
"metadata": {
"collapsed": false
},
"id": "f23acff9f7022de5",
"execution_count": null
},
{
"cell_type": "code",
"outputs": [],
"source": [
"df.groupby([\"ligand_id\", \"in-silico_SARS-CoV-2-Mpro_docking-structure-POSIT_msk\"]).nunique()"
],
"metadata": {
"collapsed": false
},
"id": "5336d3511e3e0674",
"execution_count": null
},
{
"cell_type": "code",
"outputs": [],
"source": [
"mol_ids = df.ligand_id.unique()"
],
"metadata": {
"collapsed": false
},
"id": "5e0d03b3e6a9a876",
"execution_count": null
},
{
"cell_type": "code",
"outputs": [],
"source": [
"structure_id = set(df[\"in-silico_SARS-CoV-2-Mpro_docking-structure-POSIT_msk\"].unique())"
],
"metadata": {
"collapsed": false
},
"id": "44101b3c0f0823e7",
"execution_count": null
},
{
"cell_type": "code",
"outputs": [],
"source": [
"len(structure_id)"
],
"metadata": {
"collapsed": false
},
"id": "5a1c8e1a867f6af1",
"execution_count": null
},
{
"cell_type": "code",
"outputs": [],
"source": [
"from pathlib import Path"
],
"metadata": {
"collapsed": false
},
"id": "6c0172eef2b37a4f",
"execution_count": null
},
{
"cell_type": "code",
"outputs": [],
"source": [
"xtal_dirs = set([xtal_dir.stem for xtal_dir in Path(\"/Users/alexpayne/Scientific_Projects/mers-drug-discovery/sars2-retrospective-analysis/mpro_fragalysis-04-01-24_p_series/aligned\").glob(\"Mpro*\")])"
],
"metadata": {
"collapsed": false
},
"id": "f0b749bacd3538a8",
"execution_count": null
},
{
"cell_type": "code",
"outputs": [],
"source": [
"xtal_dirs - structure_id"
],
"metadata": {
"collapsed": false
},
"id": "8a90582cd00a3a4e",
"execution_count": null
},
{
"cell_type": "code",
"outputs": [],
"source": [],
"metadata": {
"collapsed": false
},
"id": "e39381b8e90365e2"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "38b88560-cfae-4977-9e73-a584490b4628",
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"import shutil"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a7a2096e-65df-46d4-bf30-f77717fc53ab",
"metadata": {},
"outputs": [],
"source": [
"correct_structures = Path(\"/home/paynea/asap-datasets/mpro_fragalysis-04-01-24_curated_cache\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5b5f64f-af37-42d0-8d75-5b77005322b9",
"metadata": {},
"outputs": [],
"source": [
"original = Path(\"/home/paynea/asap-datasets/mpro_fragalysis-04-01-24/aligned\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ee5f8148-0387-443c-8eab-11ca5778ce17",
"metadata": {},
"outputs": [],
"source": [
"new = Path(\"/home/paynea/asap-datasets/mpro_fragalysis-04-01-24_p_series/aligned\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "04569019-2371-4625-bba9-62bfc5e84514",
"metadata": {},
"outputs": [],
"source": [
"prepped_dirs = [prepped_dir for prepped_dir in correct_structures.glob(\"Mpro*\")]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5a7955af-74fe-461a-bb52-64bdc49a477b",
"metadata": {},
"outputs": [],
"source": [
"len(prepped_dirs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "db1bba21-fb3d-4171-930a-d459fa497602",
"metadata": {},
"outputs": [],
"source": [
"frag_dirs = [prepped_dir.stem[:13] for prepped_dir in prepped_dirs]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9d859d75-26c2-4061-a32c-5a7041e1c61a",
"metadata": {},
"outputs": [],
"source": [
"for prepped_dir in prepped_dirs:\n",
" frag_name = prepped_dir.stem[:13]\n",
" shutil.copytree(original / frag_name, new / frag_name)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4838b95d-1414-4b46-b66b-d9e723b12990",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:asapdiscovery]",
"language": "python",
"name": "conda-env-asapdiscovery-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"target": "SARS-CoV-2-Mpro",
"pdb_file": null,
"fragalysis_dir": "/lila/data/chodera/asap-datasets/mpro_fragalysis-04-01-24_p_series",
"structure_dir": null,
"cache_dir": "/lila/data/chodera/asap-datasets/mpro_fragalysis-04-01-24_curated_cache_p_series",
"save_to_cache": true,
"align": null,
"ref_chain": null,
"active_site_chain": null,
"seqres_yaml": null,
"loop_db": "/lila/home/kaminowb/.openeye/rcsb_spruce.loop_db",
"oe_active_site_residue": null,
"use_dask": true,
"dask_type": "local",
"dask_n_workers": null,
"logname": "",
"loglevel": 20,
"output_dir": "/lila/data/chodera/paynea/asapdiscovery-sars-retrospective/science/20240801_complete_fred_docking/cluster_scripts/fragalysis_prep_out"
}
Loading