# jacs_intermediates.py
# Forked from LindeSchoenmaker/IMERGE-FEP.
import glob
import os
import subprocess
import time
from itertools import combinations

import pandas as pd
from rdkit import Chem

from rgroupinterm.rgroupenumeration import EnumRGroups
# running this code requires downloading the fep benchmark set from https://github.com/JenkeScheen/fep_intermediate_generation
if not os.path.exists('./fep_intermediate_generation'):
    # The benchmark set is missing: clone it into the current working
    # directory. The command is passed as an argument list (no shell is
    # involved), and check=True makes a failed clone raise
    # subprocess.CalledProcessError right here instead of being ignored.
    subprocess.run(
        [
            "git",
            "clone",
            "https://github.com/JenkeScheen/fep_intermediate_generation.git",
        ],
        check=True,
    )
LIGANDS_DIR = "fep_intermediate_generation/ligands"
OUTPUT_DIR = "data"


def _load_first_mols(set_dir: str) -> list:
    """Return the first molecule of every ``*.sdf`` file found in *set_dir*.

    Files are visited in sorted order so that the pair numbering derived
    from the returned list does not depend on filesystem ordering. A file
    that contains no record, or whose first record is read back as ``None``,
    is reported and skipped.
    """
    mols = []
    for sdf_file in sorted(glob.glob(os.path.join(set_dir, '*.sdf'))):
        supplier = Chem.rdmolfiles.SDMolSupplier(sdf_file)
        mol = supplier[0] if len(supplier) > 0 else None
        if mol is None:
            print(f"Skipping unreadable ligand file: {sdf_file}")
            continue
        mols.append(mol)
    return mols


def main() -> None:
    """Generate intermediates for every ligand pair of every ligand set.

    For each sub-directory of ``fep_intermediate_generation/ligands`` all
    pairwise combinations of its ligands are passed to
    ``EnumRGroups().generate_intermediates``. Result frames that contain an
    ``Intermediate`` column are tagged with the set name, the pair index and
    the SMILES of both parents.

    Outputs (relative to the current working directory):

    * ``data/jacs_intermediates.csv`` - all collected intermediates, with the
      ``Intermediate`` column converted to SMILES.
    * ``data/jacs_intermediates_failing.csv`` - SMILES of the pairs for which
      generation raised an exception.
    * ``runtimes.txt`` - one wall-clock duration (seconds) per pair.
    """
    frames = []
    failed_combs = []
    runtimes = []

    for set_dir in sorted(glob.glob(os.path.join(LIGANDS_DIR, '*'))):
        # Only directories are ligand sets; this also skips readme.txt.
        if not os.path.isdir(set_dir):
            continue
        folder = os.path.basename(set_dir)
        print(folder)

        mols = _load_first_mols(set_dir)
        for i, (liga, ligb) in enumerate(combinations(mols, 2)):
            start = time.perf_counter()
            generator = EnumRGroups()
            # Work on copies of the molecules, as the original script did;
            # presumably so generation cannot alter the molecules that are
            # reused in later pairs - TODO confirm.
            liga = Chem.Mol(liga)
            ligb = Chem.Mol(ligb)
            try:
                df_interm, _ = generator.generate_intermediates([liga, ligb])
                if 'Intermediate' in df_interm.columns:
                    df_interm['Set'] = folder
                    df_interm['Pair'] = i
                    df_interm['Parent_1'] = Chem.MolToSmiles(liga)
                    df_interm['Parent_2'] = Chem.MolToSmiles(ligb)
                    frames.append(df_interm)
            except Exception as err:
                # Best effort: record the failing pair and carry on with the
                # next one. KeyboardInterrupt/SystemExit are not caught.
                pair_smiles = [Chem.MolToSmiles(x) for x in (liga, ligb)]
                print(*pair_smiles, repr(err))
                failed_combs.append(pair_smiles)
            # The duration is recorded for failed pairs as well.
            runtimes.append(time.perf_counter() - start)

    if frames:
        # A single concat avoids re-copying the accumulated frame per pair.
        df_comb = pd.concat(frames, ignore_index=True)
        df_comb['Intermediate'] = df_comb['Intermediate'].apply(Chem.MolToSmiles)
    else:
        # Without any result there is no 'Intermediate' column to convert.
        print('No intermediates were generated')
        df_comb = pd.DataFrame()

    # Make sure the output directory exists before writing the results.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    df_comb.to_csv(os.path.join(OUTPUT_DIR, 'jacs_intermediates.csv'), index=False)
    pd.DataFrame(failed_combs).to_csv(
        os.path.join(OUTPUT_DIR, 'jacs_intermediates_failing.csv'))

    with open('runtimes.txt', 'w') as fp:
        # one runtime per line
        fp.writelines(f"{item}\n" for item in runtimes)
    print('Done')


if __name__ == "__main__":
    main()