Setup
from kdock.data.core import Data
from kdock.gnina.dock import *
from kdock.gnina.rescore import *
import pandas as pd, numpy as np
# setup_gnina_local('v1.1') # <= v1.1 allows cpu environment
Reading package lists...
Building dependency tree...
Reading state information...
openbabel is already the newest version (3.0.0+dfsg-3ubuntu3).
0 upgraded, 0 newly installed, 0 to remove and 6 not upgraded.
Downloading v1.1 gnina
Finish setup!
Pulling GNINA Docker image: gnina/gnina
GNINA Docker image is ready.
Rescore
Rescore every AF3 .cif file in a folder
Local gnina binary (./gnina)
# Rescore the structures under ~/mirati_structure, passing chain 'A' as the
# receptor chain id and chain 'L' as the ligand chain id.
df = get_gnina_rescore_folder(
    '~/mirati_structure',
    rec_chain_id='A',
    lig_chain_id='L',
)
CPU times: user 1.19 s, sys: 257 ms, total: 1.44 s
Wall time: 21min 55s
If only the Vinardo score is needed:
# Same folder and chain ids as the default run, but with CNN_affinity
# disabled and vinardo enabled.
df = get_gnina_rescore_folder(
    '~/mirati_structure',
    rec_chain_id='A',
    lig_chain_id='L',
    CNN_affinity=False,
    vinardo=True,
)
Docker gnina (latest)
# Same folder and chain ids, with is_local=False.
# NOTE(review): per the surrounding heading this presumably routes the run
# through the gnina Docker image instead of the local binary -- confirm.
df = get_gnina_rescore_folder(
    '~/mirati_structure',
    rec_chain_id='A',
    lig_chain_id='L',
    is_local=False,
)
Merge with target
# Load the Mirati G12D table and lower-case its IDs.
target = Data.get_mirati_g12d()
target['ID'] = target['ID'].str.lower()

# Add a natural-log column for each potency measurement; the 1e-5 offset
# keeps a zero value from producing -inf.
for potency_col in ('Kd', 'IC50', 'erk_IC50'):
    target[f'log_{potency_col}'] = np.log(target[potency_col] + 1e-5)
0
us_1
CN1CCC[C@H]1COc1nc(N2CC3CCC(C2)N3)c2cnc(cc2n1)...
97.7
124.7
3159.1
4.581902
4.825911
8.058042
-13.02696
-0.30852
0.88857
0.67440
7.76933
0.23841
1
us_4
Oc1cc(-c2ncc3c(nc(OCCc4ccccn4)nc3c2F)N2CC3CCC(...
155.7
496.2
8530.0
5.047931
6.206979
9.051345
-11.44830
-0.70801
1.01023
0.67062
7.98086
0.29582
2
us_5
Cn1nccc1COc1nc(N2CC3CCC(C2)N3)c2cnc(c(F)c2n1)-...
294.8
722.9
8193.8
5.686297
6.583271
9.011133
-7.58041
-1.00321
0.91650
0.48476
7.27615
0.59758
3
us_6
Cc1cccnc1CCOc1nc(N2CC3CCC(C2)N3)c2cnc(c(F)c2n1...
442.2
434.1
11518.2
6.091762
6.073275
9.351684
-7.00049
-1.53471
4.55312
0.54943
6.14055
0.24137
4
us_7
Oc1cc(-c2ncc3c(nc(OCCc4ncccn4)nc3c2F)N2CC3CCC(...
463.5
1867.3
NaN
6.138806
7.532249
NaN
-13.12053
-0.93447
0.28536
0.71432
7.42845
0.04068
# Write the merged table to CSV without the index column.
# NOTE(review): `out` is not assigned in any visible cell -- presumably it is
# the merge of `target` with the rescoring `df`; confirm the merge step
# was not lost from this notebook.
out.to_csv('mirati_gnina_v1_1.csv' ,index= False )
Merge with AF score
# Attach the AF3 score columns to the gnina rescoring table and save it.
df = pd.read_csv('mirati_gnina_rescore_v1_3.csv')
af = pd.read_csv('AF3_mirati_660.csv')

# Columns taken from the AF3 table; 'ID' is used as the join key.
cols = [
    'ID', 'chain_pair_pae_min_0_1',
    'chain_pair_pae_min_1_0', 'chain_pair_pae_min_1_1', 'chain_ptm_0',
    'chain_ptm_1', 'fraction_disordered', 'iptm', 'ptm', 'ranking_score',
    'iptm_ptm_add', 'chain_pair_pae_min_add', 'iptm_rnk', 'ptm_rnk',
    'iptm_ptm_rnk_add', 'chain_pair_pae_min_add_rnk', 'iptm_pae_add_rnk',
]

# The cell previously loaded `af` and built `cols` but never used them, so
# the file written below was just a copy of the input. Join the selected
# AF3 columns onto `df`; a left join keeps every rescored row even when
# no AF3 record matches.
# NOTE(review): assumes both CSVs have an 'ID' column with matching
# values (including letter case) -- confirm before relying on the output.
df = df.merge(af[cols], on='ID', how='left')

df.to_csv('mirati_gnina_v1_3_af.csv', index=False)
Correlation
# Pairwise Pearson correlation (the pandas default) over numeric columns only.
out.corr(method='pearson', numeric_only=True)
Kd
1.000000
0.137290
-0.088274
0.567310
0.081667
-0.167722
0.057558
-0.017178
-0.025133
-0.048843
-0.050384
0.025814
IC50
0.137290
1.000000
0.379993
0.409676
0.405385
0.242495
0.053759
0.043837
0.004114
-0.018452
-0.084494
-0.056643
erk_IC50
-0.088274
0.379993
1.000000
0.340125
0.712639
0.734366
0.021087
0.178889
-0.048670
0.003114
-0.090038
-0.111960
log_Kd
0.567310
0.409676
0.340125
1.000000
0.685839
0.489439
0.248571
-0.040234
0.127425
-0.239114
-0.297084
-0.032168
log_IC50
0.081667
0.405385
0.712639
0.685839
1.000000
0.782773
0.122564
0.225565
0.034728
-0.102558
-0.241604
-0.134573
log_erk_IC50
-0.167722
0.242495
0.734366
0.489439
0.782773
1.000000
0.104046
0.180925
-0.018172
-0.056561
-0.203590
-0.102189
binding_energy
0.057558
0.053759
0.021087
0.248571
0.122564
0.104046
1.000000
-0.446761
0.585579
-0.605980
-0.719846
0.146693
uncertainty
-0.017178
0.043837
0.178889
-0.040234
0.225565
0.180925
-0.446761
1.000000
-0.340409
0.415752
0.292460
-0.295152
RMSD
-0.025133
0.004114
-0.048670
0.127425
0.034728
-0.018172
0.585579
-0.340409
1.000000
-0.502661
-0.567085
0.241297
CNNscore
-0.048843
-0.018452
0.003114
-0.239114
-0.102558
-0.056561
-0.605980
0.415752
-0.502661
1.000000
0.804062
-0.475357
CNNaffinity
-0.050384
-0.084494
-0.090038
-0.297084
-0.241604
-0.203590
-0.719846
0.292460
-0.567085
0.804062
1.000000
-0.334174
CNNvariance
0.025814
-0.056643
-0.111960
-0.032168
-0.134573
-0.102189
0.146693
-0.295152
0.241297
-0.475357
-0.334174
1.000000
# Pairwise Spearman rank correlation over numeric columns only.
out.corr(method='spearman', numeric_only=True)
Kd
1.000000
0.770684
0.484753
1.000000
0.770684
0.484753
0.307612
-0.041270
0.197978
-0.271837
-0.377634
-0.075319
IC50
0.770684
1.000000
0.821105
0.770684
1.000000
0.821105
0.214448
0.201271
0.024432
-0.102062
-0.317811
-0.171460
erk_IC50
0.484753
0.821105
1.000000
0.484753
0.821105
1.000000
0.148804
0.197881
-0.012566
-0.031280
-0.228966
-0.100639
log_Kd
1.000000
0.770684
0.484753
1.000000
0.770684
0.484753
0.307612
-0.041270
0.197978
-0.271837
-0.377634
-0.075319
log_IC50
0.770684
1.000000
0.821105
0.770684
1.000000
0.821105
0.214448
0.201271
0.024432
-0.102062
-0.317811
-0.171460
log_erk_IC50
0.484753
0.821105
1.000000
0.484753
0.821105
1.000000
0.148804
0.197881
-0.012566
-0.031280
-0.228966
-0.100639
binding_energy
0.307612
0.214448
0.148804
0.307612
0.214448
0.148804
1.000000
-0.318202
0.643279
-0.596055
-0.657687
0.110284
uncertainty
-0.041270
0.201271
0.197881
-0.041270
0.201271
0.197881
-0.318202
1.000000
-0.285817
0.383521
0.103260
-0.278986
RMSD
0.197978
0.024432
-0.012566
0.197978
0.024432
-0.012566
0.643279
-0.285817
1.000000
-0.608563
-0.558381
0.233253
CNNscore
-0.271837
-0.102062
-0.031280
-0.271837
-0.102062
-0.031280
-0.596055
0.383521
-0.608563
1.000000
0.765298
-0.483761
CNNaffinity
-0.377634
-0.317811
-0.228966
-0.377634
-0.317811
-0.228966
-0.657687
0.103260
-0.558381
0.765298
1.000000
-0.251215
CNNvariance
-0.075319
-0.171460
-0.100639
-0.075319
-0.171460
-0.100639
0.110284
-0.278986
0.233253
-0.483761
-0.251215
1.000000
# Pairwise Spearman rank correlation over numeric columns only.
# NOTE(review): the table printed below has no log_* columns and different
# values from the previous Spearman table, so `out` presumably holds a
# different result set here -- confirm which one.
out.corr(method='spearman', numeric_only=True)
Kd
1.000000
0.770684
0.484753
0.307595
-0.041586
0.198397
-0.318074
-0.343751
0.173958
IC50
0.770684
1.000000
0.821105
0.214860
0.201848
0.019858
-0.164505
-0.355147
0.205119
erk_IC50
0.484753
0.821105
1.000000
0.150561
0.201548
-0.023826
-0.118687
-0.287123
0.202999
binding_energy
0.307595
0.214860
0.150561
1.000000
-0.317127
0.642633
-0.723730
-0.723106
0.457403
uncertainty
-0.041586
0.201848
0.201548
-0.317127
1.000000
-0.290202
0.329470
0.111949
-0.021203
RMSD
0.198397
0.019858
-0.023826
0.642633
-0.290202
1.000000
-0.664650
-0.442909
0.441766
CNNscore
-0.318074
-0.164505
-0.118687
-0.723730
0.329470
-0.664650
1.000000
0.635484
-0.412043
CNNaffinity
-0.343751
-0.355147
-0.287123
-0.723106
0.111949
-0.442909
0.635484
1.000000
-0.269412
CNNvariance
0.173958
0.205119
0.202999
0.457403
-0.021203
0.441766
-0.412043
-0.269412
1.000000