Gnina rescore with docked ligand

Setup

from kdock.data.core import Data
from kdock.gnina.dock import *
from kdock.gnina.rescore import *
import pandas as pd, numpy as np
# setup_gnina_local('v1.1') # <= v1.1 allows cpu environment
Reading package lists...
Building dependency tree...
Reading state information...
openbabel is already the newest version (3.0.0+dfsg-3ubuntu3).
0 upgraded, 0 newly installed, 0 to remove and 6 not upgraded.
Downloading v1.1 gnina
Finish setup!
# setup_gnina_docker()
Pulling GNINA Docker image: gnina/gnina
GNINA Docker image is ready.

Rescore

Get folder that contains the AF3 .cif files

Using the local gnina (./gnina):

# Rescore the structures under ~/mirati_structure; only the folder and the two
# chain IDs are passed, so every other option keeps its default.
# The recorded run of this cell took about 22 minutes of wall time.
# NOTE(review): rec_chain_id / lig_chain_id presumably select the receptor and
# ligand chains inside each structure file — confirm against
# get_gnina_rescore_folder.
df = get_gnina_rescore_folder('~/mirati_structure',
                              rec_chain_id='A',
                              lig_chain_id='L',
                             )
CPU times: user 1.19 s, sys: 257 ms, total: 1.44 s
Wall time: 21min 55s

If only the Vinardo score is needed:

# Vinardo-only variant: same folder and chain IDs, but with CNN_affinity
# switched off and vinardo switched on.
# NOTE(review): the exact columns returned in this mode are not shown in this
# notebook — check before reusing the merge/correlation cells on this `df`.
df = get_gnina_rescore_folder('~/mirati_structure',
                              rec_chain_id='A',
                              lig_chain_id='L',
                              CNN_affinity=False,
                              vinardo=True,
                             )

Docker gnina (latest)

# Docker variant: identical inputs, with is_local=False.
# NOTE(review): presumably this routes the run through the gnina/gnina Docker
# image pulled by setup_gnina_docker() instead of the local ./gnina — confirm.
df = get_gnina_rescore_folder('~/mirati_structure',
                              rec_chain_id='A',
                              lig_chain_id='L',
                              is_local=False)

Merge with target

# Drop the last 15 characters of every rescored ID.
# NOTE(review): presumably this strips a fixed-length file-name suffix so the
# IDs line up with the target table — confirm the suffix length. Re-running
# this cell on the same `df` trims the IDs a second time.
df['ID'] = df['ID'].str[:-15]

# Load the target measurements and lower-case their IDs before the merge.
target = Data.get_mirati_g12d()
target['ID'] = target['ID'].str.lower()

# Natural-log transform of each measurement; the 1e-5 offset keeps log()
# finite when a measurement is exactly 0.
for measure in ('Kd', 'IC50', 'erk_IC50'):
    target[f'log_{measure}'] = np.log(target[measure] + 1e-5)

# merge() is called without `on`, so pandas joins on the column names the two
# frames have in common.
out = target.merge(df)
out.head()
ID SMILES Kd IC50 erk_IC50 log_Kd log_IC50 log_erk_IC50 binding_energy uncertainty RMSD CNNscore CNNaffinity CNNvariance
0 us_1 CN1CCC[C@H]1COc1nc(N2CC3CCC(C2)N3)c2cnc(cc2n1)... 97.7 124.7 3159.1 4.581902 4.825911 8.058042 -13.02696 -0.30852 0.88857 0.67440 7.76933 0.23841
1 us_4 Oc1cc(-c2ncc3c(nc(OCCc4ccccn4)nc3c2F)N2CC3CCC(... 155.7 496.2 8530.0 5.047931 6.206979 9.051345 -11.44830 -0.70801 1.01023 0.67062 7.98086 0.29582
2 us_5 Cn1nccc1COc1nc(N2CC3CCC(C2)N3)c2cnc(c(F)c2n1)-... 294.8 722.9 8193.8 5.686297 6.583271 9.011133 -7.58041 -1.00321 0.91650 0.48476 7.27615 0.59758
3 us_6 Cc1cccnc1CCOc1nc(N2CC3CCC(C2)N3)c2cnc(c(F)c2n1... 442.2 434.1 11518.2 6.091762 6.073275 9.351684 -7.00049 -1.53471 4.55312 0.54943 6.14055 0.24137
4 us_7 Oc1cc(-c2ncc3c(nc(OCCc4ncccn4)nc3c2F)N2CC3CCC(... 463.5 1867.3 NaN 6.138806 7.532249 NaN -13.12053 -0.93447 0.28536 0.71432 7.42845 0.04068
# Persist the merged target + gnina score table; index=False leaves the
# DataFrame index out of the CSV.
out.to_csv('mirati_gnina_v1_1.csv',index=False)

Merge with AF score

# AF3 columns to carry over; 'ID' is included so the two tables can be joined.
cols = [
    'ID',
    'chain_pair_pae_min_0_1', 'chain_pair_pae_min_1_0', 'chain_pair_pae_min_1_1',
    'chain_ptm_0', 'chain_ptm_1',
    'fraction_disordered', 'iptm', 'ptm', 'ranking_score',
    'iptm_ptm_add', 'chain_pair_pae_min_add',
    'iptm_rnk', 'ptm_rnk', 'iptm_ptm_rnk_add',
    'chain_pair_pae_min_add_rnk', 'iptm_pae_add_rnk',
]

# Previously saved gnina rescoring table, and the AF3 table cut down to `cols`.
df = pd.read_csv('mirati_gnina_rescore_v1_3.csv')
af = pd.read_csv('AF3_mirati_660.csv')[cols]

# merge() without `on` joins on the column names shared by both frames.
# NOTE(review): presumably 'ID' is the only shared column — confirm.
df = df.merge(af)
df.to_csv('mirati_gnina_v1_3_af.csv', index=False)

Correlation

# Pairwise correlation between the numeric columns of `out`; no method is
# given, so pandas falls back to its default (Pearson).
out.corr(numeric_only=True)
Kd IC50 erk_IC50 log_Kd log_IC50 log_erk_IC50 binding_energy uncertainty RMSD CNNscore CNNaffinity CNNvariance
Kd 1.000000 0.137290 -0.088274 0.567310 0.081667 -0.167722 0.057558 -0.017178 -0.025133 -0.048843 -0.050384 0.025814
IC50 0.137290 1.000000 0.379993 0.409676 0.405385 0.242495 0.053759 0.043837 0.004114 -0.018452 -0.084494 -0.056643
erk_IC50 -0.088274 0.379993 1.000000 0.340125 0.712639 0.734366 0.021087 0.178889 -0.048670 0.003114 -0.090038 -0.111960
log_Kd 0.567310 0.409676 0.340125 1.000000 0.685839 0.489439 0.248571 -0.040234 0.127425 -0.239114 -0.297084 -0.032168
log_IC50 0.081667 0.405385 0.712639 0.685839 1.000000 0.782773 0.122564 0.225565 0.034728 -0.102558 -0.241604 -0.134573
log_erk_IC50 -0.167722 0.242495 0.734366 0.489439 0.782773 1.000000 0.104046 0.180925 -0.018172 -0.056561 -0.203590 -0.102189
binding_energy 0.057558 0.053759 0.021087 0.248571 0.122564 0.104046 1.000000 -0.446761 0.585579 -0.605980 -0.719846 0.146693
uncertainty -0.017178 0.043837 0.178889 -0.040234 0.225565 0.180925 -0.446761 1.000000 -0.340409 0.415752 0.292460 -0.295152
RMSD -0.025133 0.004114 -0.048670 0.127425 0.034728 -0.018172 0.585579 -0.340409 1.000000 -0.502661 -0.567085 0.241297
CNNscore -0.048843 -0.018452 0.003114 -0.239114 -0.102558 -0.056561 -0.605980 0.415752 -0.502661 1.000000 0.804062 -0.475357
CNNaffinity -0.050384 -0.084494 -0.090038 -0.297084 -0.241604 -0.203590 -0.719846 0.292460 -0.567085 0.804062 1.000000 -0.334174
CNNvariance 0.025814 -0.056643 -0.111960 -0.032168 -0.134573 -0.102189 0.146693 -0.295152 0.241297 -0.475357 -0.334174 1.000000
# Spearman (rank) correlation between the numeric columns of `out`.
# The log_* columns are monotonic transforms of the raw measurements, so they
# rank identically and their rows/columns duplicate the raw ones.
out.corr('spearman',numeric_only=True)
Kd IC50 erk_IC50 log_Kd log_IC50 log_erk_IC50 binding_energy uncertainty RMSD CNNscore CNNaffinity CNNvariance
Kd 1.000000 0.770684 0.484753 1.000000 0.770684 0.484753 0.307612 -0.041270 0.197978 -0.271837 -0.377634 -0.075319
IC50 0.770684 1.000000 0.821105 0.770684 1.000000 0.821105 0.214448 0.201271 0.024432 -0.102062 -0.317811 -0.171460
erk_IC50 0.484753 0.821105 1.000000 0.484753 0.821105 1.000000 0.148804 0.197881 -0.012566 -0.031280 -0.228966 -0.100639
log_Kd 1.000000 0.770684 0.484753 1.000000 0.770684 0.484753 0.307612 -0.041270 0.197978 -0.271837 -0.377634 -0.075319
log_IC50 0.770684 1.000000 0.821105 0.770684 1.000000 0.821105 0.214448 0.201271 0.024432 -0.102062 -0.317811 -0.171460
log_erk_IC50 0.484753 0.821105 1.000000 0.484753 0.821105 1.000000 0.148804 0.197881 -0.012566 -0.031280 -0.228966 -0.100639
binding_energy 0.307612 0.214448 0.148804 0.307612 0.214448 0.148804 1.000000 -0.318202 0.643279 -0.596055 -0.657687 0.110284
uncertainty -0.041270 0.201271 0.197881 -0.041270 0.201271 0.197881 -0.318202 1.000000 -0.285817 0.383521 0.103260 -0.278986
RMSD 0.197978 0.024432 -0.012566 0.197978 0.024432 -0.012566 0.643279 -0.285817 1.000000 -0.608563 -0.558381 0.233253
CNNscore -0.271837 -0.102062 -0.031280 -0.271837 -0.102062 -0.031280 -0.596055 0.383521 -0.608563 1.000000 0.765298 -0.483761
CNNaffinity -0.377634 -0.317811 -0.228966 -0.377634 -0.317811 -0.228966 -0.657687 0.103260 -0.558381 0.765298 1.000000 -0.251215
CNNvariance -0.075319 -0.171460 -0.100639 -0.075319 -0.171460 -0.100639 0.110284 -0.278986 0.233253 -0.483761 -0.251215 1.000000
# Spearman (rank) correlation between the numeric columns of `out`.
# NOTE(review): this repeats the call from the previous cell, but the recorded
# output has no log_* columns and different values — presumably it was run on
# an `out` built from a different rescoring run; confirm which one.
out.corr('spearman',numeric_only=True)
Kd IC50 erk_IC50 binding_energy uncertainty RMSD CNNscore CNNaffinity CNNvariance
Kd 1.000000 0.770684 0.484753 0.307595 -0.041586 0.198397 -0.318074 -0.343751 0.173958
IC50 0.770684 1.000000 0.821105 0.214860 0.201848 0.019858 -0.164505 -0.355147 0.205119
erk_IC50 0.484753 0.821105 1.000000 0.150561 0.201548 -0.023826 -0.118687 -0.287123 0.202999
binding_energy 0.307595 0.214860 0.150561 1.000000 -0.317127 0.642633 -0.723730 -0.723106 0.457403
uncertainty -0.041586 0.201848 0.201548 -0.317127 1.000000 -0.290202 0.329470 0.111949 -0.021203
RMSD 0.198397 0.019858 -0.023826 0.642633 -0.290202 1.000000 -0.664650 -0.442909 0.441766
CNNscore -0.318074 -0.164505 -0.118687 -0.723730 0.329470 -0.664650 1.000000 0.635484 -0.412043
CNNaffinity -0.343751 -0.355147 -0.287123 -0.723106 0.111949 -0.442909 0.635484 1.000000 -0.269412
CNNvariance 0.173958 0.205119 0.202999 0.457403 -0.021203 0.441766 -0.412043 -0.269412 1.000000