from katlas.core import *
from katlas.feature import *
from katlas.plot import *
from fastbook import *
from tqdm.notebook import tqdm; tqdm.pandas()
import seaborn as sns
Analysis of amino acid features
Extract features from amino acids
Setup
AA info
# Download the amino acid info
aa = Data.get_aa_info()
# Keep every row except the last two.
# NOTE(review): presumably the trailing two entries are not wanted for the
# feature analysis -- confirm against what Data.get_aa_info() returns.
aa = aa.iloc[:-2,:]
RDKit chemical properties
# Build the feature table from the 'SMILES' column of `aa`.
# NOTE(review): presumably get_rdkit computes RDKit descriptors per row --
# confirm against katlas.feature.
aa_rdkit = get_rdkit(aa, 'SMILES')
# Display the table shape before any preprocessing.
aa_rdkit.shape
(23, 210)
# Preprocess the feature table in place of the raw one; the call reports the
# columns it removes (see the recorded output that follows).
aa_rdkit = preprocess(aa_rdkit)
removing columns: {'fr_COO2', 'ExactMolWt', 'fr_ether', 'fr_N_O', 'fr_phenol', 'fr_C_O_noCOO', 'fr_nitrile', 'fr_bicyclic', 'NumRadicalElectrons', 'fr_ketone', 'NumSaturatedCarbocycles', 'fr_amide', 'fr_nitro', 'fr_COO', 'fr_Al_OH_noTert', 'fr_isothiocyan', 'PEOE_VSA13', 'fr_oxazole', 'fr_azo', 'fr_nitro_arom', 'fr_phos_acid', 'SMR_VSA8', 'SlogP_VSA10', 'fr_Nhpyrrole', 'fr_thiophene', 'fr_morpholine', 'fr_piperdine', 'fr_lactone', 'Chi2n', 'fr_aniline', 'fr_furan', 'SMR_VSA2', 'fr_pyridine', 'HeavyAtomMolWt', 'fr_alkyl_carbamate', 'fr_ArN', 'SlogP_VSA6', 'fr_Ar_COO', 'fr_Ar_OH', 'fr_dihydropyridine', 'NumAliphaticCarbocycles', 'fr_epoxide', 'fr_aryl_methyl', 'MaxPartialCharge', 'fr_imide', 'Chi2v', 'fr_diazo', 'fr_ester', 'MinAbsPartialCharge', 'fr_nitroso', 'fr_para_hydroxylation', 'fr_allylic_oxid', 'fr_Ar_NH', 'fr_phos_ester', 'fr_quatN', 'fr_aldehyde', 'fr_sulfone', 'fr_hdrzone', 'fr_Ndealkylation2', 'fr_oxime', 'fr_priamide', 'VSA_EState9', 'fr_hdrzine', 'fr_guanido', 'NumSaturatedHeterocycles', 'fr_thiazole', 'MaxEStateIndex', 'PEOE_VSA5', 'HeavyAtomCount', 'NumSaturatedRings', 'fr_halogen', 'fr_ketone_Topliss', 'SlogP_VSA11', 'fr_prisulfonamd', 'fr_thiocyan', 'fr_urea', 'fr_term_acetylene', 'SlogP_VSA12', 'EState_VSA11', 'fr_benzodiazepine', 'fr_nitro_arom_nonortho', 'BCUT2D_MRHI', 'fr_sulfonamd', 'fr_Imine', 'fr_alkyl_halide', 'fr_isocyan', 'fr_Ndealkylation1', 'fr_HOCCN', 'fr_azide', 'fr_phenol_noOrthoHbond', 'SlogP_VSA9', 'fr_tetrazole', 'LabuteASA', 'fr_methoxy', 'NumValenceElectrons', 'fr_piperzine', 'Chi0', 'fr_benzene', 'VSA_EState1', 'fr_C_S', 'SlogP_VSA7', 'NumAliphaticRings', 'fr_amidine', 'fr_barbitur', 'fr_imidazole', 'fr_lactam'}
# Display the table shape again, now that preprocess() has been applied.
aa_rdkit.shape
(23, 104)
PCA
# Configure seaborn: 300 dpi for both on-screen and saved figures,
# the 'notebook' plotting context and the "ticks" style.
sns.set(rc={"figure.dpi":300, 'savefig.dpi':300})
sns.set_context('notebook')
sns.set_style("ticks")
# Plot the feature table with the 'pca' method, naming points by aa.Name.
# NOTE(review): the meaning of hue='aa' is defined by plot_cluster -- confirm
# against katlas.plot.
plot_cluster(aa_rdkit, name_list = aa.Name, hue = 'aa', method = 'pca')
Correlation of aa
# Transpose so that the rows of aa_rdkit become columns, then compute the
# pairwise correlation between those columns (i.e. between the original rows).
rdkit_corr = aa_rdkit.T.corr()
draw_corr(rdkit_corr)
Correlation of aa in target
PSPA - tyr
# Load the PSPA tyr table and drop its last three columns.
df = Data.get_pspa_tyr_norm().iloc[:,:-3]
# Reshape wide -> long: one row per (column label, index label, value).
df2 = df.unstack().reset_index()
# NOTE(review): assumes the column labels are substrate positions and the
# index labels are kinases -- confirm against the loader.
df2.columns = ['substrate','kinase','target']
# The last character of the substrate label is used as the amino acid key.
df2['aa']=df2.substrate.str[-1]
# Running counter within each amino acid, so (enum, aa) pairs are unique
# and can serve as pivot coordinates.
df2['enum'] = df2.groupby('aa').cumcount()
# Wide table: one column per amino acid, rows aligned by the running counter.
df_pivot = df2.pivot(index='enum', columns='aa', values='target')
# Pairwise correlation between amino acid columns, rounded to 2 decimals.
corr = df_pivot.corr().round(2)
draw_corr(corr)
PSPA - st
# Load the PSPA st table and drop its last three columns.
df = Data.get_pspa_st_norm().iloc[:,:-3]
# Reshape wide -> long: one row per (column label, index label, value).
df2 = df.unstack().reset_index()
# NOTE(review): assumes the column labels are substrate positions and the
# index labels are kinases -- confirm against the loader.
df2.columns = ['substrate','kinase','target']
# The last character of the substrate label is used as the amino acid key.
df2['aa']=df2.substrate.str[-1]
# Running counter within each amino acid, so (enum, aa) pairs are unique
# and can serve as pivot coordinates.
df2['enum'] = df2.groupby('aa').cumcount()
# Wide table: one column per amino acid, rows aligned by the running counter.
df_pivot = df2.pivot(index='enum', columns='aa', values='target')
# Pairwise correlation between amino acid columns, rounded to 2 decimals.
corr = df_pivot.corr().round(2)
draw_corr(corr)
CDDM
# Load the CDDM table and drop its last three columns.
df = Data.get_cddm().iloc[:,:-3]
# Reshape wide -> long: one row per (column label, index label, value).
df2 = df.unstack().reset_index()
# NOTE(review): assumes the column labels are substrate positions and the
# index labels are kinases -- confirm against the loader.
df2.columns = ['substrate','kinase','target']
# The last character of the substrate label is used as the amino acid key.
df2['aa']=df2.substrate.str[-1]
# Running counter within each amino acid, so (enum, aa) pairs are unique
# and can serve as pivot coordinates.
df2['enum'] = df2.groupby('aa').cumcount()
# Wide table: one column per amino acid, rows aligned by the running counter.
df_pivot = df2.pivot(index='enum', columns='aa', values='target')
# Pairwise correlation between amino acid columns, rounded to 2 decimals.
corr = df_pivot.corr().round(2)
draw_corr(corr)