import pandas as pd
from katlas.core import *
Specificity
# Load the table of PSSMs from the parquet file.
pssms = pd.read_parquet('out/CDDM_pssms.parquet')

# Apply get_specificity_flat to each row (axis=1); the result is indexed like
# `pssms`, with one value per row.
spec = pssms.apply(get_specificity_flat, axis=1)

# Dataset returned by katlas' Data.get_ks_dataset(); the code further down
# reads its 'kinase_uniprot', 'kinase_genes' and 'kinase_group' columns.
df = Data.get_ks_dataset()
CPU times: user 759 ms, sys: 409 ms, total: 1.17 s
Wall time: 4min 45s
# Build a kinase identifier as "<uniprot>_<first whitespace-separated gene name>".
# NOTE(review): this is presumably meant to match the index labels of `spec`
# (they are used as lookup keys in the map below) -- confirm.
df['kinase_id'] = df['kinase_uniprot'] + '_' + df['kinase_genes'].str.split().str[0]

# One row per unique (kinase_id, kinase_group) pair.
info = df[['kinase_id', 'kinase_group']].drop_duplicates()

# Look up each kinase's specificity by id; ids absent from `spec` become NaN.
info['specificity'] = info.kinase_id.map(spec)

# Count missing values per column.
info.isna().sum()
kinase_id 0
kinase_group 30
specificity 120
dtype: int64
There are 30 kinases without an assigned kinase group.
# Drop every row that has a missing value in any column.
info = info.dropna()

# Order rows from highest to lowest specificity.
info = info.sort_values('specificity', ascending=False)
from katlas.plot import *
# Plot setup helper from katlas.plot, called before drawing.
set_sns()

# Specificity per kinase_id, colored by kinase group.
plot_rank(info, x='kinase_id', y='specificity', hue='kinase_group', palette=group_color)

# Specificity grouped by kinase group, using the same palette.
plot_bar(info, value='specificity', group='kinase_group', palette=group_color)