Analyze DL/ML results

Setup

from fastbook import *
from scipy.optimize import minimize
from katlas.imports import *
import seaborn as sns

Load data

# Read the training table; reset_index() moves the stored index into the
# first column so that the frame gets a default 0..n-1 row index.
df = pd.read_parquet('train_data/combine_t5_kd.parquet').reset_index()
# Feature columns are the ones whose name starts with 'T5_'.
feat_col = df.columns[df.columns.str.startswith('T5_')]
# Target columns are all remaining columns except the first one.
# NOTE(review): the [1:] presumably drops the identifier column created by
# reset_index() above - confirm against the parquet schema.
target_col = df.columns[~df.columns.isin(feat_col)][1:]

# Target matrix used for weighting and scoring the OOF predictions.
y = df[target_col]
# Out-of-fold predictions, used below as a dict keyed by model name.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
oof_results = pd.read_pickle('raw/oof.pkl')
# Keep only the first two columns of the info sheet.
# NOTE(review): presumably the kinase name and its data source - confirm.
source = pd.read_excel('train_data/combine_info_PSPA.xlsx').iloc[:,:2]

# Kinase annotation table, excluding rows whose `pseudo` value is "1".
info = Data.get_kinase_info().query('pseudo !="1"')

# Default merge: inner join on every column name shared by both frames.
# NOTE(review): later cells pair `info` with `y` row by row, which assumes
# this merge neither drops nor reorders rows - confirm.
info = source.merge(info)

Ensemble

The goal here is to learn a weight for each model's out-of-fold (OOF) predictions so that the weighted blend minimizes the overall MSE; we then select the top-weighted models, re-ensemble only those, and inspect their weights.

def ensemble(oof_results, # dictionary mapping model name -> OOF predictions shaped like `y`
             y # target df (rows are samples, columns are targets)
            ):
    """Blend a dictionary of OOF predictions with MSE-optimal convex weights.

    One weight per model is fitted with SLSQP so that the weighted sum of
    the OOF predictions minimizes the mean squared error against `y`.
    Every weight is bounded to [0, 1] and the weights are constrained to
    sum to 1. The optimizer status is printed and the weight table is
    shown sorted from largest to smallest weight.

    Returns a tuple `(weights_df, oof_w)`:
      - `weights_df`: DataFrame with columns `model` and `weights`, in the
        iteration order of `oof_results`.
      - `oof_w`: DataFrame holding the blended prediction, with the same
        column labels as `y`.

    Raises `ValueError` if `oof_results` is empty.
    """
    if not oof_results:
        raise ValueError('oof_results must contain at least one set of OOF predictions')

    model_names = list(oof_results)
    num_models = len(model_names)
    y_true = y.values

    # Stack every OOF into one (model, sample, target) array; materialising
    # the dict values once avoids rebuilding a list on every iteration.
    oofs = np.zeros((num_models, y.shape[0], y.shape[1]))
    for i, oof in enumerate(oof_results.values()):
        oofs[i] = oof

    # Equality constraint: the weights must sum to 1
    cons = {'type': 'eq', 'fun': lambda w: 1 - np.sum(w)}

    # Each weight is bounded between 0 and 1
    bounds = [(0, 1)] * num_models

    # Initial guess - divide 1 equally among all models
    initial_weights = [1. / num_models] * num_models

    def objective(weights):
        "Mean squared error of the weighted blend against the targets"
        # Contract the model axis: sum_i weights[i] * oofs[i]
        blend = np.tensordot(weights, oofs, axes=1)
        return np.mean((blend - y_true) ** 2)

    # SLSQP supports both the bounds and the equality constraint
    result = minimize(objective, initial_weights, method='SLSQP', bounds=bounds, constraints=cons)

    # Report whether the optimizer converged
    print(f'result: {result.success}')
    print(result.message)

    best_weights = result.x

    weights_df = pd.DataFrame({'model': model_names, 'weights': best_weights})

    # `display` is injected by IPython/notebook sessions; fall back to
    # print so the function also works in a plain Python process.
    try:
        show = display
    except NameError:
        show = print
    show(weights_df.sort_values('weights', ascending=False))

    # Label the blend with y's own columns rather than a notebook global,
    # so the function works for any target frame passed in.
    final_prediction = np.tensordot(best_weights, oofs, axes=1)
    oof_w = pd.DataFrame(final_prediction, columns=y.columns)

    return weights_df, oof_w
# Fit blending weights over every model's OOF predictions; returns the
# weight table and the weighted-blend prediction frame.
weights,oof_ensemble = ensemble(oof_results,y)
result: True
Optimization terminated successfully
model weights
5 cnn_t5_kd 2.229052e-01
7 cnn_esm_kd 2.044781e-01
4 cnn_t5 1.372858e-01
19 Ridge_esm_kd 7.176390e-02
29 KNN_t5_kd 7.153355e-02
11 LinearRegression_esm_kd 5.621854e-02
10 LinearRegression_esm 5.518402e-02
9 LinearRegression_t5_kd 4.254569e-02
6 cnn_esm 4.045877e-02
28 KNN_t5 2.837086e-02
31 KNN_esm_kd 2.613324e-02
8 LinearRegression_t5 1.770226e-02
25 DecisionTreeRegressor_t5_kd 1.498182e-02
27 DecisionTreeRegressor_esm_kd 5.412341e-03
18 Ridge_esm 5.025994e-03
15 Lasso_esm_kd 1.081117e-17
20 ElasticNet_t5 7.169454e-18
1 mlp_t5_kd 5.042534e-18
14 Lasso_esm 3.581190e-18
0 mlp_t5 3.488243e-18
21 ElasticNet_t5_kd 1.283283e-18
22 ElasticNet_esm 1.233044e-18
13 Lasso_t5_kd 8.692677e-19
2 mlp_esm 7.528412e-19
17 Ridge_t5_kd 0.000000e+00
23 ElasticNet_esm_kd 0.000000e+00
24 DecisionTreeRegressor_t5 0.000000e+00
26 DecisionTreeRegressor_esm 0.000000e+00
12 Lasso_t5 0.000000e+00
3 mlp_esm_kd 0.000000e+00
30 KNN_esm 0.000000e+00
16 Ridge_t5 0.000000e+00
# Score the all-model weighted blend against the targets; the return value
# is discarded here and only the metrics it prints are used.
_ = score_each(y,oof_ensemble)
overall MSE: 0.2979
Average Pearson: 0.8240 

Then we choose the top three models and ensemble them.

# The three models that received the largest weights in the full ensemble.
top = ['cnn_t5_kd','cnn_esm_kd','cnn_t5']
# Restrict the OOF dictionary to those models and refit the weights.
oofs2 = {key: oof_results[key] for key in top}
weights2,oof_ensemble2 = ensemble(oofs2,y)
result: True
Optimization terminated successfully
model weights
1 cnn_esm_kd 0.370932
0 cnn_t5_kd 0.350575
2 cnn_t5 0.278493
# Score the top-3 weighted blend; only the third returned value is kept.
# `corr` is reassigned by a later score_each call before it is used.
_,_,corr = score_each(y,oof_ensemble2)
overall MSE: 0.3081
Average Pearson: 0.8174 
# Pie chart of the fitted top-3 ensemble weights
labels = weights2.model
sizes =  weights2.weights

plt.figure(figsize=(5, 5))
# autopct prints each slice's share as a percentage with two decimals;
# one "Set3" palette colour is drawn per model.
plt.pie(sizes, labels=labels, autopct='%1.2f%%', startangle=140, colors=sns.color_palette("Set3", len(weights2)))
plt.title("Ensemble Weights")
plt.show()

Since the optimized weights are close to uniform (roughly one third each), we simply take the unweighted mean of the three models.

# Unweighted mean of the three selected models' OOF predictions.
# This overwrites the `oof_ensemble` produced by the all-model ensemble.
oof_ensemble = (oof_results['cnn_t5_kd'] + oof_results['cnn_esm_kd'] + oof_results['cnn_t5']) / 3
# Bare expression: shows the averaged predictions as the cell output.
oof_ensemble
-5P -5G -5A -5C -5S -5T -5V -5I -5L -5M -5F -5Y -5W -5H -5K -5R -5Q -5N -5D -5E -5s -5t -5y -4P -4G -4A -4C -4S -4T -4V -4I -4L -4M -4F -4Y -4W -4H -4K -4R -4Q -4N -4D -4E -4s -4t -4y -3P -3G -3A -3C -3S -3T -3V -3I -3L -3M -3F -3Y -3W -3H -3K -3R -3Q -3N -3D -3E -3s -3t -3y -2P -2G -2A -2C -2S -2T -2V -2I -2L -2M -2F -2Y -2W -2H -2K -2R -2Q -2N -2D -2E -2s -2t -2y -1P -1G -1A -1C -1S -1T -1V -1I -1L -1M -1F -1Y -1W -1H -1K -1R -1Q -1N -1D -1E -1s -1t -1y 1P 1G 1A 1C 1S 1T 1V 1I 1L 1M 1F 1Y 1W 1H 1K 1R 1Q 1N 1D 1E 1s 1t 1y 2P 2G 2A 2C 2S 2T 2V 2I 2L 2M 2F 2Y 2W 2H 2K 2R 2Q 2N 2D 2E 2s 2t 2y 3P 3G 3A 3C 3S 3T 3V 3I 3L 3M 3F 3Y 3W 3H 3K 3R 3Q 3N 3D 3E 3s 3t 3y 4P 4G 4A 4C 4S 4T 4V 4I 4L 4M 4F 4Y 4W 4H 4K 4R 4Q 4N 4D 4E 4s 4t 4y 0s 0t 0y
0 0.188878 0.634647 0.887343 -0.927092 -0.392501 -0.586569 0.142022 0.007973 0.562061 -0.993858 -0.566768 -1.220567 -1.158315 -0.642133 1.120802 0.364175 0.375434 0.055257 1.663378 2.376884 -0.301946 -0.794536 -0.552649 0.190727 1.270748 0.513715 -0.854113 -0.487572 -0.638809 0.230982 -0.242128 1.036821 -0.792146 -0.403138 -1.319813 -1.409867 -0.841248 0.476729 0.362317 -0.164695 -0.232484 1.704270 2.104024 0.291016 -0.687562 -0.548687 0.069685 0.753290 1.037177 -1.145434 -0.621697 -0.929131 0.163861 -0.488886 0.603018 -0.976631 -0.657797 -1.337881 -1.360138 -0.648267 0.672734 0.166015 0.424121 0.005110 1.784140 3.080080 0.135212 -0.625342 -0.208076 0.522695 1.255310 0.811666 -1.000513 -0.704257 -0.894888 0.036102 -0.181274 0.143785 -1.174770 -0.859642 -1.324347 -1.155678 -0.889019 0.587813 -0.260471 0.352046 0.267965 2.108112 2.557907 0.359679 -0.419893 -0.191064 -0.031897 -0.041261 0.768281 -1.046224 -1.056009 -0.841082 1.678762 1.474963 2.380413 -1.209911 -0.328476 -1.353052 -1.178340 -0.810562 -0.073252 -0.627969 -0.244801 -0.143653 1.185254 1.820870 -0.503965 0.033932 0.055079 -0.990394 2.050974 1.202434 -0.896472 -0.751740 -1.074353 0.673735 0.215778 0.388112 -0.904532 -0.696476 -1.325945 -1.133978 -0.718839 -0.004357 -0.723982 0.306474 -0.201503 1.981238 3.301958 0.194510 -0.673171 -0.147423 -0.012991 0.666403 1.149449 -0.809711 -0.726497 -0.936235 0.676131 -0.102694 0.933665 -0.895986 -0.314606 -1.249670 -1.060242 -0.775663 0.120224 -0.079433 -0.139687 0.144503 1.457012 2.152325 0.207267 -0.323534 -0.220516 0.579143 0.581741 0.764128 -0.780944 -0.667929 -0.927524 0.894377 0.651753 2.628730 -0.783616 -0.169506 -1.289550 -1.116577 -0.653992 0.193128 0.715483 -0.328777 -0.497816 0.547089 0.568319 -0.167851 -0.542341 -0.194183 0.416947 0.874074 1.057034 -0.942906 -0.373124 -0.787586 0.601266 -0.254105 0.793548 -0.834147 -0.294836 -1.225388 -1.186767 -0.653884 0.888362 0.690264 -0.043264 -0.115584 1.033319 1.457796 -0.159984 -0.520591 -0.457668 -0.704891 
-0.679028 1.414340
1 -0.025378 0.714929 0.920944 -1.022550 -0.377764 -0.636222 0.134602 -0.041053 0.666049 -0.975972 -0.436032 -1.089625 -1.148152 -0.615748 1.547186 0.616245 0.263874 0.219127 1.361456 1.681483 -0.458307 -0.781143 -0.551071 0.131412 1.280712 0.343339 -0.892329 -0.411230 -0.595306 0.272755 -0.149464 1.104624 -0.778450 -0.379281 -1.217710 -1.201595 -0.732177 0.788771 0.675667 -0.106178 -0.085377 1.359592 1.520061 0.125365 -0.668326 -0.569933 0.217928 0.803164 1.125930 -1.078663 -0.694710 -0.892364 0.308811 -0.217046 0.690249 -0.967827 -0.508961 -1.235178 -1.157629 -0.601595 0.733407 0.290156 0.361249 -0.081060 1.511702 2.072425 0.152596 -0.614647 -0.341213 0.232904 0.967064 0.870205 -0.957279 -0.700066 -1.014585 0.176937 -0.020815 0.229948 -1.000263 -0.768759 -1.291999 -1.003731 -0.901384 1.032405 0.113560 0.275431 -0.084332 1.372409 2.307034 0.361895 -0.367394 -0.101802 -0.505137 -0.005732 0.529710 -1.089594 -1.030629 -0.930996 0.675152 0.595349 1.792188 -1.231175 -0.360018 -1.313301 -1.248776 -0.891328 -0.034980 -0.276024 -0.269184 0.695430 3.171903 2.060493 -0.313544 -0.091605 0.062153 -0.884825 0.876114 1.224441 -0.919931 -0.917134 -1.074066 1.190683 0.769583 0.686133 -0.905399 -0.505974 -1.319224 -1.148758 -0.704071 -0.205544 -0.778959 0.389553 -0.531959 1.657287 3.571767 0.192003 -0.706291 -0.158499 -0.136238 0.138411 0.950910 -0.904203 -0.872926 -0.971147 0.585231 -0.246415 0.663709 -0.727415 -0.171336 -1.205408 -1.029339 -0.633804 0.182212 -0.301457 -0.200499 0.235688 2.338714 2.657143 0.058047 -0.332117 -0.117603 0.449370 0.295798 0.717976 -0.782166 -1.050453 -0.913831 1.325246 0.764809 3.037096 -0.633829 -0.091207 -1.248452 -1.097738 -0.679635 -0.100422 0.314001 -0.278309 -0.327892 0.736089 0.606189 -0.329023 -0.365070 -0.414462 0.277456 0.820147 0.992880 -0.931553 -0.488792 -0.853461 0.552210 -0.261802 0.841647 -0.791326 -0.152338 -1.163227 -1.173420 -0.663181 0.814095 0.477838 -0.275963 -0.028011 1.277532 1.637414 -0.231553 -0.460945 -0.521478 -0.666918 
-0.666190 1.365925
2 -0.024647 0.560756 0.934807 -0.974955 -0.175042 -0.629669 0.058849 -0.054240 0.548685 -0.881397 -0.394332 -1.076726 -1.069301 -0.709501 1.228867 0.759148 0.222496 0.029304 1.321192 1.630601 -0.312367 -0.637080 -0.497616 0.074048 1.185613 0.402796 -0.747981 -0.400933 -0.575594 0.273450 -0.230542 1.133951 -0.717366 -0.266730 -1.135025 -1.146772 -0.733745 0.728746 0.739683 -0.047235 -0.141333 1.264864 1.433561 0.203258 -0.433761 -0.338125 -0.044754 0.713836 1.139258 -0.899315 -0.652277 -0.879406 0.059048 -0.320130 0.305766 -0.920699 -0.600200 -1.255255 -1.168093 -0.597407 0.832587 0.963251 0.302916 -0.164538 1.218751 2.107990 0.381744 -0.424843 -0.323495 0.356763 0.976482 0.622836 -0.739690 -0.612199 -0.895093 0.093400 -0.090386 0.139569 -0.966888 -0.727430 -1.153557 -0.945674 -0.698418 0.708270 0.314546 0.157194 0.149991 1.325282 1.825401 0.602699 -0.325473 -0.059798 -0.236990 0.297713 0.743918 -0.943952 -1.088841 -0.932575 0.906144 0.701359 1.553143 -1.070274 -0.357941 -1.170364 -1.054697 -0.674670 0.179464 0.007014 -0.023326 0.243161 1.640087 1.131995 -0.228535 -0.038759 0.317038 -0.753399 0.888694 0.988067 -0.806323 -0.818472 -0.953558 0.886148 0.677087 0.715901 -0.695982 0.193626 -1.151319 -1.019825 -0.642614 -0.016035 -0.493689 0.357618 -0.568868 1.169129 2.158598 0.385091 -0.366585 0.149272 0.035840 0.311061 0.678965 -0.755270 -0.885359 -0.873519 0.491406 -0.120983 0.781147 -0.662009 -0.124938 -1.059510 -1.038169 -0.550205 0.209194 0.310325 -0.297987 0.256709 1.455295 1.829231 0.255894 -0.268770 0.083823 0.590880 0.364527 0.517759 -0.591672 -0.807841 -0.794786 1.258402 0.601661 2.574703 -0.628313 -0.008908 -1.041480 -0.960783 -0.690295 -0.127888 0.610275 -0.348584 -0.423146 0.385747 0.535732 -0.134539 -0.336298 -0.352171 0.373305 0.949861 0.825229 -0.804869 -0.547974 -0.773568 0.533965 -0.140524 0.621019 -0.819822 -0.151661 -1.073182 -0.975920 -0.773664 0.731652 0.564922 -0.105418 -0.128784 0.877137 1.439533 -0.146537 -0.233944 -0.445056 -0.472215 -0.577957 
1.176335
3 -0.115886 0.820746 1.072479 -1.107175 -0.336168 -0.784364 0.094444 -0.072963 0.810767 -1.064753 -0.608579 -1.339365 -1.245652 -0.831597 1.721726 0.712874 0.235246 0.276209 1.626535 2.092431 -0.482970 -0.750626 -0.460130 0.236611 1.392298 0.586144 -0.986109 -0.543984 -0.869062 0.406866 -0.199675 1.261367 -0.931979 -0.453303 -1.378041 -1.429664 -0.885421 0.790337 0.861260 -0.105331 -0.107974 1.406247 1.556562 0.291431 -0.635116 -0.547837 0.246646 0.842397 1.374244 -1.213337 -0.836970 -1.061880 0.445910 -0.240524 0.818263 -1.145204 -0.652396 -1.454522 -1.350311 -0.748239 1.094022 0.496164 0.291302 -0.021706 1.384924 2.188752 0.427537 -0.540029 -0.402436 0.263044 1.126636 0.818492 -1.084867 -0.777840 -1.156849 0.150072 -0.151041 0.160017 -1.158610 -0.724767 -1.481028 -1.148549 -1.060825 1.138146 0.097964 0.181290 0.068909 1.746760 2.522271 0.638805 -0.363088 -0.016735 -0.108448 1.061750 0.936679 -1.093320 -1.129213 -1.258457 0.759771 0.511325 1.550209 -1.309304 -0.281704 -1.516510 -1.239510 -0.933464 0.154529 -0.326074 -0.101655 0.455826 2.016972 1.575466 -0.069357 -0.185581 0.533793 -0.699799 1.152556 0.896781 -1.046621 -1.170381 -1.218255 1.496725 0.998083 1.133697 -0.938286 -0.015171 -1.440916 -1.188007 -0.683514 0.378795 -0.640599 0.183254 -0.548098 1.053966 2.207037 0.354827 -0.519037 0.394696 0.244732 0.381438 1.113125 -0.899122 -1.096222 -1.173454 0.687293 -0.046008 0.940206 -0.894093 -0.104996 -1.368054 -1.066989 -0.855887 0.294873 0.093265 -0.139776 0.133550 1.779457 1.869632 0.243867 -0.375760 0.071816 0.692799 0.415830 0.703685 -0.878906 -1.026634 -1.112752 1.394995 0.792975 2.919907 -0.848465 -0.122632 -1.400679 -1.258569 -0.974020 0.270461 0.788220 -0.574408 -0.295961 0.706526 0.730792 -0.166971 -0.385794 -0.312091 0.376540 0.884624 0.977607 -1.104369 -0.602411 -0.914841 0.743480 -0.106034 1.201335 -0.814697 -0.223407 -1.285807 -1.231427 -0.859937 1.158588 0.774048 -0.271745 -0.174051 0.883699 1.405990 -0.198720 -0.331293 -0.493096 -0.617485 -0.773803 
1.381849
4 0.005970 0.751114 1.085722 -1.039904 -0.406701 -0.679811 0.072615 -0.075559 0.692090 -1.072586 -0.460515 -1.288880 -1.222914 -0.716772 1.364289 0.482584 0.263830 0.171767 1.651816 2.032803 -0.388215 -0.720607 -0.454893 0.265395 1.379802 0.589531 -0.818324 -0.519316 -0.746067 0.413932 -0.178369 1.170074 -0.894897 -0.355494 -1.330207 -1.312370 -0.802593 0.591601 0.688329 -0.156697 -0.163164 1.411729 1.699529 0.206069 -0.666106 -0.523285 0.173828 0.883171 1.294354 -1.112042 -0.686691 -0.939822 0.276185 -0.307773 0.697519 -0.985738 -0.642088 -1.393248 -1.392490 -0.732761 0.973595 0.358805 0.335087 0.064024 1.482641 2.376040 0.233742 -0.572048 -0.423111 0.447690 1.153062 0.833891 -1.003197 -0.699037 -0.993186 0.153248 -0.209088 0.223381 -1.165334 -0.741596 -1.444063 -1.076746 -0.977388 0.960369 -0.033733 0.309105 0.115317 1.677490 2.432875 0.478845 -0.499757 -0.138839 -0.151743 0.650876 0.836831 -0.989553 -1.087273 -1.087959 0.785815 0.776908 1.770988 -1.228824 -0.357408 -1.430037 -1.258179 -0.797396 0.152119 -0.311512 -0.174495 0.422862 1.711298 1.606907 -0.111176 -0.190658 0.401723 -0.786975 1.135739 1.058611 -0.969714 -0.932000 -1.149220 1.275183 0.763192 1.097840 -0.851838 -0.135197 -1.440695 -1.176001 -0.535466 0.188978 -0.533128 0.373298 -0.448216 1.071386 2.115694 0.146461 -0.588486 0.263037 0.268874 0.469420 1.094261 -0.928724 -1.012780 -1.003566 0.759516 -0.058255 0.952261 -0.984737 -0.179058 -1.345385 -1.038068 -0.822988 0.324975 0.130887 -0.013518 0.059971 1.701512 2.136519 0.050153 -0.482915 -0.053709 0.684908 0.572248 0.710637 -0.836325 -0.807433 -0.992573 1.345920 0.821516 2.887753 -0.706492 -0.110270 -1.351152 -1.205737 -0.737653 0.244887 0.633224 -0.466895 -0.268452 0.583513 0.529670 -0.291038 -0.485136 -0.285899 0.483719 0.887183 1.036619 -1.038690 -0.437466 -0.841210 0.719239 -0.163292 1.080628 -0.780459 -0.308839 -1.277385 -1.213813 -0.850979 1.037729 0.641427 -0.145962 -0.076124 0.989158 1.263439 -0.166760 -0.514443 -0.423403 -0.676674 -0.746608 
1.362359
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
385 0.578521 0.815616 -0.008575 0.070166 0.041314 -0.172278 -0.091158 -0.266501 -0.299746 -0.250539 0.037579 0.365901 0.166937 0.305540 -0.234406 0.311013 -0.110569 -0.114513 -0.452355 -0.358867 -0.177268 -0.018237 0.172704 0.274292 0.847559 0.161803 0.157351 -0.062481 0.004793 -0.146151 -0.365771 -0.515028 -0.126082 0.122814 0.157954 0.266116 0.356241 -0.009107 0.023418 -0.148322 0.061621 -0.206390 -0.161031 -0.145769 -0.173076 -0.111084 0.722043 1.151718 0.075962 0.111401 -0.285507 -0.096357 -0.541320 -0.756826 -0.717633 -0.359527 -0.290220 -0.110387 -0.381590 0.329526 -0.013344 0.645284 0.069591 -0.080180 -0.271745 -0.389325 0.168342 0.324664 0.071264 1.154482 2.082457 0.152455 0.251916 -0.405427 -0.261448 -0.662001 -0.681511 -0.454917 -0.063818 -0.606271 -0.279721 -0.684625 0.440753 -0.405008 0.477990 0.301193 0.743176 -0.173409 -0.523468 -0.174576 -0.337592 -0.310680 -0.409673 -0.728864 -0.224772 -0.421970 -0.001823 -0.022175 0.726590 0.268676 0.504831 0.188382 0.003810 0.641346 -0.001090 0.389831 0.532993 1.586001 0.033036 -0.142395 -0.569806 -0.779241 -0.615640 -0.564244 0.172966 -1.446164 -1.158714 -0.324238 -0.100830 0.009197 -0.054438 1.004409 0.018712 -0.682721 0.401000 -0.044198 0.451011 0.404542 0.097068 1.442745 3.523521 1.326161 -0.442833 -1.214377 -0.791535 -0.586027 -0.601752 -0.526851 -1.061495 -0.837982 -0.073638 1.031971 -0.445046 -0.147461 1.855818 1.188337 0.445286 1.252941 -0.193166 0.172993 -0.162267 0.209781 0.546619 0.245775 0.375268 -0.744369 -1.455791 -1.201336 -0.631667 -0.685654 0.551269 -0.736841 1.236806 0.449564 -0.154367 -0.164832 -0.148053 -0.329820 -0.414346 -0.298004 -0.010251 0.186727 0.785969 0.857228 1.468341 -0.210802 1.092909 -0.021411 0.736797 -1.086275 -1.293551 -0.710390 -0.662196 -0.044366 -0.379483 0.707046 0.061626 0.087535 0.007229 -0.019088 -0.553130 -0.311879 -0.316806 0.212299 -0.013452 0.280088 0.704376 0.256359 0.145685 0.728678 -0.053264 0.159969 -0.264172 -0.542244 -0.728845 -0.736948 0.484063 -0.014160 
0.981396 -0.823922
386 -0.343952 -0.155117 0.006063 0.285334 -0.023862 -0.073818 -0.028110 -0.030005 -0.120587 -0.063003 0.280372 0.288623 0.244785 0.197624 1.120448 1.342140 -0.393144 -0.255975 -0.764882 -0.688033 -0.411714 -0.402310 -0.559895 -0.188687 -0.186149 0.085229 0.209445 -0.110868 -0.112135 -0.038973 -0.186215 -0.290907 -0.101395 -0.244995 0.194110 0.174400 0.261491 1.335250 1.739041 -0.331995 -0.067029 -0.969707 -0.835346 -0.225157 -0.148649 -0.401451 -0.357386 -0.172101 -0.267598 0.055623 -0.301132 -0.293372 -0.312323 -0.360072 -0.296617 0.118884 -0.607369 -0.253655 -0.284446 0.472431 2.497014 4.099475 -0.093472 -0.581561 -1.095450 -1.027082 -0.416788 -0.326693 -0.437598 0.242633 1.007429 0.612549 0.445686 -0.113361 -0.165806 -0.077746 -0.558640 -0.442297 -0.442617 -0.120817 -0.194407 -0.131095 0.800921 0.920657 3.575207 -0.188720 -0.316105 -1.465925 -1.350469 -0.464359 -0.489196 -0.903108 -0.498298 0.109299 -0.114649 -0.001037 0.134193 0.014040 0.015718 -0.352781 0.098042 0.576062 -0.191512 0.245011 -0.089833 0.270160 1.991614 2.207065 0.346616 -0.165267 -1.167193 -1.213787 -0.807511 -0.884333 -0.417336 -1.620930 -0.555660 -0.464463 0.605316 0.025533 -0.010126 0.147222 0.569386 1.156091 1.878517 0.915935 0.596800 0.249085 -0.144031 0.167630 1.245727 0.605658 -0.114234 -1.367335 -1.328265 -0.889478 -0.864110 -1.000866 -0.936579 -0.003797 0.017008 -0.051265 -0.051686 -0.080829 0.378418 0.772260 0.344638 0.409918 0.039382 0.462811 0.084301 0.359274 0.503638 2.669749 -0.557002 -0.483690 -0.531673 -0.898888 -0.787395 -0.776047 -0.809296 -0.849585 -0.280741 -0.492412 -0.034216 -0.011144 -0.093382 -0.190026 0.007295 0.291797 0.243285 1.052784 0.933768 0.930262 0.666788 0.674066 1.130771 -0.423281 -0.062838 -0.576920 -0.789637 -0.839921 -0.702896 -0.638031 -0.294989 -0.192360 -0.384952 -0.066190 -0.074334 -0.004895 0.262383 0.598844 0.536701 0.483695 0.370696 0.272201 0.482870 0.014603 0.760624 0.726867 -0.293256 -0.392450 -0.695027 -0.750916 -0.507727 -0.535106 -0.734783 
1.085545 -0.034680 -1.120232
387 -0.066611 0.160646 -0.006179 -0.204166 -0.168144 -0.095100 -0.243909 0.001036 0.018622 -0.132618 -0.108387 -0.101219 0.037201 0.357807 0.484108 0.846920 -0.316675 -0.220923 -0.182159 -0.201830 -0.020689 -0.018036 0.235278 0.006709 0.077673 -0.004747 -0.197423 -0.078612 -0.252352 -0.264757 -0.315038 -0.140217 -0.227206 -0.193583 -0.215091 0.134330 -0.096614 0.246689 0.545683 -0.251511 -0.228500 -0.169469 -0.057055 0.460213 0.310485 0.341025 -0.166372 -0.354935 -0.295832 -0.178881 -0.292668 -0.360785 -0.451096 -0.513882 -0.657880 -0.478158 -0.491899 -0.434251 -0.406700 -0.212107 0.124407 3.893183 -0.286749 -0.499564 -0.526325 -0.124638 1.252761 1.312268 0.342483 0.004115 -0.056000 -0.110703 0.007906 -0.230503 -0.064162 -0.338650 -0.398219 0.155609 -0.084716 -0.036575 -0.142463 -0.120580 -0.222994 -0.008855 0.615924 0.163314 -0.006318 -0.705759 -0.551729 0.300174 0.248571 1.596671 0.455188 0.153293 -0.110889 -0.167419 -0.122656 -0.197031 -0.184998 -0.355753 -0.013585 0.091351 0.036043 0.073222 -0.143890 0.139732 0.614046 0.811038 -0.067180 0.125167 -0.353471 -0.382233 -0.262606 -0.128705 -0.115752 -0.930421 -0.507747 -0.406452 -0.126812 -0.148081 -0.326349 0.227474 0.215811 -0.184397 0.205914 1.248248 -0.133319 -0.132941 -0.284734 -0.096872 0.267902 0.014163 -0.482787 -0.561293 -0.408395 0.966641 0.926225 0.544455 -0.586569 -0.499789 -0.065891 -0.199973 -0.439885 -0.387354 -0.157257 -0.178767 -0.327227 -0.351513 -0.125342 -0.064263 0.032916 -0.383703 -0.438517 -0.171442 -0.539449 -0.618777 -0.057934 -0.143144 -0.004363 -0.055794 4.528556 -0.463506 -0.244622 -0.200806 0.088204 -0.189010 -0.130377 -0.079691 -0.034115 -0.205404 -0.151083 -0.100164 0.088742 0.401504 -0.072054 -0.382634 -0.209524 -0.169464 -0.283133 0.080513 0.219396 0.427655 0.448607 1.228775 0.066399 -0.074440 -0.069934 -0.223483 -0.160704 -0.181497 -0.348366 -0.218320 -0.077495 -0.241710 -0.254692 -0.243633 -0.004971 -0.168279 -0.118715 -0.021867 -0.164025 -0.209928 0.084288 0.064790 0.679144 
0.696433 1.445132 0.759144 0.359163 -1.111332
388 0.122353 0.206068 0.121852 0.135813 -0.106405 -0.109500 -0.318797 -0.224553 -0.402368 -0.253572 -0.036752 0.022141 0.066784 -0.063115 0.251984 0.423665 -0.267971 -0.062237 -0.152510 -0.180560 0.266860 0.226020 0.638867 0.088377 0.154580 0.005537 -0.090565 0.017880 -0.177648 -0.368970 -0.474865 -0.318903 -0.211556 -0.036925 -0.068680 0.345664 -0.108639 0.092657 0.241915 -0.125310 -0.220962 -0.083301 0.101778 0.665436 0.661190 0.391846 0.032425 -0.122417 -0.246262 -0.158146 -0.240017 -0.122328 -0.403602 -0.548235 -0.350910 -0.150657 -0.175774 -0.052711 -0.224810 -0.142411 -0.019941 0.819984 -0.153961 -0.405981 -0.366120 -0.108321 1.687876 1.622842 0.652195 -0.479712 -0.341194 -0.167773 0.185297 -0.248528 -0.174750 -0.413215 -0.459817 0.526658 0.149524 0.233992 0.404246 -0.384420 -0.202454 -0.232836 0.875517 -0.114594 -0.221065 -0.712393 -0.532897 0.319931 0.323008 2.141544 -0.064749 -0.010680 0.034973 0.001995 -0.051811 -0.032508 -0.348255 -0.522988 0.082399 0.101567 -0.123955 0.217515 -0.207852 -0.092817 0.690799 0.952746 0.093686 0.173415 -0.280747 -0.262276 -0.089357 -0.138289 0.316182 -0.668020 -0.985175 -0.451458 -0.087931 -0.302541 -0.245176 0.238130 0.574219 0.316361 0.675789 0.944873 0.063380 -0.064271 -0.529131 -0.110035 0.152777 -0.179061 -0.656349 -1.086887 -0.572069 0.924667 0.833399 0.543780 -0.580854 -0.631862 -0.545981 -0.427226 -0.472761 -0.538179 -0.563131 -0.538459 -0.570086 -0.314039 -0.380604 0.047765 -0.288111 -0.177088 -0.163989 0.743438 -0.539873 -0.361177 -0.369740 -0.456937 0.372878 0.371784 5.449268 -0.405970 -0.293095 -0.158044 0.175940 -0.249805 -0.177893 -0.147267 -0.088540 -0.072262 -0.206695 0.068685 0.125533 0.060028 -0.050328 -0.266684 -0.032236 -0.238533 -0.261867 -0.173550 -0.290882 0.612625 0.566128 1.814760 0.186249 -0.183579 -0.175168 -0.185604 -0.089263 -0.080009 -0.237149 -0.422907 -0.339149 -0.322928 -0.295930 -0.266436 -0.128413 -0.140600 0.191414 0.118732 -0.214912 -0.150141 -0.197409 -0.067330 0.571728 0.583210 1.916396 
0.428484 0.622558 -1.158134
389 0.188459 0.439853 0.060391 -0.086527 -0.111686 -0.140101 -0.463691 -0.544235 -0.622486 -0.254746 -0.199929 -0.062966 0.097649 0.434262 0.265382 0.478620 -0.427277 -0.060230 -0.091186 -0.309038 0.539724 0.559802 0.440024 0.128444 0.499696 0.072232 0.075077 -0.052714 -0.098313 -0.613954 -0.814414 -0.586662 -0.110255 0.008663 0.077707 0.526670 0.362162 0.250743 0.447850 -0.353245 0.000355 0.057385 -0.129434 0.129044 0.264084 -0.151309 0.187910 0.701375 -0.176368 0.160061 -0.201854 -0.140544 -0.994704 -1.104489 -0.316708 -0.046821 -0.307645 0.054225 0.131244 0.156841 0.514177 1.656849 -0.312444 0.051022 -0.255884 -0.577326 0.415048 0.403577 0.101188 -0.175350 0.444570 0.352650 0.795194 -0.160316 -0.056730 -0.546945 -0.769426 -0.528751 -0.205588 0.239008 0.641354 0.262148 0.426456 -0.454494 0.724283 -0.200243 0.330112 0.008259 -0.999883 0.017019 0.072978 0.122243 -0.523335 1.182480 -0.105016 -0.272883 -0.000234 -0.081880 -1.263563 -1.332233 -0.173574 0.552361 -0.588848 0.421488 -0.713752 0.874099 1.210574 2.066226 0.211755 0.655666 -0.286226 -0.599556 -0.776689 -0.756884 0.246952 -1.647672 -0.860567 -0.548863 0.526335 -0.349274 -0.382024 1.129147 1.860528 1.245924 2.198426 0.931788 0.749532 1.861245 -0.052901 -0.131282 0.375158 0.008166 -0.912338 -1.778002 -1.599741 -0.825851 -0.703281 -1.259517 -0.908033 -0.635809 -0.452992 0.022787 -0.297470 -0.285614 -0.502733 -0.523386 -0.616326 0.057821 0.086172 1.968491 0.404133 1.909161 2.396706 3.200826 -0.365028 0.028798 -1.295352 -1.507143 -0.796399 -0.895243 -1.172440 -0.527432 -0.025165 -0.362537 0.052917 0.104496 0.161911 -0.352495 0.092749 0.535177 0.261345 0.743398 0.675169 1.295758 0.199683 0.695838 1.587987 -0.377386 -0.247616 -1.245401 -1.319967 -0.703791 -0.856706 -0.503849 0.439088 0.397687 -0.232954 -0.087055 0.049947 -0.109176 -0.509142 -0.155921 -0.209409 -0.040702 -0.241797 0.307563 0.482718 0.484882 1.912625 1.505288 0.072022 0.128418 -0.799038 -0.709763 -0.688117 -0.758006 -0.777439 -0.206744 1.409324 
-1.200882

390 rows × 210 columns

OOF Pearson score by family

# Score the simple-average ensemble and keep the third returned value,
# which the next cell joins with the kinase info and reads a `Pearson`
# column from. Arguments are passed as (y, prediction) to match the other
# score_each calls in this notebook.
# NOTE(review): assumes score_each takes the target first - confirm
# against its definition in katlas.
_,_,corr = score_each(y,oof_ensemble)
overall MSE: 0.3086
Average Pearson: 0.8170 
# Attach the kinase annotations to the scores column-wise.
# NOTE(review): concat(axis=1) aligns on the row index, so this assumes
# `corr` and `info` share the same index and row order - confirm.
# NOTE: re-running this cell appends the `info` columns to `corr` again.
corr = pd.concat([corr, info],axis=1)
# Mean Pearson and number of kinases for each family.
corr_family = corr.groupby('family').agg({'Pearson':'mean','kinase':'count'})
# Displayed only: the sorted copy is not assigned back to `corr_family`.
corr_family.sort_values('Pearson',ascending=False)
Pearson kinase
family
Sev 0.981550 1
Met 0.979476 2
InsR 0.978676 3
Eph 0.976189 12
Akt 0.974935 3
... ... ...
KIS 0.102436 1
Bud32 0.096503 1
FAM20C 0.086976 1
CDC7 -0.028638 1
WEE -0.090115 1

100 rows × 2 columns

# Save the family-level summary (in groupby order, not the sorted view).
corr_family.to_csv('raw/oof_corr_family.csv')
# Mean Pearson and number of kinases for each subfamily.
corr_subfamily = corr.groupby('subfamily').agg({'Pearson':'mean','kinase':'count'})
# Displayed only: shows the subfamilies with the most kinases first.
corr_subfamily.sort_values('kinase',ascending=False)
Pearson kinase
subfamily
Eph 0.976189 12
Src 0.967152 11
NEK 0.778235 10
STE11 0.749903 7
CK1 0.948548 7
... ... ...
SNRK 0.700188 1
NKF2 0.301151 1
CDK9 0.943745 1
NKF1 0.734262 1
MOS 0.173084 1

160 rows × 2 columns

# Save the subfamily-level summary (in groupby order, not the sorted view).
corr_subfamily.to_csv('raw/oof_corr_subfamily.csv')

Plot

# Render and save figures at 300 dpi.
sns.set(rc={"figure.dpi":300, 'savefig.dpi':300})
# Use seaborn's 'notebook' scaling context and the 'ticks' axes style.
sns.set_context('notebook')
sns.set_style("ticks")
# Bare expression: shows the per-kinase score table joined with the info.
corr
Pearson kinase source ID_coral uniprot ID_HGNC group family subfamily_coral subfamily in_ST_paper in_Tyr_paper in_cddm pseudo pspa_category_small pspa_category_big cddm_big cddm_small length human_uniprot_sequence kinasecom_domain nucleus cytosol cytoskeleton plasma membrane mitochondrion Golgi apparatus endoplasmic reticulum vesicle centrosome aggresome main_location
0 0.980211 SRC KS SRC P12931 SRC TK Src None Src 0 1 1 0 SRC SRC 1.0 2.0 536 MGSNKSKPKDASQRRRSLEPAENVHGAGGGAFPASQTPSKPASADGHRGPSAAFAPAAAEPKLFGGFNSSDTVTSPQRAGPLAGGVTTFVALYDYESRTETDLSFKKGERLQIVNNTEGDWWLAHSLSTGQTGYIPSNYVAPSDSIQAEEWYFGKITRRESERLLLNAENPRGTFLVRESETTKGAYCLSVSDFDNAKGLNVKHYKIRKLDSGGFYITSRTQFNSLQQLVAYYSKHADGLCHRLTTVCPTSKPQTQGLAKDAWEIPRESLRLEVKLGQGCFGEVWMGTWNGTTRVAIKTLKPGTMSPEAFLQEAQVMKKLRHEKLVQLYAVVSEEPIYIVTEYMSKGSLLDFLKGETGKYLRLPQLVDMAAQIASGMAYVERMNYVHRDLRAANILVGENLVCKVADFGLARLIEDNEYTARQGAKFPIKWTAPEAALYGRFTIKSDVWSFGILLTELTTKGRVPYPGMVNREVLDQVERGYRMPCPPECPESLHDLMCQCWRKEPEERPTFEYLQAFLEDYFTSTEPQYQPGENL LRLEVKLGQGCFGEVWMGTWNGTTRVAIKTLKPGTMSPEAFLQEAQVMKKLRHEKLVQLYAVVSEEPIYIVTEYMSKGSLLDFLKGETGKYLRLPQLVDMAAQIASGMAYVERMNYVHRDLRAANILVGENLVCKVADFGLARLIEDNEYTARQGAKFPIKWTAPEAALYGRFTIKSDVWSFGILLTELTTKGRVPYPGMVNREVLDQVERGYRMPCPPECPESLHDLMCQCWRKEPEERPTFEYLQAF NaN 2.0 NaN 6.0 NaN 2.0 NaN NaN NaN NaN plasma membrane
1 0.986307 EPHA3 KS EphA3 P29320 EPHA3 TK Eph None Eph 0 1 1 0 Ephrin receptors Ephrin receptors 1.0 2.0 983 MDCQLSILLLLSCSVLDSFGELIPQPSNEVNLLDSKTIQGELGWISYPSHGWEEISGVDEHYTPIRTYQVCNVMDHSQNNWLRTNWVPRNSAQKIYVELKFTLRDCNSIPLVLGTCKETFNLYYMESDDDHGVKFREHQFTKIDTIAADESFTQMDLGDRILKLNTEIREVGPVNKKGFYLAFQDVGACVALVSVRVYFKKCPFTVKNLAMFPDTVPMDSQSLVEVRGSCVNNSKEEDPPRMYCSTEGEWLVPIGKCSCNAGYEERGFMCQACRPGFYKALDGNMKCAKCPPHSSTQEDGSMNCRCENNYFRADKDPPSMACTRPPSSPRNVISNINETSVILDWSWPLDTGGRKDVTFNIICKKCGWNIKQCEPCSPNVRFLPRQFGLTNTTVTVTDLLAHTNYTFEIDAVNGVSELSSPPRQFAAVSITTNQAAPSPVLTIKKDRTSRNSISLSWQEPEHPNGIILDYEVKYYEKQEQETSYTILRARGTNVTISSLKPDTIYVFQIRARTAAGYGTNSRKFEFETSPDSFSISGESSQVVMIAISAAVAIILLTVVIYVLIGRFCGYKSKHGADEKRLHFGNGHLKLPGLRTY... ISIDKVVGAGEFGEVCSGRLKLPSKKEISVAIKTLKVGYTEKQRRDFLGEASIMGQFDHPNIIRLEGVVTKSKPVMIVTEYMENGSLDSFLRKHDAQFTVIQLVGMLRGIASGMKYLSDMGYVHRDLAARNILINSNLVCKVSDFGLSRVLEDDPEAAYTTRGGKIPIRWTSPEAIAYRKFTSASDVWSYGIVLWEVMSYGERPYWEMSNQDVIKAVDEGYRLPPPMDCPAALYQLMLDCWQKDRNNRPKFEQIVSI NaN 1.0 NaN 6.0 NaN 3.0 NaN NaN NaN NaN plasma membrane
2 0.954302 FES KS FES P07332 FES TK Fer None Fer 0 1 1 0 TAM receptors TAM receptors 1.0 2.0 822 MGFSSELCSPQGHGVLQQMQEAELRLLEGMRKWMAQRVKSDREYAGLLHHMSLQDSGGQSRAISPDSPISQSWAEITSQTEGLSRLLRQHAEDLNSGPLSKLSLLIRERQQLRKTYSEQWQQLQQELTKTHSQDIEKLKSQYRALARDSAQAKRKYQEASKDKDRDKAKDKYVRSLWKLFAHHNRYVLGVRAAQLHHQHHHQLLLPGLLRSLQDLHEEMACILKEILQEYLEISSLVQDEVVAIHREMAAAAARIQPEAEYQGFLRQYGSAPDVPPCVTFDESLLEEGEPLEPGELQLNELTVESVQHTLTSVTDELAVATEMVFRRQEMVTQLQQELRNEEENTHPRERVQLLGKRQVLQEALQGLQVALCSQAKLQAQQELLQTKLEHLGPGEPPPVLLLQDDRHSTSSSEQEREGGRTPTLEILKSHISGIFRPKFSLPPPLQLIPEVQKPLHEQLWYHGAIPRAEVAELLVHSGDFLVRESQGKQEYVLSVLWDGLPRHFIIQSLDNLYRLEGEGFPSIPLLIDHLLSTQQPLTKKSGVVLHRAVPKDKWVLNHEDLVLGEQIGRGNFGEVFSGRLRADNTLVAVKSCRETL... LVLGEQIGRGNFGEVFSGRLRADNTLVAVKSCRETLPPDLKAKFLQEARILKQYSHPNIVRLIGVCTQKQPIYIVMELVQGGDFLTFLRTEGARLRVKTLLQMVGDAAAGMEYLESKCCIHRDLAARNCLVTEKNVLKISDFGMSREEADGVYAASGGLRQVPVKWTAPEALNYGRYSSESDVWSFGILLWETFSLGASPYPNLSNQQTREFVEKGGRLPCPELCPDAVFRLMEQCWAYEPGQRPSFSTIYQELQS NaN 6.0 NaN 4.0 NaN NaN NaN NaN NaN NaN cytosol
3 0.973414 NTRK3 KS TRKC Q16288 NTRK3 TK Trk None Trk 0 1 1 0 Insulin and neurotrophin receptors Insulin and neurotrophin receptors 1.0 3.0 839 MDVSLCPAKCSFWRIFLLGSVWLDYVGSVLACPANCVCSKTEINCRRPDDGNLFPLLEGQDSGNSNGNASINITDISRNITSIHIENWRSLHTLNAVDMELYTGLQKLTIKNSGLRSIQPRAFAKNPHLRYINLSSNRLTTLSWQLFQTLSLRELQLEQNFFNCSCDIRWMQLWQEQGEAKLNSQNLYCINADGSQLPLFRMNISQCDLPEISVSHVNLTVREGDNAVITCNGSGSPLPDVDWIVTGLQSINTHQTNLNWTNVHAINLTLVNVTSEDNGFTLTCIAENVVGMSNASVALTVYYPPRVVSLEEPELRLEHCIEFVVRGNPPPTLHWLHNGQPLRESKIIHVEYYQEGEISEGCLLFNKPTHYNNGNYTLIAKNPLGTANQTINGHFLKEPFPESTDNFILFDEVSPTPPITVTHKPEEDTFGVSIAVGLAAFACVLLVVLFVMINKYGRRSKFGMKGPVAVISGEEDSASPLHHINHGITTPSSLDAGPDTVVIGMTRIPVIENPQYFRQGHNCHKPDTYVQHIKRRDIVLKRELGEGAFGKVFLAECYNLSPTKDKMLVAVKALKDPTLAARKDFQREAELLTNLQ... IVLKRELGEGAFGKVFLAECYNLSPTKDKMLVAVKALKDPTLAARKDFQREAELLTNLQHEHIVKFYGVCGDGDPLIMVFEYMKHGDLNKFLRAHGPDAMILVDGQPRQAKGELGLSQMLHIASQIASGMVYLASQHFVHRDLATRNCLVGANLLVKIGDFGMSRDVYSTDYYRVGGHTMLPIRWMPPESIMYRKFTTESDVWSFGVILWEIFTYGKQPWFQLSNTEVIECITQGRVLERPRVCPKEVYDVMLGCWQREPQQRLNIKEIYKI NaN 4.0 NaN 4.0 NaN 2.0 NaN NaN NaN NaN cytosol
4 0.981001 ALK KS ALK Q9UM73 ALK TK ALK None ALK 0 1 1 0 PDGF receptors PDGF receptors 1.0 3.0 1620 MGAIGLLWLLPLLLSTAAVGSGMGTGQRAGSPAAGPPLQPREPLSYSRLQRKSLAVDFVVPSLFRVYARDLLLPPSSSELKAGRPEARGSLALDCAPLLRLLGPAPGVSWTAGSPAPAEARTLSRVLKGGSVRKLRRAKQLVLELGEEAILEGCVGPPGEAAVGLLQFNLSELFSWWIRQGEGRLRIRLMPEKKASEVGREGRLSAAIRASQPRLLFQIFGTGHSSLESPTNMPSPSPDYFTWNLTWIMKDSFPFLSHRSRYGLECSFDFPCELEYSPPLHDLRNQSWSWRRIPSEEASQMDLLDGPGAERSKEMPRGSFLLLNTSADSKHTILSPWMRSSSEHCTLAVSVHRHLQPSGRYIAQLLPHNEAAREILLMPTPGKHGWTVLQGRIGRPDNPFRVALEYISSGNRSLSAVDFFALKNCSEGTSPGSKMALQSSFTCWNGTVLQLGQACDFHQDCAQGEDESQMCRKLPVGFYCNFEDGFCGWTQGTLSPHTPQWQVRTLKDARFQDHQDHALLLSTTDVPASESATVTSATFPAPIKSSPCELRMSWLIRGVLRGNVSLVLVENKTGKEQGRMVWHVAAYEGLSLWQWM... ITLIRGLGHGAFGEVYEGQVSGMPNDPSPLQVAVKTLPEVCSEQDELDFLMEALIISKFNHQNIVRCIGVSLQSLPRFILLELMAGGDLKSFLRETRPRPSQPSSLAMLDLLHVARDIACGCQYLEENHFIHRDIAARNCLLTCPGPGRVAKIGDFGMARDIYRASYYRKGGCAMLPVKWMPPEAFMEGIFTSKTDTWSFGVLLWEIFSLGYMPYPSKSNQEVLEFVTSGGRMDPPKNCPGPVYRIMTQCWQHQPEDRPNFAIILERIEY NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN None
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
385 0.698910 VRK2 PSPA VRK2 Q86Y07 VRK2 CK1 VRK None VRK 1 0 0 0 assorted assorted NaN NaN 508 MPPKRNEKYKLPIPFPEGKVLDDMEGNQWVLGKKIGSGGFGLIYLAFPTNKPEKDARHVVKVEYQENGPLFSELKFYQRVAKKDCIKKWIERKQLDYLGIPLFYGSGLTEFKGRSYRFMVMERLGIDLQKISGQNGTFKKSTVLQLGIRMLDVLEYIHENEYVHGDIKAANLLLGYKNPDQVYLADYGLSYRYCPNGNHKQYQENPRKGHNGTIEFTSLDAHKGVALSRRSDVEILGYCMLRWLCGKLPWEQNLKDPVAVQTAKTNLLDELPQSVLKWAPSGSSCCEIAQFLVCAHSLAYDEKPNYQALKKILNPHGIPLGPLDFSTKGQSINVHTPNSQKVDSQKAATKQVNKAHNRLIEKKVHSERSAESCATWKVQKEEKLIGLMNNEAAQESTRRRQKYQESQEPLNEVNSFPQKISYTQFPNSFYEPHQDFTSPDIFKKSRSPSWYKYTSTVSTGITDLESSTGLWPTISQFTLSEETNADVYYYRIIIPVLLMLVFLALFFL WVLGKKIGSGGFGLIYLAFPTNKPEKDARHVVKVEYQENGPLFSELKFYQRVAKKDCIKKWIERKQLDYLGIPLFYGSGLTEFKGRSYRFMVMERLGIDLQKISGQNGTFKKSTVLQLGIRMLDVLEYIHENEYVHGDIKAANLLLGYKNPDQVYLADYGLSYRYCPNGNHKQYQENPRKGHNGTIEFTSLDAHKGVALSRRSDVEILGYCMLRWLCGKLPWEQNLKDPVAVQTAKTNLLDELPQSVLKWAPSGSSCCEIAQFL NaN NaN NaN NaN NaN NaN 10.0 NaN NaN NaN endoplasmic reticulum
386 0.885726 WNK4 PSPA Wnk4 Q96J92 WNK4 Other WNK None WNK 1 0 0 0 RIPK/WNK RIPK/WNK NaN NaN 444 MLASPATETTVLMSQTEADLALRPPPPLGTAGQPRLGPPPRRARRFSGKAEPRPRSSRLSRRSSVDLGLLSSWSLPASPAPDPPDPPDSAGPGPARSPPPSSKEPPEGTWTEGAPVKAAEDSARPELPDSAVGPGSREPLRVPEAVALERRREQEEKEDMETQAVATSPDGRYLKFDIEIGRGSFKTVYRGLDTDTTVEVAWCELQTRKLSRAERQRFSEEVEMLKGLQHPNIVRFYDSWKSVLRGQVCIVLVTELMTSGTLKTYLRRFREMKPRVLQRWSRQILRGLHFLHSRVPPILHRDLKCDNVFITGPTGSVKIGDLGLATLKRASFAKSVIGTPEFMAPEMYEEKYDEAVDVYAFGMCMLEMATSEYPYSECQNAAQIYRKVTSGRKPNSFHKVKIPEVKEIIEGCIRTDKNERFTIQDLLAHAFFREERGVHVELAEEDDGEKPGLKLWLRMEDARRGGRPRDNQAIEFLFQLGRDAAEEVAQEMVALGLVCEADYQPVARAVRERVAAIQRKREKLRKARELEALPPEPGPPPATVPMAPGPPSVFPPEPEEPEADQHQPFLFRHASYSSTTSDCETDGYLSSSGFLD... LKFDIEIGRGSFKTVYRGLDTDTTVEVAWCELQTRKLSRAERQRFSEEVEMLKGLQHPNIVRFYDSWKSVLRGQVCIVLVTELMTSGTLKTYLRRFREMKPRVLQRWSRQILRGLHFLHSRVPPILHRDLKCDNVFITGPTGSVKIGDLGLATLKRASFAKSVIGTPEFMAPEMYEEKYDEAVDVYAFGMCMLEMATSEYPYSECQNAAQIYRKVTSGRKPNSFHKVKIPEVKEIIEGCIRTDKNERFTIQDLLAHAFF NaN NaN NaN NaN NaN NaN NaN NaN NaN 10.0 aggresome
387 0.767191 YANK2 PSPA YANK2 Q9NY57 STK32B AGC YANK None YANK 1 0 0 0 YANK acidophilic NaN NaN 414 MGGNHSHKPPVFDENEEVNFDHFQILRAIGKGSFGKVCIVQKRDTKKMYAMKYMNKQKCIERDEVRNVFRELQIMQGLEHPFLVNLWYSFQDEEDMFMVVDLLLGGDLRYHLQQNVHFTEGTVKLYICELALALEYLQRYHIIHRDIKPDNILLDEHGHVHITDFNIATVVKGAERASSMAGTKPYMAPEVFQVYMDRGPGYSYPVDWWSLGITAYELLRGWRPYEIHSVTPIDEILNMFKVERVHYSSTWCKGMVALLRKLLTKDPESRVSSLHDIQSVPYLADMNWDAVFKKALMPGFVPNKGRLNCDPTFELEEMILESKPLHKKKKRLAKNRSRDGTKDSCPLNGHLQHCLETVREEFIIFNREKLRRQQGQGSQLLDTDSRGGGQAQSKLQDGCNNNLLTHTCTRGCSS FQILRAIGKGSFGKVCIVQKRDTKKMYAMKYMNKQKCIERDEVRNVFRELQIMQGLEHPFLVNLWYSFQDEEDMFMVVDLLLGGDLRYHLQQNVHFTEGTVKLYICELALALEYLQRYHIIHRDIKPDNILLDEHGHVHITDFNIATVVKGAERASSMAGTKPYMAPEVFQVYMDRGPGYSYPVDWWSLGITAYELLRGWRPYEIHSVTPIDEILNMFKVERVHYSSTWCKGMVALLRKLLTKDPESRVSSLHDIQSVPYL 1.0 6.0 NaN 3.0 NaN NaN NaN NaN NaN NaN cytosol
388 0.866177 YANK3 PSPA YANK3 Q86UX6 STK32C AGC YANK None YANK 1 0 0 0 YANK acidophilic NaN NaN 421 MRSGAERRGSSAAASPGSPPPGRARPAGSDAPSALPPPAAGQPRARDSGDVRSQPRPLFQWSKWKKRMGSSMSAATARRPVFDDKEDVNFDHFQILRAIGKGSFGKVCIVQKRDTEKMYAMKYMNKQQCIERDEVRNVFRELEILQEIEHVFLVNLWYSFQDEEDMFMVVDLLLGGDLRYHLQQNVQFSEDTVRLYICEMALALDYLRGQHIIHRDVKPDNILLDERGHAHLTDFNIATIIKDGERATALAGTKPYMAPEIFHSFVNGGTGYSFEVDWWSVGVMAYELLRGWRPYDIHSSNAVESLVQLFSTVSVQYVPTWSKEMVALLRKLLTVNPEHRLSSLQDVQAAPALAGVLWDHLSEKRVEPGFVPNKGRLHCDPTFELEEMILESRPLHKKKKRLAKNKSRDNSRDSSQSENDYLQDCLDAIQQDFVIFNREKLKRSQDLPREPLPAPESRDAAEPVEDEAERSALPMCGPICPSAGSG FQILRAIGKGSFGKVCIVQKRDTEKMYAMKYMNKQQCIERDEVRNVFRELEILQEIEHVFLVNLWYSFQDEEDMFMVVDLLLGGDLRYHLQQNVQFSEDTVRLYICEMALALDYLRGQHIIHRDVKPDNILLDERGHAHLTDFNIATIIKDGERATALAGTKPYMAPEIFHSFVNGGTGYSFEVDWWSVGVMAYELLRGWRPYDIHSSNAVESLVQLFSTVSVQYVPTWSKEMVALLRKLLTVNPEHRLSSLQDVQAAPAL 2.0 5.0 NaN 3.0 NaN NaN NaN NaN NaN NaN cytosol
389 0.638188 YSK4 PSPA MAP3K19 Q56UN5 MAP3K19 STE STE11 None STE11 1 0 0 0 MAP3K MAP3K NaN NaN 388 MSSMPKPERHAESLLDICHDTNSSPTDLMTVTKNQNIILQSISRSEEFDQDGDCSHSTLVNEEEDPSGGRQDWQPRTEGVEITVTFPRDVSPPQEMSQEDLKEKNLINSSLQEWAQAHAVSHPNEIETVELRKKKLTMRPLVLQKEESSRELCNVNLGFLLPRSCLELNISKSVTREDAPHFLKEQQRKSEEFSTSHMKYSGRSIKFLLPPLSLLPTRSGVLTIPQNHKFPKEKERNIPSLTSFVPKLSVSVRQSDELSPSNEPPGALVKSLMDPTLRSSDGFIWSRNMCSFPKTNHHRQCLEKEENWKSKEIEECNKIEITHFEKGQSLVSFENLKEGNIPAVREEDIDCHGSKTRKPEEENSQYLSSRKNESSVAKNYEQDPEIVCTIPSKFQETQHSEITPSQDEEMRNNKAASKRVSLHKNEAMEPNNILEECTVLKSLSSVVFDDPIDKLPEGCSSMETNIKISIAERAKPEMSRMVPLIHITFPVDGSPKEPVIAKPSLQTRKGTIHNNHSVNIPVHQENDKHKMNSHRSKLDSKTKTSKKTPQNFVISTEGPIKPTMHKTSIKTQIFPALGLVDPRPWQLPRFQKKMPQ... WTKGEILGKGAYGTVYCGLTSQGQLIAVKQVALDTSNKLAAEKEYRKLQEEVDLLKALKHVNIVAYLGTCLQENTVSIFMEFVPGGSISSIINRFGPLPEMVFCKYTKQILQGVAYLHENCVVHRDIKGNNVMLMPTGIIKLIDFGCARRLAWAGLNGTHSDMLKSMHGTPYWMAPEVINESGYGRKSDIWSIGCTVFEMATGKPPLASMDRMAAMFYIGAHRGLMPPLPDHFSENAADFVRMCLTRDQHERPSALQLLKHSFL NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN None

390 rows × 32 columns

# Distinct values of the `group` column of `corr`, in order of first appearance.
group = corr["group"].unique()
# Bare expression so the notebook renders the resulting array.
group
array(['TK', 'TKL', 'Other', 'AGC', 'STE', 'CAMK', 'Atypical', 'CMGC', 'CK1'], dtype=object)

Individual kinase group

# One sub-frame of `corr` per kinase group, in the same order as `group`.
# A boolean mask is used rather than an f-string `query` expression so that a
# group name containing quotes or other special characters cannot break (or
# alter) the filter.
corrs = [corr[corr['group'] == group_name] for group_name in group]

# Draw one box plot per kinase group from the 'Pearson' and 'family' columns.
# NOTE(review): presumably plot_box(data, value_col, category_col, title) —
# confirm against the plot_box definition.
for group_name, group_df in zip(group, corrs):
    plot_box(group_df, 'Pearson', 'family', group_name)

In general

# Load the pickled object stored at raw/kinase_color.pkl; it is handed to
# plot_bar below as `palette`.
# NOTE(review): presumably a kinase-group -> color mapping — confirm.
group_color = load_pickle("raw/kinase_color.pkl")
# Bar plot over the whole `corr` frame using its 'Pearson' and 'group' columns.
# NOTE(review): presumably shows Pearson aggregated per group — confirm against
# the plot_bar definition.
plot_bar(corr,'Pearson','group',palette=group_color,fontsize=20)
# plt.title('a')

Families with the largest counts (top 15)

# Per-family summary of the 'Pearson' column: its mean and the number of rows.
by_family = corr.groupby('family')
family_score = by_family.agg(
    family_mean=pd.NamedAgg(column='Pearson', aggfunc='mean'),
    cnt=pd.NamedAgg(column='Pearson', aggfunc='size'),
)

# Show the 15 families with the largest row counts, biggest first.
family_score.sort_values(by='cnt', ascending=False).head(15)
family_mean cnt
family
STE20 0.863932 27
CAMKL 0.789590 20
CDK 0.923265 17
MAPK 0.881876 12
Eph 0.976189 12
Src 0.967152 11
DYRK 0.865820 10
NEK 0.778235 10
PKC 0.949881 9
STKR 0.857840 9
RSK 0.937107 8
STE11 0.749903 7
CK1 0.948548 7
GRK 0.813348 7
MLK 0.733784 7