from fastbook import *
from scipy.optimize import minimize
from katlas.imports import *
import seaborn as sns
Analyze DL/ML results
Setup
Load data
# read training data
df = pd.read_parquet('train_data/combine_t5_kd.parquet').reset_index()

# column name of feature and target
feat_col = df.columns[df.columns.str.startswith('T5_')]
# every non-T5 column except the first one
# NOTE(review): the skipped column is presumably the identifier restored by reset_index() - confirm
target_col = df.columns[~df.columns.isin(feat_col)][1:]

y = df[target_col]

# out-of-fold predictions, indexed by model name further below
# NOTE: read_pickle executes arbitrary code on load; only use with trusted files
oof_results = pd.read_pickle('raw/oof.pkl')

# first two columns of the PSPA info sheet
source = pd.read_excel('train_data/combine_info_PSPA.xlsx').iloc[:,:2]

# kinase annotations, excluding rows flagged as pseudo
info = Data.get_kinase_info().query('pseudo !="1"')

# attach the annotations to the source table (merge on the shared columns)
info = source.merge(info)
Ensemble
The goal here is to learn one weight per model's out-of-fold (OOF) predictions so that the weighted blend fits the targets as well as possible. We then pick the top-weighted models, re-ensemble only those, and inspect their weights.
def ensemble(oof_results, # dictionary of oofs, one entry per model
             y, # target df
             ):
    """Ensemble a dictionary of OOFs with MSE-optimal convex weights.

    Each model's out-of-fold (OOF) prediction gets a weight bounded to [0, 1];
    the weights are constrained to sum to 1 and are fitted with SLSQP so that
    the weighted sum of the OOFs minimises the mean squared error against `y`.
    The optimiser status is printed and the weights are displayed sorted in
    descending order.

    Returns `(weights_df, oof_w)`: `weights_df` has the columns `model` and
    `weights`, in the iteration order of `oof_results`; `oof_w` is the blended
    prediction as a dataframe with the columns of `y` and a default integer
    index.

    Raises `ValueError` when `oof_results` is empty.
    """
    if not oof_results:
        # otherwise the equal-split initial guess below divides by zero
        raise ValueError("oof_results must contain at least one OOF prediction")

    y_true = y.to_numpy()

    # Stack every OOF into one (n_models, n_rows, n_cols) float array
    oofs = np.zeros((len(oof_results), y.shape[0], y.shape[1]))
    for i, oof in enumerate(oof_results.values()):
        oofs[i] = oof

    num_models = len(oofs)

    def blend(weights):
        "Weighted sum of all OOFs"
        return sum(w * oof for w, oof in zip(weights, oofs))

    # Objective function to minimize (Mean Squared Error)
    def objective(weights):
        "Mean squared error of the weighted blend against the targets"
        return np.mean((blend(weights) - y_true) ** 2)

    # The sum of weights is 1 (equality constraint)
    cons = {'type': 'eq', 'fun': lambda w: 1 - sum(w)}

    # Each weight is bounded between 0 and 1
    bounds = [(0, 1)] * num_models

    # Initial guess - divide 1 equally among all models
    initial_weights = [1. / num_models] * num_models

    # SLSQP supports both the bounds and the equality constraint
    # ('trust-constr' is the alternative method that also honours them)
    result = minimize(objective, initial_weights, method='SLSQP',
                      bounds=bounds, constraints=cons)

    # Check whether it is converged
    print(f'result: {result.success}')
    print(result.message)

    # Extract the optimized weights, in the same order as oof_results
    best_weights = result.x

    weights_df = pd.DataFrame({'model': list(oof_results), 'weights': best_weights})
    display(weights_df.sort_values('weights', ascending=False))

    # Label the blend with the target's own columns rather than a global
    oof_w = pd.DataFrame(blend(best_weights), columns=y.columns)

    return weights_df, oof_w
# get ensemble weights for each oof, plus the weighted blend of all models
weights, oof_ensemble = ensemble(oof_results, y)
result: True
Optimization terminated successfully
model | weights | |
---|---|---|
5 | cnn_t5_kd | 2.229052e-01 |
7 | cnn_esm_kd | 2.044781e-01 |
4 | cnn_t5 | 1.372858e-01 |
19 | Ridge_esm_kd | 7.176390e-02 |
29 | KNN_t5_kd | 7.153355e-02 |
11 | LinearRegression_esm_kd | 5.621854e-02 |
10 | LinearRegression_esm | 5.518402e-02 |
9 | LinearRegression_t5_kd | 4.254569e-02 |
6 | cnn_esm | 4.045877e-02 |
28 | KNN_t5 | 2.837086e-02 |
31 | KNN_esm_kd | 2.613324e-02 |
8 | LinearRegression_t5 | 1.770226e-02 |
25 | DecisionTreeRegressor_t5_kd | 1.498182e-02 |
27 | DecisionTreeRegressor_esm_kd | 5.412341e-03 |
18 | Ridge_esm | 5.025994e-03 |
15 | Lasso_esm_kd | 1.081117e-17 |
20 | ElasticNet_t5 | 7.169454e-18 |
1 | mlp_t5_kd | 5.042534e-18 |
14 | Lasso_esm | 3.581190e-18 |
0 | mlp_t5 | 3.488243e-18 |
21 | ElasticNet_t5_kd | 1.283283e-18 |
22 | ElasticNet_esm | 1.233044e-18 |
13 | Lasso_t5_kd | 8.692677e-19 |
2 | mlp_esm | 7.528412e-19 |
17 | Ridge_t5_kd | 0.000000e+00 |
23 | ElasticNet_esm_kd | 0.000000e+00 |
24 | DecisionTreeRegressor_t5 | 0.000000e+00 |
26 | DecisionTreeRegressor_esm | 0.000000e+00 |
12 | Lasso_t5 | 0.000000e+00 |
3 | mlp_esm_kd | 0.000000e+00 |
30 | KNN_esm | 0.000000e+00 |
16 | Ridge_t5 | 0.000000e+00 |
# score the all-model blend against the targets; the return value is not needed here
_ = score_each(y, oof_ensemble)
overall MSE: 0.2979
Average Pearson: 0.8240
Then we choose the top three models and ensemble them.
# the three models with the largest weights in the full ensemble
top = ['cnn_t5_kd', 'cnn_esm_kd', 'cnn_t5']

# restrict the OOF dictionary to those models and refit the weights
oofs2 = {key: oof_results[key] for key in top}
weights2, oof_ensemble2 = ensemble(oofs2, y)
result: True
Optimization terminated successfully
model | weights | |
---|---|---|
1 | cnn_esm_kd | 0.370932 |
0 | cnn_t5_kd | 0.350575 |
2 | cnn_t5 | 0.278493 |
# score the top-3 blend; keep only the third return value
_, _, corr = score_each(y, oof_ensemble2)
overall MSE: 0.3081
Average Pearson: 0.8174
# Pie Chart of the top-3 ensemble weights
labels = weights2.model
sizes = weights2.weights

plt.figure(figsize=(5, 5))
plt.pie(sizes, labels=labels, autopct='%1.2f%%', startangle=140, colors=sns.color_palette("Set3", len(weights2)))
plt.title("Ensemble Weights")
plt.show()
Since the optimized weights are close to uniform (roughly one third each), we simply take the plain average of the three models.
# unweighted mean of the three selected models' OOF predictions
oof_ensemble = (oof_results['cnn_t5_kd'] + oof_results['cnn_esm_kd'] + oof_results['cnn_t5']) / 3
oof_ensemble
-5P | -5G | -5A | -5C | -5S | -5T | -5V | -5I | -5L | -5M | -5F | -5Y | -5W | -5H | -5K | -5R | -5Q | -5N | -5D | -5E | -5s | -5t | -5y | -4P | -4G | -4A | -4C | -4S | -4T | -4V | -4I | -4L | -4M | -4F | -4Y | -4W | -4H | -4K | -4R | -4Q | -4N | -4D | -4E | -4s | -4t | -4y | -3P | -3G | -3A | -3C | -3S | -3T | -3V | -3I | -3L | -3M | -3F | -3Y | -3W | -3H | -3K | -3R | -3Q | -3N | -3D | -3E | -3s | -3t | -3y | -2P | -2G | -2A | -2C | -2S | -2T | -2V | -2I | -2L | -2M | -2F | -2Y | -2W | -2H | -2K | -2R | -2Q | -2N | -2D | -2E | -2s | -2t | -2y | -1P | -1G | -1A | -1C | -1S | -1T | -1V | -1I | -1L | -1M | -1F | -1Y | -1W | -1H | -1K | -1R | -1Q | -1N | -1D | -1E | -1s | -1t | -1y | 1P | 1G | 1A | 1C | 1S | 1T | 1V | 1I | 1L | 1M | 1F | 1Y | 1W | 1H | 1K | 1R | 1Q | 1N | 1D | 1E | 1s | 1t | 1y | 2P | 2G | 2A | 2C | 2S | 2T | 2V | 2I | 2L | 2M | 2F | 2Y | 2W | 2H | 2K | 2R | 2Q | 2N | 2D | 2E | 2s | 2t | 2y | 3P | 3G | 3A | 3C | 3S | 3T | 3V | 3I | 3L | 3M | 3F | 3Y | 3W | 3H | 3K | 3R | 3Q | 3N | 3D | 3E | 3s | 3t | 3y | 4P | 4G | 4A | 4C | 4S | 4T | 4V | 4I | 4L | 4M | 4F | 4Y | 4W | 4H | 4K | 4R | 4Q | 4N | 4D | 4E | 4s | 4t | 4y | 0s | 0t | 0y | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.188878 | 0.634647 | 0.887343 | -0.927092 | -0.392501 | -0.586569 | 0.142022 | 0.007973 | 0.562061 | -0.993858 | -0.566768 | -1.220567 | -1.158315 | -0.642133 | 1.120802 | 0.364175 | 0.375434 | 0.055257 | 1.663378 | 2.376884 | -0.301946 | -0.794536 | -0.552649 | 0.190727 | 1.270748 | 0.513715 | -0.854113 | -0.487572 | -0.638809 | 0.230982 | -0.242128 | 1.036821 | -0.792146 | -0.403138 | -1.319813 | -1.409867 | -0.841248 | 0.476729 | 0.362317 | -0.164695 | -0.232484 | 1.704270 | 2.104024 | 0.291016 | -0.687562 | -0.548687 | 0.069685 | 0.753290 | 1.037177 | -1.145434 | -0.621697 | -0.929131 | 0.163861 | -0.488886 | 0.603018 | -0.976631 | -0.657797 | -1.337881 | -1.360138 | -0.648267 | 0.672734 | 0.166015 | 0.424121 | 0.005110 | 1.784140 | 3.080080 | 0.135212 | -0.625342 | -0.208076 | 0.522695 | 1.255310 | 0.811666 | -1.000513 | -0.704257 | -0.894888 | 0.036102 | -0.181274 | 0.143785 | -1.174770 | -0.859642 | -1.324347 | -1.155678 | -0.889019 | 0.587813 | -0.260471 | 0.352046 | 0.267965 | 2.108112 | 2.557907 | 0.359679 | -0.419893 | -0.191064 | -0.031897 | -0.041261 | 0.768281 | -1.046224 | -1.056009 | -0.841082 | 1.678762 | 1.474963 | 2.380413 | -1.209911 | -0.328476 | -1.353052 | -1.178340 | -0.810562 | -0.073252 | -0.627969 | -0.244801 | -0.143653 | 1.185254 | 1.820870 | -0.503965 | 0.033932 | 0.055079 | -0.990394 | 2.050974 | 1.202434 | -0.896472 | -0.751740 | -1.074353 | 0.673735 | 0.215778 | 0.388112 | -0.904532 | -0.696476 | -1.325945 | -1.133978 | -0.718839 | -0.004357 | -0.723982 | 0.306474 | -0.201503 | 1.981238 | 3.301958 | 0.194510 | -0.673171 | -0.147423 | -0.012991 | 0.666403 | 1.149449 | -0.809711 | -0.726497 | -0.936235 | 0.676131 | -0.102694 | 0.933665 | -0.895986 | -0.314606 | -1.249670 | -1.060242 | -0.775663 | 0.120224 | -0.079433 | -0.139687 | 0.144503 | 1.457012 | 2.152325 | 0.207267 | -0.323534 | -0.220516 | 0.579143 | 0.581741 | 0.764128 | -0.780944 | -0.667929 | -0.927524 | 0.894377 | 0.651753 | 2.628730 | -0.783616 | -0.169506 | 
-1.289550 | -1.116577 | -0.653992 | 0.193128 | 0.715483 | -0.328777 | -0.497816 | 0.547089 | 0.568319 | -0.167851 | -0.542341 | -0.194183 | 0.416947 | 0.874074 | 1.057034 | -0.942906 | -0.373124 | -0.787586 | 0.601266 | -0.254105 | 0.793548 | -0.834147 | -0.294836 | -1.225388 | -1.186767 | -0.653884 | 0.888362 | 0.690264 | -0.043264 | -0.115584 | 1.033319 | 1.457796 | -0.159984 | -0.520591 | -0.457668 | -0.704891 | -0.679028 | 1.414340 |
1 | -0.025378 | 0.714929 | 0.920944 | -1.022550 | -0.377764 | -0.636222 | 0.134602 | -0.041053 | 0.666049 | -0.975972 | -0.436032 | -1.089625 | -1.148152 | -0.615748 | 1.547186 | 0.616245 | 0.263874 | 0.219127 | 1.361456 | 1.681483 | -0.458307 | -0.781143 | -0.551071 | 0.131412 | 1.280712 | 0.343339 | -0.892329 | -0.411230 | -0.595306 | 0.272755 | -0.149464 | 1.104624 | -0.778450 | -0.379281 | -1.217710 | -1.201595 | -0.732177 | 0.788771 | 0.675667 | -0.106178 | -0.085377 | 1.359592 | 1.520061 | 0.125365 | -0.668326 | -0.569933 | 0.217928 | 0.803164 | 1.125930 | -1.078663 | -0.694710 | -0.892364 | 0.308811 | -0.217046 | 0.690249 | -0.967827 | -0.508961 | -1.235178 | -1.157629 | -0.601595 | 0.733407 | 0.290156 | 0.361249 | -0.081060 | 1.511702 | 2.072425 | 0.152596 | -0.614647 | -0.341213 | 0.232904 | 0.967064 | 0.870205 | -0.957279 | -0.700066 | -1.014585 | 0.176937 | -0.020815 | 0.229948 | -1.000263 | -0.768759 | -1.291999 | -1.003731 | -0.901384 | 1.032405 | 0.113560 | 0.275431 | -0.084332 | 1.372409 | 2.307034 | 0.361895 | -0.367394 | -0.101802 | -0.505137 | -0.005732 | 0.529710 | -1.089594 | -1.030629 | -0.930996 | 0.675152 | 0.595349 | 1.792188 | -1.231175 | -0.360018 | -1.313301 | -1.248776 | -0.891328 | -0.034980 | -0.276024 | -0.269184 | 0.695430 | 3.171903 | 2.060493 | -0.313544 | -0.091605 | 0.062153 | -0.884825 | 0.876114 | 1.224441 | -0.919931 | -0.917134 | -1.074066 | 1.190683 | 0.769583 | 0.686133 | -0.905399 | -0.505974 | -1.319224 | -1.148758 | -0.704071 | -0.205544 | -0.778959 | 0.389553 | -0.531959 | 1.657287 | 3.571767 | 0.192003 | -0.706291 | -0.158499 | -0.136238 | 0.138411 | 0.950910 | -0.904203 | -0.872926 | -0.971147 | 0.585231 | -0.246415 | 0.663709 | -0.727415 | -0.171336 | -1.205408 | -1.029339 | -0.633804 | 0.182212 | -0.301457 | -0.200499 | 0.235688 | 2.338714 | 2.657143 | 0.058047 | -0.332117 | -0.117603 | 0.449370 | 0.295798 | 0.717976 | -0.782166 | -1.050453 | -0.913831 | 1.325246 | 0.764809 | 3.037096 | -0.633829 | -0.091207 | 
-1.248452 | -1.097738 | -0.679635 | -0.100422 | 0.314001 | -0.278309 | -0.327892 | 0.736089 | 0.606189 | -0.329023 | -0.365070 | -0.414462 | 0.277456 | 0.820147 | 0.992880 | -0.931553 | -0.488792 | -0.853461 | 0.552210 | -0.261802 | 0.841647 | -0.791326 | -0.152338 | -1.163227 | -1.173420 | -0.663181 | 0.814095 | 0.477838 | -0.275963 | -0.028011 | 1.277532 | 1.637414 | -0.231553 | -0.460945 | -0.521478 | -0.666918 | -0.666190 | 1.365925 |
2 | -0.024647 | 0.560756 | 0.934807 | -0.974955 | -0.175042 | -0.629669 | 0.058849 | -0.054240 | 0.548685 | -0.881397 | -0.394332 | -1.076726 | -1.069301 | -0.709501 | 1.228867 | 0.759148 | 0.222496 | 0.029304 | 1.321192 | 1.630601 | -0.312367 | -0.637080 | -0.497616 | 0.074048 | 1.185613 | 0.402796 | -0.747981 | -0.400933 | -0.575594 | 0.273450 | -0.230542 | 1.133951 | -0.717366 | -0.266730 | -1.135025 | -1.146772 | -0.733745 | 0.728746 | 0.739683 | -0.047235 | -0.141333 | 1.264864 | 1.433561 | 0.203258 | -0.433761 | -0.338125 | -0.044754 | 0.713836 | 1.139258 | -0.899315 | -0.652277 | -0.879406 | 0.059048 | -0.320130 | 0.305766 | -0.920699 | -0.600200 | -1.255255 | -1.168093 | -0.597407 | 0.832587 | 0.963251 | 0.302916 | -0.164538 | 1.218751 | 2.107990 | 0.381744 | -0.424843 | -0.323495 | 0.356763 | 0.976482 | 0.622836 | -0.739690 | -0.612199 | -0.895093 | 0.093400 | -0.090386 | 0.139569 | -0.966888 | -0.727430 | -1.153557 | -0.945674 | -0.698418 | 0.708270 | 0.314546 | 0.157194 | 0.149991 | 1.325282 | 1.825401 | 0.602699 | -0.325473 | -0.059798 | -0.236990 | 0.297713 | 0.743918 | -0.943952 | -1.088841 | -0.932575 | 0.906144 | 0.701359 | 1.553143 | -1.070274 | -0.357941 | -1.170364 | -1.054697 | -0.674670 | 0.179464 | 0.007014 | -0.023326 | 0.243161 | 1.640087 | 1.131995 | -0.228535 | -0.038759 | 0.317038 | -0.753399 | 0.888694 | 0.988067 | -0.806323 | -0.818472 | -0.953558 | 0.886148 | 0.677087 | 0.715901 | -0.695982 | 0.193626 | -1.151319 | -1.019825 | -0.642614 | -0.016035 | -0.493689 | 0.357618 | -0.568868 | 1.169129 | 2.158598 | 0.385091 | -0.366585 | 0.149272 | 0.035840 | 0.311061 | 0.678965 | -0.755270 | -0.885359 | -0.873519 | 0.491406 | -0.120983 | 0.781147 | -0.662009 | -0.124938 | -1.059510 | -1.038169 | -0.550205 | 0.209194 | 0.310325 | -0.297987 | 0.256709 | 1.455295 | 1.829231 | 0.255894 | -0.268770 | 0.083823 | 0.590880 | 0.364527 | 0.517759 | -0.591672 | -0.807841 | -0.794786 | 1.258402 | 0.601661 | 2.574703 | -0.628313 | -0.008908 | -1.041480 | 
-0.960783 | -0.690295 | -0.127888 | 0.610275 | -0.348584 | -0.423146 | 0.385747 | 0.535732 | -0.134539 | -0.336298 | -0.352171 | 0.373305 | 0.949861 | 0.825229 | -0.804869 | -0.547974 | -0.773568 | 0.533965 | -0.140524 | 0.621019 | -0.819822 | -0.151661 | -1.073182 | -0.975920 | -0.773664 | 0.731652 | 0.564922 | -0.105418 | -0.128784 | 0.877137 | 1.439533 | -0.146537 | -0.233944 | -0.445056 | -0.472215 | -0.577957 | 1.176335 |
3 | -0.115886 | 0.820746 | 1.072479 | -1.107175 | -0.336168 | -0.784364 | 0.094444 | -0.072963 | 0.810767 | -1.064753 | -0.608579 | -1.339365 | -1.245652 | -0.831597 | 1.721726 | 0.712874 | 0.235246 | 0.276209 | 1.626535 | 2.092431 | -0.482970 | -0.750626 | -0.460130 | 0.236611 | 1.392298 | 0.586144 | -0.986109 | -0.543984 | -0.869062 | 0.406866 | -0.199675 | 1.261367 | -0.931979 | -0.453303 | -1.378041 | -1.429664 | -0.885421 | 0.790337 | 0.861260 | -0.105331 | -0.107974 | 1.406247 | 1.556562 | 0.291431 | -0.635116 | -0.547837 | 0.246646 | 0.842397 | 1.374244 | -1.213337 | -0.836970 | -1.061880 | 0.445910 | -0.240524 | 0.818263 | -1.145204 | -0.652396 | -1.454522 | -1.350311 | -0.748239 | 1.094022 | 0.496164 | 0.291302 | -0.021706 | 1.384924 | 2.188752 | 0.427537 | -0.540029 | -0.402436 | 0.263044 | 1.126636 | 0.818492 | -1.084867 | -0.777840 | -1.156849 | 0.150072 | -0.151041 | 0.160017 | -1.158610 | -0.724767 | -1.481028 | -1.148549 | -1.060825 | 1.138146 | 0.097964 | 0.181290 | 0.068909 | 1.746760 | 2.522271 | 0.638805 | -0.363088 | -0.016735 | -0.108448 | 1.061750 | 0.936679 | -1.093320 | -1.129213 | -1.258457 | 0.759771 | 0.511325 | 1.550209 | -1.309304 | -0.281704 | -1.516510 | -1.239510 | -0.933464 | 0.154529 | -0.326074 | -0.101655 | 0.455826 | 2.016972 | 1.575466 | -0.069357 | -0.185581 | 0.533793 | -0.699799 | 1.152556 | 0.896781 | -1.046621 | -1.170381 | -1.218255 | 1.496725 | 0.998083 | 1.133697 | -0.938286 | -0.015171 | -1.440916 | -1.188007 | -0.683514 | 0.378795 | -0.640599 | 0.183254 | -0.548098 | 1.053966 | 2.207037 | 0.354827 | -0.519037 | 0.394696 | 0.244732 | 0.381438 | 1.113125 | -0.899122 | -1.096222 | -1.173454 | 0.687293 | -0.046008 | 0.940206 | -0.894093 | -0.104996 | -1.368054 | -1.066989 | -0.855887 | 0.294873 | 0.093265 | -0.139776 | 0.133550 | 1.779457 | 1.869632 | 0.243867 | -0.375760 | 0.071816 | 0.692799 | 0.415830 | 0.703685 | -0.878906 | -1.026634 | -1.112752 | 1.394995 | 0.792975 | 2.919907 | -0.848465 | -0.122632 | -1.400679 | 
-1.258569 | -0.974020 | 0.270461 | 0.788220 | -0.574408 | -0.295961 | 0.706526 | 0.730792 | -0.166971 | -0.385794 | -0.312091 | 0.376540 | 0.884624 | 0.977607 | -1.104369 | -0.602411 | -0.914841 | 0.743480 | -0.106034 | 1.201335 | -0.814697 | -0.223407 | -1.285807 | -1.231427 | -0.859937 | 1.158588 | 0.774048 | -0.271745 | -0.174051 | 0.883699 | 1.405990 | -0.198720 | -0.331293 | -0.493096 | -0.617485 | -0.773803 | 1.381849 |
4 | 0.005970 | 0.751114 | 1.085722 | -1.039904 | -0.406701 | -0.679811 | 0.072615 | -0.075559 | 0.692090 | -1.072586 | -0.460515 | -1.288880 | -1.222914 | -0.716772 | 1.364289 | 0.482584 | 0.263830 | 0.171767 | 1.651816 | 2.032803 | -0.388215 | -0.720607 | -0.454893 | 0.265395 | 1.379802 | 0.589531 | -0.818324 | -0.519316 | -0.746067 | 0.413932 | -0.178369 | 1.170074 | -0.894897 | -0.355494 | -1.330207 | -1.312370 | -0.802593 | 0.591601 | 0.688329 | -0.156697 | -0.163164 | 1.411729 | 1.699529 | 0.206069 | -0.666106 | -0.523285 | 0.173828 | 0.883171 | 1.294354 | -1.112042 | -0.686691 | -0.939822 | 0.276185 | -0.307773 | 0.697519 | -0.985738 | -0.642088 | -1.393248 | -1.392490 | -0.732761 | 0.973595 | 0.358805 | 0.335087 | 0.064024 | 1.482641 | 2.376040 | 0.233742 | -0.572048 | -0.423111 | 0.447690 | 1.153062 | 0.833891 | -1.003197 | -0.699037 | -0.993186 | 0.153248 | -0.209088 | 0.223381 | -1.165334 | -0.741596 | -1.444063 | -1.076746 | -0.977388 | 0.960369 | -0.033733 | 0.309105 | 0.115317 | 1.677490 | 2.432875 | 0.478845 | -0.499757 | -0.138839 | -0.151743 | 0.650876 | 0.836831 | -0.989553 | -1.087273 | -1.087959 | 0.785815 | 0.776908 | 1.770988 | -1.228824 | -0.357408 | -1.430037 | -1.258179 | -0.797396 | 0.152119 | -0.311512 | -0.174495 | 0.422862 | 1.711298 | 1.606907 | -0.111176 | -0.190658 | 0.401723 | -0.786975 | 1.135739 | 1.058611 | -0.969714 | -0.932000 | -1.149220 | 1.275183 | 0.763192 | 1.097840 | -0.851838 | -0.135197 | -1.440695 | -1.176001 | -0.535466 | 0.188978 | -0.533128 | 0.373298 | -0.448216 | 1.071386 | 2.115694 | 0.146461 | -0.588486 | 0.263037 | 0.268874 | 0.469420 | 1.094261 | -0.928724 | -1.012780 | -1.003566 | 0.759516 | -0.058255 | 0.952261 | -0.984737 | -0.179058 | -1.345385 | -1.038068 | -0.822988 | 0.324975 | 0.130887 | -0.013518 | 0.059971 | 1.701512 | 2.136519 | 0.050153 | -0.482915 | -0.053709 | 0.684908 | 0.572248 | 0.710637 | -0.836325 | -0.807433 | -0.992573 | 1.345920 | 0.821516 | 2.887753 | -0.706492 | -0.110270 | -1.351152 | 
-1.205737 | -0.737653 | 0.244887 | 0.633224 | -0.466895 | -0.268452 | 0.583513 | 0.529670 | -0.291038 | -0.485136 | -0.285899 | 0.483719 | 0.887183 | 1.036619 | -1.038690 | -0.437466 | -0.841210 | 0.719239 | -0.163292 | 1.080628 | -0.780459 | -0.308839 | -1.277385 | -1.213813 | -0.850979 | 1.037729 | 0.641427 | -0.145962 | -0.076124 | 0.989158 | 1.263439 | -0.166760 | -0.514443 | -0.423403 | -0.676674 | -0.746608 | 1.362359 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
385 | 0.578521 | 0.815616 | -0.008575 | 0.070166 | 0.041314 | -0.172278 | -0.091158 | -0.266501 | -0.299746 | -0.250539 | 0.037579 | 0.365901 | 0.166937 | 0.305540 | -0.234406 | 0.311013 | -0.110569 | -0.114513 | -0.452355 | -0.358867 | -0.177268 | -0.018237 | 0.172704 | 0.274292 | 0.847559 | 0.161803 | 0.157351 | -0.062481 | 0.004793 | -0.146151 | -0.365771 | -0.515028 | -0.126082 | 0.122814 | 0.157954 | 0.266116 | 0.356241 | -0.009107 | 0.023418 | -0.148322 | 0.061621 | -0.206390 | -0.161031 | -0.145769 | -0.173076 | -0.111084 | 0.722043 | 1.151718 | 0.075962 | 0.111401 | -0.285507 | -0.096357 | -0.541320 | -0.756826 | -0.717633 | -0.359527 | -0.290220 | -0.110387 | -0.381590 | 0.329526 | -0.013344 | 0.645284 | 0.069591 | -0.080180 | -0.271745 | -0.389325 | 0.168342 | 0.324664 | 0.071264 | 1.154482 | 2.082457 | 0.152455 | 0.251916 | -0.405427 | -0.261448 | -0.662001 | -0.681511 | -0.454917 | -0.063818 | -0.606271 | -0.279721 | -0.684625 | 0.440753 | -0.405008 | 0.477990 | 0.301193 | 0.743176 | -0.173409 | -0.523468 | -0.174576 | -0.337592 | -0.310680 | -0.409673 | -0.728864 | -0.224772 | -0.421970 | -0.001823 | -0.022175 | 0.726590 | 0.268676 | 0.504831 | 0.188382 | 0.003810 | 0.641346 | -0.001090 | 0.389831 | 0.532993 | 1.586001 | 0.033036 | -0.142395 | -0.569806 | -0.779241 | -0.615640 | -0.564244 | 0.172966 | -1.446164 | -1.158714 | -0.324238 | -0.100830 | 0.009197 | -0.054438 | 1.004409 | 0.018712 | -0.682721 | 0.401000 | -0.044198 | 0.451011 | 0.404542 | 0.097068 | 1.442745 | 3.523521 | 1.326161 | -0.442833 | -1.214377 | -0.791535 | -0.586027 | -0.601752 | -0.526851 | -1.061495 | -0.837982 | -0.073638 | 1.031971 | -0.445046 | -0.147461 | 1.855818 | 1.188337 | 0.445286 | 1.252941 | -0.193166 | 0.172993 | -0.162267 | 0.209781 | 0.546619 | 0.245775 | 0.375268 | -0.744369 | -1.455791 | -1.201336 | -0.631667 | -0.685654 | 0.551269 | -0.736841 | 1.236806 | 0.449564 | -0.154367 | -0.164832 | -0.148053 | -0.329820 | -0.414346 | -0.298004 | -0.010251 | 0.186727 | 
0.785969 | 0.857228 | 1.468341 | -0.210802 | 1.092909 | -0.021411 | 0.736797 | -1.086275 | -1.293551 | -0.710390 | -0.662196 | -0.044366 | -0.379483 | 0.707046 | 0.061626 | 0.087535 | 0.007229 | -0.019088 | -0.553130 | -0.311879 | -0.316806 | 0.212299 | -0.013452 | 0.280088 | 0.704376 | 0.256359 | 0.145685 | 0.728678 | -0.053264 | 0.159969 | -0.264172 | -0.542244 | -0.728845 | -0.736948 | 0.484063 | -0.014160 | 0.981396 | -0.823922 |
386 | -0.343952 | -0.155117 | 0.006063 | 0.285334 | -0.023862 | -0.073818 | -0.028110 | -0.030005 | -0.120587 | -0.063003 | 0.280372 | 0.288623 | 0.244785 | 0.197624 | 1.120448 | 1.342140 | -0.393144 | -0.255975 | -0.764882 | -0.688033 | -0.411714 | -0.402310 | -0.559895 | -0.188687 | -0.186149 | 0.085229 | 0.209445 | -0.110868 | -0.112135 | -0.038973 | -0.186215 | -0.290907 | -0.101395 | -0.244995 | 0.194110 | 0.174400 | 0.261491 | 1.335250 | 1.739041 | -0.331995 | -0.067029 | -0.969707 | -0.835346 | -0.225157 | -0.148649 | -0.401451 | -0.357386 | -0.172101 | -0.267598 | 0.055623 | -0.301132 | -0.293372 | -0.312323 | -0.360072 | -0.296617 | 0.118884 | -0.607369 | -0.253655 | -0.284446 | 0.472431 | 2.497014 | 4.099475 | -0.093472 | -0.581561 | -1.095450 | -1.027082 | -0.416788 | -0.326693 | -0.437598 | 0.242633 | 1.007429 | 0.612549 | 0.445686 | -0.113361 | -0.165806 | -0.077746 | -0.558640 | -0.442297 | -0.442617 | -0.120817 | -0.194407 | -0.131095 | 0.800921 | 0.920657 | 3.575207 | -0.188720 | -0.316105 | -1.465925 | -1.350469 | -0.464359 | -0.489196 | -0.903108 | -0.498298 | 0.109299 | -0.114649 | -0.001037 | 0.134193 | 0.014040 | 0.015718 | -0.352781 | 0.098042 | 0.576062 | -0.191512 | 0.245011 | -0.089833 | 0.270160 | 1.991614 | 2.207065 | 0.346616 | -0.165267 | -1.167193 | -1.213787 | -0.807511 | -0.884333 | -0.417336 | -1.620930 | -0.555660 | -0.464463 | 0.605316 | 0.025533 | -0.010126 | 0.147222 | 0.569386 | 1.156091 | 1.878517 | 0.915935 | 0.596800 | 0.249085 | -0.144031 | 0.167630 | 1.245727 | 0.605658 | -0.114234 | -1.367335 | -1.328265 | -0.889478 | -0.864110 | -1.000866 | -0.936579 | -0.003797 | 0.017008 | -0.051265 | -0.051686 | -0.080829 | 0.378418 | 0.772260 | 0.344638 | 0.409918 | 0.039382 | 0.462811 | 0.084301 | 0.359274 | 0.503638 | 2.669749 | -0.557002 | -0.483690 | -0.531673 | -0.898888 | -0.787395 | -0.776047 | -0.809296 | -0.849585 | -0.280741 | -0.492412 | -0.034216 | -0.011144 | -0.093382 | -0.190026 | 0.007295 | 0.291797 | 0.243285 | 
1.052784 | 0.933768 | 0.930262 | 0.666788 | 0.674066 | 1.130771 | -0.423281 | -0.062838 | -0.576920 | -0.789637 | -0.839921 | -0.702896 | -0.638031 | -0.294989 | -0.192360 | -0.384952 | -0.066190 | -0.074334 | -0.004895 | 0.262383 | 0.598844 | 0.536701 | 0.483695 | 0.370696 | 0.272201 | 0.482870 | 0.014603 | 0.760624 | 0.726867 | -0.293256 | -0.392450 | -0.695027 | -0.750916 | -0.507727 | -0.535106 | -0.734783 | 1.085545 | -0.034680 | -1.120232 |
387 | -0.066611 | 0.160646 | -0.006179 | -0.204166 | -0.168144 | -0.095100 | -0.243909 | 0.001036 | 0.018622 | -0.132618 | -0.108387 | -0.101219 | 0.037201 | 0.357807 | 0.484108 | 0.846920 | -0.316675 | -0.220923 | -0.182159 | -0.201830 | -0.020689 | -0.018036 | 0.235278 | 0.006709 | 0.077673 | -0.004747 | -0.197423 | -0.078612 | -0.252352 | -0.264757 | -0.315038 | -0.140217 | -0.227206 | -0.193583 | -0.215091 | 0.134330 | -0.096614 | 0.246689 | 0.545683 | -0.251511 | -0.228500 | -0.169469 | -0.057055 | 0.460213 | 0.310485 | 0.341025 | -0.166372 | -0.354935 | -0.295832 | -0.178881 | -0.292668 | -0.360785 | -0.451096 | -0.513882 | -0.657880 | -0.478158 | -0.491899 | -0.434251 | -0.406700 | -0.212107 | 0.124407 | 3.893183 | -0.286749 | -0.499564 | -0.526325 | -0.124638 | 1.252761 | 1.312268 | 0.342483 | 0.004115 | -0.056000 | -0.110703 | 0.007906 | -0.230503 | -0.064162 | -0.338650 | -0.398219 | 0.155609 | -0.084716 | -0.036575 | -0.142463 | -0.120580 | -0.222994 | -0.008855 | 0.615924 | 0.163314 | -0.006318 | -0.705759 | -0.551729 | 0.300174 | 0.248571 | 1.596671 | 0.455188 | 0.153293 | -0.110889 | -0.167419 | -0.122656 | -0.197031 | -0.184998 | -0.355753 | -0.013585 | 0.091351 | 0.036043 | 0.073222 | -0.143890 | 0.139732 | 0.614046 | 0.811038 | -0.067180 | 0.125167 | -0.353471 | -0.382233 | -0.262606 | -0.128705 | -0.115752 | -0.930421 | -0.507747 | -0.406452 | -0.126812 | -0.148081 | -0.326349 | 0.227474 | 0.215811 | -0.184397 | 0.205914 | 1.248248 | -0.133319 | -0.132941 | -0.284734 | -0.096872 | 0.267902 | 0.014163 | -0.482787 | -0.561293 | -0.408395 | 0.966641 | 0.926225 | 0.544455 | -0.586569 | -0.499789 | -0.065891 | -0.199973 | -0.439885 | -0.387354 | -0.157257 | -0.178767 | -0.327227 | -0.351513 | -0.125342 | -0.064263 | 0.032916 | -0.383703 | -0.438517 | -0.171442 | -0.539449 | -0.618777 | -0.057934 | -0.143144 | -0.004363 | -0.055794 | 4.528556 | -0.463506 | -0.244622 | -0.200806 | 0.088204 | -0.189010 | -0.130377 | -0.079691 | -0.034115 | -0.205404 | 
-0.151083 | -0.100164 | 0.088742 | 0.401504 | -0.072054 | -0.382634 | -0.209524 | -0.169464 | -0.283133 | 0.080513 | 0.219396 | 0.427655 | 0.448607 | 1.228775 | 0.066399 | -0.074440 | -0.069934 | -0.223483 | -0.160704 | -0.181497 | -0.348366 | -0.218320 | -0.077495 | -0.241710 | -0.254692 | -0.243633 | -0.004971 | -0.168279 | -0.118715 | -0.021867 | -0.164025 | -0.209928 | 0.084288 | 0.064790 | 0.679144 | 0.696433 | 1.445132 | 0.759144 | 0.359163 | -1.111332 |
388 | 0.122353 | 0.206068 | 0.121852 | 0.135813 | -0.106405 | -0.109500 | -0.318797 | -0.224553 | -0.402368 | -0.253572 | -0.036752 | 0.022141 | 0.066784 | -0.063115 | 0.251984 | 0.423665 | -0.267971 | -0.062237 | -0.152510 | -0.180560 | 0.266860 | 0.226020 | 0.638867 | 0.088377 | 0.154580 | 0.005537 | -0.090565 | 0.017880 | -0.177648 | -0.368970 | -0.474865 | -0.318903 | -0.211556 | -0.036925 | -0.068680 | 0.345664 | -0.108639 | 0.092657 | 0.241915 | -0.125310 | -0.220962 | -0.083301 | 0.101778 | 0.665436 | 0.661190 | 0.391846 | 0.032425 | -0.122417 | -0.246262 | -0.158146 | -0.240017 | -0.122328 | -0.403602 | -0.548235 | -0.350910 | -0.150657 | -0.175774 | -0.052711 | -0.224810 | -0.142411 | -0.019941 | 0.819984 | -0.153961 | -0.405981 | -0.366120 | -0.108321 | 1.687876 | 1.622842 | 0.652195 | -0.479712 | -0.341194 | -0.167773 | 0.185297 | -0.248528 | -0.174750 | -0.413215 | -0.459817 | 0.526658 | 0.149524 | 0.233992 | 0.404246 | -0.384420 | -0.202454 | -0.232836 | 0.875517 | -0.114594 | -0.221065 | -0.712393 | -0.532897 | 0.319931 | 0.323008 | 2.141544 | -0.064749 | -0.010680 | 0.034973 | 0.001995 | -0.051811 | -0.032508 | -0.348255 | -0.522988 | 0.082399 | 0.101567 | -0.123955 | 0.217515 | -0.207852 | -0.092817 | 0.690799 | 0.952746 | 0.093686 | 0.173415 | -0.280747 | -0.262276 | -0.089357 | -0.138289 | 0.316182 | -0.668020 | -0.985175 | -0.451458 | -0.087931 | -0.302541 | -0.245176 | 0.238130 | 0.574219 | 0.316361 | 0.675789 | 0.944873 | 0.063380 | -0.064271 | -0.529131 | -0.110035 | 0.152777 | -0.179061 | -0.656349 | -1.086887 | -0.572069 | 0.924667 | 0.833399 | 0.543780 | -0.580854 | -0.631862 | -0.545981 | -0.427226 | -0.472761 | -0.538179 | -0.563131 | -0.538459 | -0.570086 | -0.314039 | -0.380604 | 0.047765 | -0.288111 | -0.177088 | -0.163989 | 0.743438 | -0.539873 | -0.361177 | -0.369740 | -0.456937 | 0.372878 | 0.371784 | 5.449268 | -0.405970 | -0.293095 | -0.158044 | 0.175940 | -0.249805 | -0.177893 | -0.147267 | -0.088540 | -0.072262 | -0.206695 | 
0.068685 | 0.125533 | 0.060028 | -0.050328 | -0.266684 | -0.032236 | -0.238533 | -0.261867 | -0.173550 | -0.290882 | 0.612625 | 0.566128 | 1.814760 | 0.186249 | -0.183579 | -0.175168 | -0.185604 | -0.089263 | -0.080009 | -0.237149 | -0.422907 | -0.339149 | -0.322928 | -0.295930 | -0.266436 | -0.128413 | -0.140600 | 0.191414 | 0.118732 | -0.214912 | -0.150141 | -0.197409 | -0.067330 | 0.571728 | 0.583210 | 1.916396 | 0.428484 | 0.622558 | -1.158134 |
389 | 0.188459 | 0.439853 | 0.060391 | -0.086527 | -0.111686 | -0.140101 | -0.463691 | -0.544235 | -0.622486 | -0.254746 | -0.199929 | -0.062966 | 0.097649 | 0.434262 | 0.265382 | 0.478620 | -0.427277 | -0.060230 | -0.091186 | -0.309038 | 0.539724 | 0.559802 | 0.440024 | 0.128444 | 0.499696 | 0.072232 | 0.075077 | -0.052714 | -0.098313 | -0.613954 | -0.814414 | -0.586662 | -0.110255 | 0.008663 | 0.077707 | 0.526670 | 0.362162 | 0.250743 | 0.447850 | -0.353245 | 0.000355 | 0.057385 | -0.129434 | 0.129044 | 0.264084 | -0.151309 | 0.187910 | 0.701375 | -0.176368 | 0.160061 | -0.201854 | -0.140544 | -0.994704 | -1.104489 | -0.316708 | -0.046821 | -0.307645 | 0.054225 | 0.131244 | 0.156841 | 0.514177 | 1.656849 | -0.312444 | 0.051022 | -0.255884 | -0.577326 | 0.415048 | 0.403577 | 0.101188 | -0.175350 | 0.444570 | 0.352650 | 0.795194 | -0.160316 | -0.056730 | -0.546945 | -0.769426 | -0.528751 | -0.205588 | 0.239008 | 0.641354 | 0.262148 | 0.426456 | -0.454494 | 0.724283 | -0.200243 | 0.330112 | 0.008259 | -0.999883 | 0.017019 | 0.072978 | 0.122243 | -0.523335 | 1.182480 | -0.105016 | -0.272883 | -0.000234 | -0.081880 | -1.263563 | -1.332233 | -0.173574 | 0.552361 | -0.588848 | 0.421488 | -0.713752 | 0.874099 | 1.210574 | 2.066226 | 0.211755 | 0.655666 | -0.286226 | -0.599556 | -0.776689 | -0.756884 | 0.246952 | -1.647672 | -0.860567 | -0.548863 | 0.526335 | -0.349274 | -0.382024 | 1.129147 | 1.860528 | 1.245924 | 2.198426 | 0.931788 | 0.749532 | 1.861245 | -0.052901 | -0.131282 | 0.375158 | 0.008166 | -0.912338 | -1.778002 | -1.599741 | -0.825851 | -0.703281 | -1.259517 | -0.908033 | -0.635809 | -0.452992 | 0.022787 | -0.297470 | -0.285614 | -0.502733 | -0.523386 | -0.616326 | 0.057821 | 0.086172 | 1.968491 | 0.404133 | 1.909161 | 2.396706 | 3.200826 | -0.365028 | 0.028798 | -1.295352 | -1.507143 | -0.796399 | -0.895243 | -1.172440 | -0.527432 | -0.025165 | -0.362537 | 0.052917 | 0.104496 | 0.161911 | -0.352495 | 0.092749 | 0.535177 | 0.261345 | 0.743398 | 0.675169 | 
1.295758 | 0.199683 | 0.695838 | 1.587987 | -0.377386 | -0.247616 | -1.245401 | -1.319967 | -0.703791 | -0.856706 | -0.503849 | 0.439088 | 0.397687 | -0.232954 | -0.087055 | 0.049947 | -0.109176 | -0.509142 | -0.155921 | -0.209409 | -0.040702 | -0.241797 | 0.307563 | 0.482718 | 0.484882 | 1.912625 | 1.505288 | 0.072022 | 0.128418 | -0.799038 | -0.709763 | -0.688117 | -0.758006 | -0.777439 | -0.206744 | 1.409324 | -1.200882 |
390 rows × 210 columns
OOF Pearson score by family
# Score the ensembled OOF predictions against the targets. Only the third
# return value is kept; it is used below as a table with a 'Pearson' column.
_, _, corr = score_each(oof_ensemble, y)
overall MSE: 0.3086
Average Pearson: 0.8170
# Attach the kinase annotations column-wise. pd.concat(axis=1) matches rows on
# the index, so `corr` and `info` must share the same index/row order.
corr = pd.concat([corr, info], axis=1)

# Mean Pearson and number of (non-null) kinase entries per family.
corr_family = corr.groupby('family').agg({'Pearson': 'mean', 'kinase': 'count'})
corr_family.sort_values('Pearson', ascending=False)
Pearson | kinase | |
---|---|---|
family | ||
Sev | 0.981550 | 1 |
Met | 0.979476 | 2 |
InsR | 0.978676 | 3 |
Eph | 0.976189 | 12 |
Akt | 0.974935 | 3 |
... | ... | ... |
KIS | 0.102436 | 1 |
Bud32 | 0.096503 | 1 |
FAM20C | 0.086976 | 1 |
CDC7 | -0.028638 | 1 |
WEE | -0.090115 | 1 |
100 rows × 2 columns
corr_family.to_csv('raw/oof_corr_family.csv')

# Same aggregation at subfamily level, displayed with the largest subfamilies first.
corr_subfamily = corr.groupby('subfamily').agg({'Pearson': 'mean', 'kinase': 'count'})
corr_subfamily.sort_values('kinase', ascending=False)
Pearson | kinase | |
---|---|---|
subfamily | ||
Eph | 0.976189 | 12 |
Src | 0.967152 | 11 |
NEK | 0.778235 | 10 |
STE11 | 0.749903 | 7 |
CK1 | 0.948548 | 7 |
... | ... | ... |
SNRK | 0.700188 | 1 |
NKF2 | 0.301151 | 1 |
CDK9 | 0.943745 | 1 |
NKF1 | 0.734262 | 1 |
MOS | 0.173084 | 1 |
160 rows × 2 columns
# Persist the per-subfamily summary next to the per-family one.
corr_subfamily.to_csv('raw/oof_corr_subfamily.csv')
Plot
# Plot defaults: 300 dpi for displayed and saved figures, then the 'notebook'
# context and 'ticks' style. sns.set() runs first so the later calls are not
# overridden by its own default style/context.
sns.set(rc={"figure.dpi": 300, 'savefig.dpi': 300})
sns.set_context('notebook')
sns.set_style("ticks")
corr
Pearson | kinase | source | ID_coral | uniprot | ID_HGNC | group | family | subfamily_coral | subfamily | in_ST_paper | in_Tyr_paper | in_cddm | pseudo | pspa_category_small | pspa_category_big | cddm_big | cddm_small | length | human_uniprot_sequence | kinasecom_domain | nucleus | cytosol | cytoskeleton | plasma membrane | mitochondrion | Golgi apparatus | endoplasmic reticulum | vesicle | centrosome | aggresome | main_location | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.980211 | SRC | KS | SRC | P12931 | SRC | TK | Src | None | Src | 0 | 1 | 1 | 0 | SRC | SRC | 1.0 | 2.0 | 536 | MGSNKSKPKDASQRRRSLEPAENVHGAGGGAFPASQTPSKPASADGHRGPSAAFAPAAAEPKLFGGFNSSDTVTSPQRAGPLAGGVTTFVALYDYESRTETDLSFKKGERLQIVNNTEGDWWLAHSLSTGQTGYIPSNYVAPSDSIQAEEWYFGKITRRESERLLLNAENPRGTFLVRESETTKGAYCLSVSDFDNAKGLNVKHYKIRKLDSGGFYITSRTQFNSLQQLVAYYSKHADGLCHRLTTVCPTSKPQTQGLAKDAWEIPRESLRLEVKLGQGCFGEVWMGTWNGTTRVAIKTLKPGTMSPEAFLQEAQVMKKLRHEKLVQLYAVVSEEPIYIVTEYMSKGSLLDFLKGETGKYLRLPQLVDMAAQIASGMAYVERMNYVHRDLRAANILVGENLVCKVADFGLARLIEDNEYTARQGAKFPIKWTAPEAALYGRFTIKSDVWSFGILLTELTTKGRVPYPGMVNREVLDQVERGYRMPCPPECPESLHDLMCQCWRKEPEERPTFEYLQAFLEDYFTSTEPQYQPGENL | LRLEVKLGQGCFGEVWMGTWNGTTRVAIKTLKPGTMSPEAFLQEAQVMKKLRHEKLVQLYAVVSEEPIYIVTEYMSKGSLLDFLKGETGKYLRLPQLVDMAAQIASGMAYVERMNYVHRDLRAANILVGENLVCKVADFGLARLIEDNEYTARQGAKFPIKWTAPEAALYGRFTIKSDVWSFGILLTELTTKGRVPYPGMVNREVLDQVERGYRMPCPPECPESLHDLMCQCWRKEPEERPTFEYLQAF | NaN | 2.0 | NaN | 6.0 | NaN | 2.0 | NaN | NaN | NaN | NaN | plasma membrane |
1 | 0.986307 | EPHA3 | KS | EphA3 | P29320 | EPHA3 | TK | Eph | None | Eph | 0 | 1 | 1 | 0 | Ephrin receptors | Ephrin receptors | 1.0 | 2.0 | 983 | MDCQLSILLLLSCSVLDSFGELIPQPSNEVNLLDSKTIQGELGWISYPSHGWEEISGVDEHYTPIRTYQVCNVMDHSQNNWLRTNWVPRNSAQKIYVELKFTLRDCNSIPLVLGTCKETFNLYYMESDDDHGVKFREHQFTKIDTIAADESFTQMDLGDRILKLNTEIREVGPVNKKGFYLAFQDVGACVALVSVRVYFKKCPFTVKNLAMFPDTVPMDSQSLVEVRGSCVNNSKEEDPPRMYCSTEGEWLVPIGKCSCNAGYEERGFMCQACRPGFYKALDGNMKCAKCPPHSSTQEDGSMNCRCENNYFRADKDPPSMACTRPPSSPRNVISNINETSVILDWSWPLDTGGRKDVTFNIICKKCGWNIKQCEPCSPNVRFLPRQFGLTNTTVTVTDLLAHTNYTFEIDAVNGVSELSSPPRQFAAVSITTNQAAPSPVLTIKKDRTSRNSISLSWQEPEHPNGIILDYEVKYYEKQEQETSYTILRARGTNVTISSLKPDTIYVFQIRARTAAGYGTNSRKFEFETSPDSFSISGESSQVVMIAISAAVAIILLTVVIYVLIGRFCGYKSKHGADEKRLHFGNGHLKLPGLRTY... | ISIDKVVGAGEFGEVCSGRLKLPSKKEISVAIKTLKVGYTEKQRRDFLGEASIMGQFDHPNIIRLEGVVTKSKPVMIVTEYMENGSLDSFLRKHDAQFTVIQLVGMLRGIASGMKYLSDMGYVHRDLAARNILINSNLVCKVSDFGLSRVLEDDPEAAYTTRGGKIPIRWTSPEAIAYRKFTSASDVWSYGIVLWEVMSYGERPYWEMSNQDVIKAVDEGYRLPPPMDCPAALYQLMLDCWQKDRNNRPKFEQIVSI | NaN | 1.0 | NaN | 6.0 | NaN | 3.0 | NaN | NaN | NaN | NaN | plasma membrane |
2 | 0.954302 | FES | KS | FES | P07332 | FES | TK | Fer | None | Fer | 0 | 1 | 1 | 0 | TAM receptors | TAM receptors | 1.0 | 2.0 | 822 | MGFSSELCSPQGHGVLQQMQEAELRLLEGMRKWMAQRVKSDREYAGLLHHMSLQDSGGQSRAISPDSPISQSWAEITSQTEGLSRLLRQHAEDLNSGPLSKLSLLIRERQQLRKTYSEQWQQLQQELTKTHSQDIEKLKSQYRALARDSAQAKRKYQEASKDKDRDKAKDKYVRSLWKLFAHHNRYVLGVRAAQLHHQHHHQLLLPGLLRSLQDLHEEMACILKEILQEYLEISSLVQDEVVAIHREMAAAAARIQPEAEYQGFLRQYGSAPDVPPCVTFDESLLEEGEPLEPGELQLNELTVESVQHTLTSVTDELAVATEMVFRRQEMVTQLQQELRNEEENTHPRERVQLLGKRQVLQEALQGLQVALCSQAKLQAQQELLQTKLEHLGPGEPPPVLLLQDDRHSTSSSEQEREGGRTPTLEILKSHISGIFRPKFSLPPPLQLIPEVQKPLHEQLWYHGAIPRAEVAELLVHSGDFLVRESQGKQEYVLSVLWDGLPRHFIIQSLDNLYRLEGEGFPSIPLLIDHLLSTQQPLTKKSGVVLHRAVPKDKWVLNHEDLVLGEQIGRGNFGEVFSGRLRADNTLVAVKSCRETL... | LVLGEQIGRGNFGEVFSGRLRADNTLVAVKSCRETLPPDLKAKFLQEARILKQYSHPNIVRLIGVCTQKQPIYIVMELVQGGDFLTFLRTEGARLRVKTLLQMVGDAAAGMEYLESKCCIHRDLAARNCLVTEKNVLKISDFGMSREEADGVYAASGGLRQVPVKWTAPEALNYGRYSSESDVWSFGILLWETFSLGASPYPNLSNQQTREFVEKGGRLPCPELCPDAVFRLMEQCWAYEPGQRPSFSTIYQELQS | NaN | 6.0 | NaN | 4.0 | NaN | NaN | NaN | NaN | NaN | NaN | cytosol |
3 | 0.973414 | NTRK3 | KS | TRKC | Q16288 | NTRK3 | TK | Trk | None | Trk | 0 | 1 | 1 | 0 | Insulin and neurotrophin receptors | Insulin and neurotrophin receptors | 1.0 | 3.0 | 839 | MDVSLCPAKCSFWRIFLLGSVWLDYVGSVLACPANCVCSKTEINCRRPDDGNLFPLLEGQDSGNSNGNASINITDISRNITSIHIENWRSLHTLNAVDMELYTGLQKLTIKNSGLRSIQPRAFAKNPHLRYINLSSNRLTTLSWQLFQTLSLRELQLEQNFFNCSCDIRWMQLWQEQGEAKLNSQNLYCINADGSQLPLFRMNISQCDLPEISVSHVNLTVREGDNAVITCNGSGSPLPDVDWIVTGLQSINTHQTNLNWTNVHAINLTLVNVTSEDNGFTLTCIAENVVGMSNASVALTVYYPPRVVSLEEPELRLEHCIEFVVRGNPPPTLHWLHNGQPLRESKIIHVEYYQEGEISEGCLLFNKPTHYNNGNYTLIAKNPLGTANQTINGHFLKEPFPESTDNFILFDEVSPTPPITVTHKPEEDTFGVSIAVGLAAFACVLLVVLFVMINKYGRRSKFGMKGPVAVISGEEDSASPLHHINHGITTPSSLDAGPDTVVIGMTRIPVIENPQYFRQGHNCHKPDTYVQHIKRRDIVLKRELGEGAFGKVFLAECYNLSPTKDKMLVAVKALKDPTLAARKDFQREAELLTNLQ... | IVLKRELGEGAFGKVFLAECYNLSPTKDKMLVAVKALKDPTLAARKDFQREAELLTNLQHEHIVKFYGVCGDGDPLIMVFEYMKHGDLNKFLRAHGPDAMILVDGQPRQAKGELGLSQMLHIASQIASGMVYLASQHFVHRDLATRNCLVGANLLVKIGDFGMSRDVYSTDYYRVGGHTMLPIRWMPPESIMYRKFTTESDVWSFGVILWEIFTYGKQPWFQLSNTEVIECITQGRVLERPRVCPKEVYDVMLGCWQREPQQRLNIKEIYKI | NaN | 4.0 | NaN | 4.0 | NaN | 2.0 | NaN | NaN | NaN | NaN | cytosol |
4 | 0.981001 | ALK | KS | ALK | Q9UM73 | ALK | TK | ALK | None | ALK | 0 | 1 | 1 | 0 | PDGF receptors | PDGF receptors | 1.0 | 3.0 | 1620 | MGAIGLLWLLPLLLSTAAVGSGMGTGQRAGSPAAGPPLQPREPLSYSRLQRKSLAVDFVVPSLFRVYARDLLLPPSSSELKAGRPEARGSLALDCAPLLRLLGPAPGVSWTAGSPAPAEARTLSRVLKGGSVRKLRRAKQLVLELGEEAILEGCVGPPGEAAVGLLQFNLSELFSWWIRQGEGRLRIRLMPEKKASEVGREGRLSAAIRASQPRLLFQIFGTGHSSLESPTNMPSPSPDYFTWNLTWIMKDSFPFLSHRSRYGLECSFDFPCELEYSPPLHDLRNQSWSWRRIPSEEASQMDLLDGPGAERSKEMPRGSFLLLNTSADSKHTILSPWMRSSSEHCTLAVSVHRHLQPSGRYIAQLLPHNEAAREILLMPTPGKHGWTVLQGRIGRPDNPFRVALEYISSGNRSLSAVDFFALKNCSEGTSPGSKMALQSSFTCWNGTVLQLGQACDFHQDCAQGEDESQMCRKLPVGFYCNFEDGFCGWTQGTLSPHTPQWQVRTLKDARFQDHQDHALLLSTTDVPASESATVTSATFPAPIKSSPCELRMSWLIRGVLRGNVSLVLVENKTGKEQGRMVWHVAAYEGLSLWQWM... | ITLIRGLGHGAFGEVYEGQVSGMPNDPSPLQVAVKTLPEVCSEQDELDFLMEALIISKFNHQNIVRCIGVSLQSLPRFILLELMAGGDLKSFLRETRPRPSQPSSLAMLDLLHVARDIACGCQYLEENHFIHRDIAARNCLLTCPGPGRVAKIGDFGMARDIYRASYYRKGGCAMLPVKWMPPEAFMEGIFTSKTDTWSFGVLLWEIFSLGYMPYPSKSNQEVLEFVTSGGRMDPPKNCPGPVYRIMTQCWQHQPEDRPNFAIILERIEY | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
385 | 0.698910 | VRK2 | PSPA | VRK2 | Q86Y07 | VRK2 | CK1 | VRK | None | VRK | 1 | 0 | 0 | 0 | assorted | assorted | NaN | NaN | 508 | MPPKRNEKYKLPIPFPEGKVLDDMEGNQWVLGKKIGSGGFGLIYLAFPTNKPEKDARHVVKVEYQENGPLFSELKFYQRVAKKDCIKKWIERKQLDYLGIPLFYGSGLTEFKGRSYRFMVMERLGIDLQKISGQNGTFKKSTVLQLGIRMLDVLEYIHENEYVHGDIKAANLLLGYKNPDQVYLADYGLSYRYCPNGNHKQYQENPRKGHNGTIEFTSLDAHKGVALSRRSDVEILGYCMLRWLCGKLPWEQNLKDPVAVQTAKTNLLDELPQSVLKWAPSGSSCCEIAQFLVCAHSLAYDEKPNYQALKKILNPHGIPLGPLDFSTKGQSINVHTPNSQKVDSQKAATKQVNKAHNRLIEKKVHSERSAESCATWKVQKEEKLIGLMNNEAAQESTRRRQKYQESQEPLNEVNSFPQKISYTQFPNSFYEPHQDFTSPDIFKKSRSPSWYKYTSTVSTGITDLESSTGLWPTISQFTLSEETNADVYYYRIIIPVLLMLVFLALFFL | WVLGKKIGSGGFGLIYLAFPTNKPEKDARHVVKVEYQENGPLFSELKFYQRVAKKDCIKKWIERKQLDYLGIPLFYGSGLTEFKGRSYRFMVMERLGIDLQKISGQNGTFKKSTVLQLGIRMLDVLEYIHENEYVHGDIKAANLLLGYKNPDQVYLADYGLSYRYCPNGNHKQYQENPRKGHNGTIEFTSLDAHKGVALSRRSDVEILGYCMLRWLCGKLPWEQNLKDPVAVQTAKTNLLDELPQSVLKWAPSGSSCCEIAQFL | NaN | NaN | NaN | NaN | NaN | NaN | 10.0 | NaN | NaN | NaN | endoplasmic reticulum |
386 | 0.885726 | WNK4 | PSPA | Wnk4 | Q96J92 | WNK4 | Other | WNK | None | WNK | 1 | 0 | 0 | 0 | RIPK/WNK | RIPK/WNK | NaN | NaN | 444 | MLASPATETTVLMSQTEADLALRPPPPLGTAGQPRLGPPPRRARRFSGKAEPRPRSSRLSRRSSVDLGLLSSWSLPASPAPDPPDPPDSAGPGPARSPPPSSKEPPEGTWTEGAPVKAAEDSARPELPDSAVGPGSREPLRVPEAVALERRREQEEKEDMETQAVATSPDGRYLKFDIEIGRGSFKTVYRGLDTDTTVEVAWCELQTRKLSRAERQRFSEEVEMLKGLQHPNIVRFYDSWKSVLRGQVCIVLVTELMTSGTLKTYLRRFREMKPRVLQRWSRQILRGLHFLHSRVPPILHRDLKCDNVFITGPTGSVKIGDLGLATLKRASFAKSVIGTPEFMAPEMYEEKYDEAVDVYAFGMCMLEMATSEYPYSECQNAAQIYRKVTSGRKPNSFHKVKIPEVKEIIEGCIRTDKNERFTIQDLLAHAFFREERGVHVELAEEDDGEKPGLKLWLRMEDARRGGRPRDNQAIEFLFQLGRDAAEEVAQEMVALGLVCEADYQPVARAVRERVAAIQRKREKLRKARELEALPPEPGPPPATVPMAPGPPSVFPPEPEEPEADQHQPFLFRHASYSSTTSDCETDGYLSSSGFLD... | LKFDIEIGRGSFKTVYRGLDTDTTVEVAWCELQTRKLSRAERQRFSEEVEMLKGLQHPNIVRFYDSWKSVLRGQVCIVLVTELMTSGTLKTYLRRFREMKPRVLQRWSRQILRGLHFLHSRVPPILHRDLKCDNVFITGPTGSVKIGDLGLATLKRASFAKSVIGTPEFMAPEMYEEKYDEAVDVYAFGMCMLEMATSEYPYSECQNAAQIYRKVTSGRKPNSFHKVKIPEVKEIIEGCIRTDKNERFTIQDLLAHAFF | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 10.0 | aggresome |
387 | 0.767191 | YANK2 | PSPA | YANK2 | Q9NY57 | STK32B | AGC | YANK | None | YANK | 1 | 0 | 0 | 0 | YANK | acidophilic | NaN | NaN | 414 | MGGNHSHKPPVFDENEEVNFDHFQILRAIGKGSFGKVCIVQKRDTKKMYAMKYMNKQKCIERDEVRNVFRELQIMQGLEHPFLVNLWYSFQDEEDMFMVVDLLLGGDLRYHLQQNVHFTEGTVKLYICELALALEYLQRYHIIHRDIKPDNILLDEHGHVHITDFNIATVVKGAERASSMAGTKPYMAPEVFQVYMDRGPGYSYPVDWWSLGITAYELLRGWRPYEIHSVTPIDEILNMFKVERVHYSSTWCKGMVALLRKLLTKDPESRVSSLHDIQSVPYLADMNWDAVFKKALMPGFVPNKGRLNCDPTFELEEMILESKPLHKKKKRLAKNRSRDGTKDSCPLNGHLQHCLETVREEFIIFNREKLRRQQGQGSQLLDTDSRGGGQAQSKLQDGCNNNLLTHTCTRGCSS | FQILRAIGKGSFGKVCIVQKRDTKKMYAMKYMNKQKCIERDEVRNVFRELQIMQGLEHPFLVNLWYSFQDEEDMFMVVDLLLGGDLRYHLQQNVHFTEGTVKLYICELALALEYLQRYHIIHRDIKPDNILLDEHGHVHITDFNIATVVKGAERASSMAGTKPYMAPEVFQVYMDRGPGYSYPVDWWSLGITAYELLRGWRPYEIHSVTPIDEILNMFKVERVHYSSTWCKGMVALLRKLLTKDPESRVSSLHDIQSVPYL | 1.0 | 6.0 | NaN | 3.0 | NaN | NaN | NaN | NaN | NaN | NaN | cytosol |
388 | 0.866177 | YANK3 | PSPA | YANK3 | Q86UX6 | STK32C | AGC | YANK | None | YANK | 1 | 0 | 0 | 0 | YANK | acidophilic | NaN | NaN | 421 | MRSGAERRGSSAAASPGSPPPGRARPAGSDAPSALPPPAAGQPRARDSGDVRSQPRPLFQWSKWKKRMGSSMSAATARRPVFDDKEDVNFDHFQILRAIGKGSFGKVCIVQKRDTEKMYAMKYMNKQQCIERDEVRNVFRELEILQEIEHVFLVNLWYSFQDEEDMFMVVDLLLGGDLRYHLQQNVQFSEDTVRLYICEMALALDYLRGQHIIHRDVKPDNILLDERGHAHLTDFNIATIIKDGERATALAGTKPYMAPEIFHSFVNGGTGYSFEVDWWSVGVMAYELLRGWRPYDIHSSNAVESLVQLFSTVSVQYVPTWSKEMVALLRKLLTVNPEHRLSSLQDVQAAPALAGVLWDHLSEKRVEPGFVPNKGRLHCDPTFELEEMILESRPLHKKKKRLAKNKSRDNSRDSSQSENDYLQDCLDAIQQDFVIFNREKLKRSQDLPREPLPAPESRDAAEPVEDEAERSALPMCGPICPSAGSG | FQILRAIGKGSFGKVCIVQKRDTEKMYAMKYMNKQQCIERDEVRNVFRELEILQEIEHVFLVNLWYSFQDEEDMFMVVDLLLGGDLRYHLQQNVQFSEDTVRLYICEMALALDYLRGQHIIHRDVKPDNILLDERGHAHLTDFNIATIIKDGERATALAGTKPYMAPEIFHSFVNGGTGYSFEVDWWSVGVMAYELLRGWRPYDIHSSNAVESLVQLFSTVSVQYVPTWSKEMVALLRKLLTVNPEHRLSSLQDVQAAPAL | 2.0 | 5.0 | NaN | 3.0 | NaN | NaN | NaN | NaN | NaN | NaN | cytosol |
389 | 0.638188 | YSK4 | PSPA | MAP3K19 | Q56UN5 | MAP3K19 | STE | STE11 | None | STE11 | 1 | 0 | 0 | 0 | MAP3K | MAP3K | NaN | NaN | 388 | MSSMPKPERHAESLLDICHDTNSSPTDLMTVTKNQNIILQSISRSEEFDQDGDCSHSTLVNEEEDPSGGRQDWQPRTEGVEITVTFPRDVSPPQEMSQEDLKEKNLINSSLQEWAQAHAVSHPNEIETVELRKKKLTMRPLVLQKEESSRELCNVNLGFLLPRSCLELNISKSVTREDAPHFLKEQQRKSEEFSTSHMKYSGRSIKFLLPPLSLLPTRSGVLTIPQNHKFPKEKERNIPSLTSFVPKLSVSVRQSDELSPSNEPPGALVKSLMDPTLRSSDGFIWSRNMCSFPKTNHHRQCLEKEENWKSKEIEECNKIEITHFEKGQSLVSFENLKEGNIPAVREEDIDCHGSKTRKPEEENSQYLSSRKNESSVAKNYEQDPEIVCTIPSKFQETQHSEITPSQDEEMRNNKAASKRVSLHKNEAMEPNNILEECTVLKSLSSVVFDDPIDKLPEGCSSMETNIKISIAERAKPEMSRMVPLIHITFPVDGSPKEPVIAKPSLQTRKGTIHNNHSVNIPVHQENDKHKMNSHRSKLDSKTKTSKKTPQNFVISTEGPIKPTMHKTSIKTQIFPALGLVDPRPWQLPRFQKKMPQ... | WTKGEILGKGAYGTVYCGLTSQGQLIAVKQVALDTSNKLAAEKEYRKLQEEVDLLKALKHVNIVAYLGTCLQENTVSIFMEFVPGGSISSIINRFGPLPEMVFCKYTKQILQGVAYLHENCVVHRDIKGNNVMLMPTGIIKLIDFGCARRLAWAGLNGTHSDMLKSMHGTPYWMAPEVINESGYGRKSDIWSIGCTVFEMATGKPPLASMDRMAAMFYIGAHRGLMPPLPDHFSENAADFVRMCLTRDQHERPSALQLLKHSFL | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None |
390 rows × 32 columns
# Distinct kinase groups, in order of first appearance in `corr`.
group = corr['group'].unique()
group
array(['TK', 'TKL', 'Other', 'AGC', 'STE', 'CAMK', 'Atypical', 'CMGC', 'CK1'], dtype=object)
Individual kinase group
# Split the score table into one sub-frame per kinase group.
corrs = [corr.query(f'group == "{g}"') for g in group]

# One plot of Pearson by family per group; the group name is passed as the
# fourth argument (presumably the plot title -- confirm against plot_box).
for g, group_corr in zip(group, corrs):
    plot_box(group_corr, 'Pearson', 'family', g)
In general
# Palette loaded from a local pickle (presumably a group -> colour mapping).
# NOTE: unpickling executes arbitrary code; only load trusted files.
group_color = load_pickle("raw/kinase_color.pkl")

# Pearson summarised per kinase group, using the loaded palette.
plot_bar(corr, 'Pearson', 'group', palette=group_color, fontsize=20)
# plt.title('a')
Families with the most kinases
# Mean Pearson and number of rows per family (named aggregation), then show
# the 15 families with the most rows.
family_score = corr.groupby('family').agg(
    family_mean=('Pearson', 'mean'),
    cnt=('Pearson', 'size'),
)
family_score.sort_values('cnt', ascending=False).head(15)
family_mean | cnt | |
---|---|---|
family | ||
STE20 | 0.863932 | 27 |
CAMKL | 0.789590 | 20 |
CDK | 0.923265 | 17 |
MAPK | 0.881876 | 12 |
Eph | 0.976189 | 12 |
Src | 0.967152 | 11 |
DYRK | 0.865820 | 10 |
NEK | 0.778235 | 10 |
PKC | 0.949881 | 9 |
STKR | 0.857840 | 9 |
RSK | 0.937107 | 8 |
STE11 | 0.749903 | 7 |
CK1 | 0.948548 | 7 |
GRK | 0.813348 | 7 |
MLK | 0.733784 | 7 |