KNN

import pandas as pd
# Read the two input tables from the relative ``out/`` directory:
#   * ``df``   - Excel workbook ``uniprot_kd_active_D1_D2.xlsx``
#   * ``pspa`` - CSV file ``pspa_uniprot_unique_no_TYR_category_remove2kd.csv``
# NOTE(review): ``df`` is not referenced in the visible part of this notebook;
# presumably it is consumed by later cells - confirm before removing.
df = pd.read_excel("out/uniprot_kd_active_D1_D2.xlsx")
pspa = pd.read_csv("out/pspa_uniprot_unique_no_TYR_category_remove2kd.csv")

# Bare expression kept as the final statement so the notebook cell displays
# the loaded ``pspa`` table.
pspa
uniprot kd_ID kinase group pspa_category pspa_category_detail -5P -5G -5A -5C ... 4E 4s 4t 4y 0s 0t 0y 0S 0T 0Y
0 Q2M2I8 Q2M2I8_AAK1_HUMAN_KD1 AAK1 Other NAK NAK 0.0720 0.0245 0.0284 0.0456 ... 0.0457 0.0251 0.0251 0.0270 0.1013 1.0000 0.0 0.1013 1.0000 0.0
1 P27037 P27037_AVR2A_HUMAN_KD1 ACVR2A TKL acidophilic TGFBR 0.0415 0.0481 0.0584 0.0489 ... 0.0640 0.0703 0.0703 0.0589 0.9833 1.0000 0.0 0.9833 1.0000 0.0
2 Q13705 Q13705_AVR2B_HUMAN_KD1 ACVR2B TKL acidophilic TGFBR 0.0533 0.0517 0.0566 0.0772 ... 0.0697 0.0761 0.0761 0.0637 0.9593 1.0000 0.0 0.9593 1.0000 0.0
3 P31749 P31749_AKT1_HUMAN_KD1 AKT1 AGC basophilic AKT/ROCK 0.0603 0.0594 0.0552 0.0605 ... 0.0312 0.0393 0.0393 0.0263 1.0000 0.6440 0.0 1.0000 0.6440 0.0
4 P31751 P31751_AKT2_HUMAN_KD1 AKT2 AGC basophilic AKT/ROCK 0.0602 0.0617 0.0643 0.0582 ... 0.0350 0.0548 0.0548 0.0417 1.0000 0.6077 0.0 1.0000 0.6077 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
357 P17948 P17948_VGFR1_HUMAN_KD1 FLT1 TK FGF and VEGF receptors FGF and VEGF receptors 0.0642 0.0688 0.0597 0.0618 ... 0.0510 0.0677 0.0677 0.0380 0.0000 0.0000 1.0 0.0000 0.0000 1.0
358 P35968 P35968_VGFR2_HUMAN_KD1 KDR TK FGF and VEGF receptors FGF and VEGF receptors 0.0634 0.0672 0.0556 0.0517 ... 0.0338 0.0300 0.0300 0.0292 0.0000 0.0000 1.0 0.0000 0.0000 1.0
359 P35916 P35916_VGFR3_HUMAN_KD1 FLT4 TK FGF and VEGF receptors FGF and VEGF receptors 0.0457 0.0531 0.0488 0.0553 ... 0.0497 0.0445 0.0445 0.0500 0.0000 0.0000 1.0 0.0000 0.0000 1.0
360 P07947 P07947_YES_HUMAN_KD1 YES1 TK SRC SRC 0.0677 0.0571 0.0537 0.0530 ... 0.0492 0.0371 0.0371 0.0467 0.0000 0.0000 1.0 0.0000 0.0000 1.0
361 P43403 P43403_ZAP70_HUMAN_KD1 ZAP70 TK SYK and FAK SYK and FAK 0.0602 0.0880 0.0623 0.0496 ... 0.0558 0.0440 0.0440 0.0318 0.0000 0.0000 1.0 0.0000 0.0000 1.0

362 rows × 219 columns