from katlas.feature import *
import pandas as pd
# ProT5 embeddings
## Extract T5 embeddings
# Read the 'uniprot_kd.xlsx' workbook into a DataFrame.
df = pd.read_excel('uniprot_kd.xlsx')
# Bare expression: in a notebook cell this displays (n_rows, n_columns).
df.shape
(5536, 27)
# Compute T5 embeddings from the 'kd_seq' column of df.
# NOTE(review): get_t5 presumably comes from the `katlas.feature` star import
# and returns a DataFrame with one row per row of df -- confirm.
t5feature = get_t5(df, 'kd_seq')
# Label each embedding row with the matching kd_ID value from df.
t5feature_df = t5feature.set_index(df.kd_ID)
# Uncomment to persist the embeddings to parquet.
# t5feature_df.to_parquet('out/uniprot_kd_t5.parquet')
## Load
import pandas as pd
# Load the T5 embeddings from the parquet file.
t5 = pd.read_parquet('out/uniprot_kd_t5.parquet')
# Keep only the rows whose index label contains 'HUMAN'.
human = t5.loc[t5.index.str.contains('HUMAN')]
# Bare expression: in a notebook cell this displays the filtered frame.
human
kd_ID | T5_0 | T5_1 | T5_2 | T5_3 | T5_4 | T5_5 | T5_6 | T5_7 | T5_8 | T5_9 | ... | T5_1014 | T5_1015 | T5_1016 | T5_1017 | T5_1018 | T5_1019 | T5_1020 | T5_1021 | T5_1022 | T5_1023 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
A4D2B8_PM2P1_HUMAN_KD1 | 0.036774 | 0.059082 | 0.017517 | 0.029770 | -0.031342 | 0.051331 | -0.036804 | -0.007477 | 0.003513 | 0.023254 | ... | 0.005329 | -0.007664 | -0.023956 | -0.061340 | 0.055481 | -0.052155 | -0.057739 | -0.063416 | 0.037384 | -0.042938 |
A4QPH2_PI4P2_HUMAN_KD1 | 0.085022 | 0.115601 | -0.003788 | -0.025375 | 0.002710 | 0.014046 | -0.027451 | -0.091187 | -0.025314 | -0.008125 | ... | -0.009933 | -0.020767 | -0.030838 | -0.020828 | 0.036560 | -0.056000 | -0.019379 | 0.061218 | -0.001641 | 0.055908 |
O00141_SGK1_HUMAN_KD1 | 0.044617 | 0.130127 | -0.013618 | 0.014923 | -0.008316 | -0.014488 | -0.027954 | -0.045044 | 0.008698 | -0.031647 | ... | -0.034180 | -0.009949 | -0.016205 | -0.006100 | 0.057068 | -0.041412 | -0.022461 | -0.004608 | 0.008453 | 0.003035 |
O00238_BMR1B_HUMAN_KD1 | 0.048584 | 0.143311 | 0.049774 | 0.009216 | -0.013748 | 0.034637 | -0.025513 | -0.067810 | 0.004253 | -0.000382 | ... | -0.042328 | -0.019745 | -0.037445 | -0.010078 | 0.058380 | -0.034637 | -0.032471 | -0.003811 | -0.033539 | -0.025116 |
O00311_CDC7_HUMAN_KD1 | 0.037994 | 0.018890 | -0.033600 | 0.005436 | -0.018784 | 0.082947 | -0.035309 | -0.002960 | 0.068237 | 0.008904 | ... | -0.050690 | -0.050323 | -0.017853 | -0.051605 | 0.019638 | 0.011986 | -0.053955 | 0.011955 | 0.024643 | 0.030258 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
Q9Y616_IRAK3_HUMAN_KD1 | -0.016159 | 0.030655 | 0.008446 | 0.023926 | 0.003010 | 0.025528 | -0.027893 | -0.068665 | 0.048035 | -0.020081 | ... | 0.000594 | -0.020996 | 0.041138 | -0.086670 | 0.060516 | 0.029602 | -0.006458 | -0.001279 | 0.013954 | -0.011383 |
Q9Y6E0_STK24_HUMAN_KD1 | 0.083130 | 0.109009 | 0.007038 | 0.000793 | 0.000355 | 0.006264 | -0.031525 | -0.044250 | -0.047546 | -0.010468 | ... | -0.040649 | -0.009003 | -0.076111 | -0.012901 | 0.069702 | -0.057648 | -0.023697 | -0.003151 | 0.001767 | -0.020844 |
Q9Y6M4_KC1G3_HUMAN_KD1 | 0.081421 | 0.142700 | -0.008904 | -0.010002 | -0.019638 | 0.021576 | -0.047394 | -0.060516 | -0.010567 | -0.041534 | ... | -0.013542 | 0.002172 | -0.053345 | 0.003716 | 0.061646 | -0.072937 | -0.019058 | -0.049835 | -0.028427 | -0.005585 |
Q9Y6R4_M3K4_HUMAN_KD1 | 0.069580 | 0.103821 | 0.000365 | 0.015762 | -0.018250 | 0.008316 | -0.008224 | -0.042358 | 0.005066 | -0.021591 | ... | -0.022095 | -0.014961 | -0.034424 | -0.036560 | 0.068237 | -0.025970 | -0.039673 | -0.009048 | -0.008766 | -0.023636 |
Q9Y6S9_RPKL1_HUMAN_KD1 | -0.009552 | 0.027008 | 0.041565 | 0.003145 | -0.022430 | 0.084839 | -0.022430 | -0.047516 | 0.069275 | 0.007629 | ... | 0.039185 | -0.053375 | -0.039093 | -0.053223 | -0.015930 | 0.031982 | 0.005432 | 0.007584 | 0.032257 | 0.042114 |
539 rows × 1024 columns