ProtT5 embeddings

from katlas.feature import *
import pandas as pd

Extract T5 embeddings

# Read the source spreadsheet into a DataFrame; later cells use its
# 'kd_seq' and 'kd_ID' columns.
df = pd.read_excel(io='uniprot_kd.xlsx')
# Trailing expression so the notebook displays (n_rows, n_columns).
df.shape
(5536, 27)
# Compute T5 embeddings from the 'kd_seq' column of df.
# NOTE(review): get_t5 comes from katlas.feature; presumably it returns one
# embedding row per input row, in the same order as df — confirm.
t5feature = get_t5(df, 'kd_seq')

# Label each embedding row with its kd_ID. set_index with a Series assigns the
# values by position, so this relies on t5feature and df sharing row order.
t5feature_df = t5feature.set_index(df['kd_ID'])

# Saving is disabled; uncomment to (re)write the file that the Load section reads.
# t5feature_df.to_parquet('out/uniprot_kd_t5.parquet')

Load the saved embeddings

import pandas as pd
# Read the embedding table back from disk (same path as the commented-out
# to_parquet call in the extraction section).
t5 = pd.read_parquet(path='out/uniprot_kd_t5.parquet')

# Keep only rows whose index label contains the substring 'HUMAN'
# (e.g. 'O00141_SGK1_HUMAN_KD1'); all columns are retained.
human = t5.loc[t5.index.str.contains('HUMAN'), :]

# Trailing expression so the notebook renders the filtered table.
human
T5_0 T5_1 T5_2 T5_3 T5_4 T5_5 T5_6 T5_7 T5_8 T5_9 ... T5_1014 T5_1015 T5_1016 T5_1017 T5_1018 T5_1019 T5_1020 T5_1021 T5_1022 T5_1023
kd_ID
A4D2B8_PM2P1_HUMAN_KD1 0.036774 0.059082 0.017517 0.029770 -0.031342 0.051331 -0.036804 -0.007477 0.003513 0.023254 ... 0.005329 -0.007664 -0.023956 -0.061340 0.055481 -0.052155 -0.057739 -0.063416 0.037384 -0.042938
A4QPH2_PI4P2_HUMAN_KD1 0.085022 0.115601 -0.003788 -0.025375 0.002710 0.014046 -0.027451 -0.091187 -0.025314 -0.008125 ... -0.009933 -0.020767 -0.030838 -0.020828 0.036560 -0.056000 -0.019379 0.061218 -0.001641 0.055908
O00141_SGK1_HUMAN_KD1 0.044617 0.130127 -0.013618 0.014923 -0.008316 -0.014488 -0.027954 -0.045044 0.008698 -0.031647 ... -0.034180 -0.009949 -0.016205 -0.006100 0.057068 -0.041412 -0.022461 -0.004608 0.008453 0.003035
O00238_BMR1B_HUMAN_KD1 0.048584 0.143311 0.049774 0.009216 -0.013748 0.034637 -0.025513 -0.067810 0.004253 -0.000382 ... -0.042328 -0.019745 -0.037445 -0.010078 0.058380 -0.034637 -0.032471 -0.003811 -0.033539 -0.025116
O00311_CDC7_HUMAN_KD1 0.037994 0.018890 -0.033600 0.005436 -0.018784 0.082947 -0.035309 -0.002960 0.068237 0.008904 ... -0.050690 -0.050323 -0.017853 -0.051605 0.019638 0.011986 -0.053955 0.011955 0.024643 0.030258
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
Q9Y616_IRAK3_HUMAN_KD1 -0.016159 0.030655 0.008446 0.023926 0.003010 0.025528 -0.027893 -0.068665 0.048035 -0.020081 ... 0.000594 -0.020996 0.041138 -0.086670 0.060516 0.029602 -0.006458 -0.001279 0.013954 -0.011383
Q9Y6E0_STK24_HUMAN_KD1 0.083130 0.109009 0.007038 0.000793 0.000355 0.006264 -0.031525 -0.044250 -0.047546 -0.010468 ... -0.040649 -0.009003 -0.076111 -0.012901 0.069702 -0.057648 -0.023697 -0.003151 0.001767 -0.020844
Q9Y6M4_KC1G3_HUMAN_KD1 0.081421 0.142700 -0.008904 -0.010002 -0.019638 0.021576 -0.047394 -0.060516 -0.010567 -0.041534 ... -0.013542 0.002172 -0.053345 0.003716 0.061646 -0.072937 -0.019058 -0.049835 -0.028427 -0.005585
Q9Y6R4_M3K4_HUMAN_KD1 0.069580 0.103821 0.000365 0.015762 -0.018250 0.008316 -0.008224 -0.042358 0.005066 -0.021591 ... -0.022095 -0.014961 -0.034424 -0.036560 0.068237 -0.025970 -0.039673 -0.009048 -0.008766 -0.023636
Q9Y6S9_RPKL1_HUMAN_KD1 -0.009552 0.027008 0.041565 0.003145 -0.022430 0.084839 -0.022430 -0.047516 0.069275 0.007629 ... 0.039185 -0.053375 -0.039093 -0.053223 -0.015930 0.031982 0.005432 0.007584 0.032257 0.042114

539 rows × 1024 columns