from katlas.core import *
import pandas as pd
PSPA scaled data
Normalize so that each position sums to 1
# Load the table returned by Data.get_pspa_all_norm() and drop every column
# that contains a missing value (dropna with axis=1 operates on columns).
pspa = Data.get_pspa_all_norm().dropna(axis=1)
def normalize_pspa_row(r):
    """Return row `r` after a recover -> clean/normalize -> flatten round trip.

    The row is passed through `recover_pssm`, `clean_zero_normalize` and
    `flatten_pssm`, in that order, and the result of `flatten_pssm` is
    returned unchanged.

    NOTE(review): the three helpers are not defined in this file (presumably
    they come from `katlas.core`); per the surrounding notes the intent is
    that each position sums to 1 afterwards -- confirm against the helpers.
    """
    pssm = recover_pssm(r)
    normalized = clean_zero_normalize(pssm)
    return flatten_pssm(normalized)
# Run normalize_pspa_row on every row of `pspa`; wrapping each result in a
# pd.Series makes apply(axis=1) expand the results back into columns.
pspa_scale = pspa.apply(lambda r: pd.Series(normalize_pspa_row(r)), axis=1)
# pspa_scale.to_parquet('~/katlas/dataset/PSPA/pspa_all_scale.parquet')
Data.get_pspa_all_scale()
-5P | -5G | -5A | -5C | -5S | -5T | -5V | -5I | -5L | -5M | ... | 4H | 4K | 4R | 4Q | 4N | 4D | 4E | 4pS | 4pT | 4pY | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
kinase | |||||||||||||||||||||
AAK1 | 0.05845 | 0.01989 | 0.02305 | 0.03702 | 0.03450 | 0.03450 | 0.07720 | 0.12615 | 0.08061 | 0.07014 | ... | 0.04482 | 0.06651 | 0.07427 | 0.05082 | 0.04738 | 0.03113 | 0.03657 | 0.02009 | 0.02009 | 0.02161 |
ACVR2A | 0.02971 | 0.03443 | 0.04180 | 0.03500 | 0.04137 | 0.04137 | 0.04281 | 0.04474 | 0.04266 | 0.03729 | ... | 0.04202 | 0.03865 | 0.03601 | 0.04517 | 0.04077 | 0.04693 | 0.04693 | 0.05155 | 0.05155 | 0.04319 |
ACVR2B | 0.03779 | 0.03665 | 0.04013 | 0.05473 | 0.03779 | 0.03779 | 0.03850 | 0.03134 | 0.03339 | 0.03658 | ... | 0.04056 | 0.03261 | 0.03514 | 0.04229 | 0.03846 | 0.05278 | 0.05039 | 0.05502 | 0.05502 | 0.04605 |
AKT1 | 0.04669 | 0.04599 | 0.04274 | 0.04684 | 0.03995 | 0.03995 | 0.03306 | 0.03368 | 0.03592 | 0.03910 | ... | 0.05299 | 0.09151 | 0.08648 | 0.05874 | 0.05187 | 0.03541 | 0.02494 | 0.03141 | 0.03141 | 0.02102 |
AKT2 | 0.04617 | 0.04732 | 0.04931 | 0.04464 | 0.04095 | 0.04095 | 0.03321 | 0.03206 | 0.03781 | 0.03934 | ... | 0.05199 | 0.08844 | 0.07580 | 0.04992 | 0.04770 | 0.02772 | 0.02680 | 0.04196 | 0.04196 | 0.03193 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
KDR | 0.04816 | 0.05105 | 0.04224 | 0.03927 | 0.04110 | 0.03996 | 0.03244 | 0.03191 | 0.03251 | 0.03616 | ... | 0.04386 | 0.07036 | 0.07377 | 0.04718 | 0.03979 | 0.03074 | 0.02808 | 0.02492 | 0.02492 | 0.02426 |
FLT4 | 0.03554 | 0.04129 | 0.03795 | 0.04300 | 0.03982 | 0.03663 | 0.03360 | 0.03881 | 0.03686 | 0.04122 | ... | 0.05899 | 0.05158 | 0.06051 | 0.04823 | 0.03842 | 0.04042 | 0.03962 | 0.03548 | 0.03548 | 0.03986 |
WEE1_TYR | 0.04065 | 0.04899 | 0.04279 | 0.04287 | 0.03314 | 0.03330 | 0.04348 | 0.04371 | 0.04876 | 0.04302 | ... | 0.04213 | 0.09097 | 0.10234 | 0.04450 | 0.03460 | 0.02749 | 0.03166 | 0.03051 | 0.03051 | 0.03788 |
YES1 | 0.05216 | 0.04399 | 0.04137 | 0.04084 | 0.04060 | 0.03891 | 0.03352 | 0.02889 | 0.03082 | 0.03567 | ... | 0.04928 | 0.04643 | 0.05988 | 0.05157 | 0.03947 | 0.03330 | 0.03891 | 0.02934 | 0.02934 | 0.03694 |
ZAP70 | 0.03902 | 0.05704 | 0.04038 | 0.03215 | 0.03053 | 0.03332 | 0.03014 | 0.02463 | 0.01990 | 0.03410 | ... | 0.05540 | 0.04640 | 0.04599 | 0.05879 | 0.03352 | 0.04929 | 0.04607 | 0.03633 | 0.03633 | 0.02626 |
396 rows × 230 columns