from katlas.core import *
import pandas as pd
Phosphorylate entire protein sequence
# Load the dataset whose site and substrate-sequence columns are used in the steps below.
df_group= Data.get_ks_dataset()
0
O00141_A4FU28_S140
O00141
A4FU28
S140
Sugiyama
CTAGE9
MEEPGATPQPYLGLVLEELGRVVAALPESMRPDENPYGFPSELVVC...
140
AAAEEARSLEATCEKLSRsNsELEDEILCLEKDLKEEKSKH
A4FU28_S140
MEEPGATPQPYLGLVLEELGRVVAALPESMRPDENPYGFPSELVVC...
1
O00141_O00141_S252
O00141
O00141
S252
Sugiyama
SGK1 SGK
MTVKTEAAKGTLTYSRMRGMVAILIAFMKQRRMGLNDFIQKIANNS...
252
SQGHIVLTDFGLCKENIEHNsTtstFCGtPEyLAPEVLHKQ
O00141_S252
MTVKTEAAKGTLTYSRMRGMVAILIAFMKQRRMGLNDFIQKIANNS...
2
O00141_O00141_S255
O00141
O00141
S255
Sugiyama
SGK1 SGK
MTVKTEAAKGTLTYSRMRGMVAILIAFMKQRRMGLNDFIQKIANNS...
255
HIVLTDFGLCKENIEHNsTtstFCGtPEyLAPEVLHKQPYD
O00141_S255
MTVKTEAAKGTLTYSRMRGMVAILIAFMKQRRMGLNDFIQKIANNS...
3
O00141_O00141_S397
O00141
O00141
S397
Sugiyama
SGK1 SGK
MTVKTEAAKGTLTYSRMRGMVAILIAFMKQRRMGLNDFIQKIANNS...
397
sGPNDLRHFDPEFTEEPVPNsIGKsPDsVLVTAsVKEAAEA
O00141_S397
MTVKTEAAKGTLTYSRMRGMVAILIAFMKQRRMGLNDFIQKIANNS...
4
O00141_O00141_S404
O00141
O00141
S404
Sugiyama
SGK1 SGK
MTVKTEAAKGTLTYSRMRGMVAILIAFMKQRRMGLNDFIQKIANNS...
404
HFDPEFTEEPVPNsIGKsPDsVLVTAsVKEAAEAFLGFsYA
O00141_S404
MTVKTEAAKGTLTYSRMRGMVAILIAFMKQRRMGLNDFIQKIANNS...
Signature:
phosphorylate_seq_df(
df,
id_col= 'substrate_uniprot' ,
site_info_col= 'site' ,
sub_seq_col= 'substrate_sequence' ,
)
Docstring: Phosphorylate whole sequence based on phosphosites in a dataframe
File: ~/katlas/katlas/core.py
Type: function
# Apply the phosphosites listed in df_group to each substrate's full-length
# sequence; the preview below shows the site list next to the resulting
# sequence with the modified residues in lowercase.
seq = phosphorylate_seq_df(df_group)
seq.head(n=1)
0
A0A2R8Y4L2
[S95, S22, T25, S6, S158]
MSKSESPKEPEQLRKLFIGGLSFETTDESLRSHFEQWGTLTDCVVM...
MSKSEsPKEPEQLRKLFIGGLsFEtTDESLRSHFEQWGTLTDCVVM...
# Series keyed by substrate UniProt ID whose values are the phosphorylated sequences.
seq_map = seq.set_index('substrate_uniprot')['substrate_phosphoseq']
# Attach the phosphorylated sequence to every row of df_group via its substrate ID.
df_group['substrate_phosphoseq'] = df_group['substrate_uniprot'].map(seq_map)
0
O00141_A4FU28_S140
O00141
A4FU28
S140
Sugiyama
CTAGE9
MEEPGATPQPYLGLVLEELGRVVAALPESMRPDENPYGFPSELVVC...
140
AAAEEARSLEATCEKLSRsNsELEDEILCLEKDLKEEKSKH
A4FU28_S140
MEEPGATPQPYLGLVLEELGRVVAALPESMRPDENPYGFPSELVVC...
1
O00141_O00141_S252
O00141
O00141
S252
Sugiyama
SGK1 SGK
MTVKTEAAKGTLTYSRMRGMVAILIAFMKQRRMGLNDFIQKIANNS...
252
SQGHIVLTDFGLCKENIEHNsTtstFCGtPEyLAPEVLHKQ
O00141_S252
MTVKTEAAKGTLTYSRMRGMVAILIAFMKQRRMGLNDFIQKIANNS...
2
O00141_O00141_S255
O00141
O00141
S255
Sugiyama
SGK1 SGK
MTVKTEAAKGTLTYSRMRGMVAILIAFMKQRRMGLNDFIQKIANNS...
255
HIVLTDFGLCKENIEHNsTtstFCGtPEyLAPEVLHKQPYD
O00141_S255
MTVKTEAAKGTLTYSRMRGMVAILIAFMKQRRMGLNDFIQKIANNS...
3
O00141_O00141_S397
O00141
O00141
S397
Sugiyama
SGK1 SGK
MTVKTEAAKGTLTYSRMRGMVAILIAFMKQRRMGLNDFIQKIANNS...
397
sGPNDLRHFDPEFTEEPVPNsIGKsPDsVLVTAsVKEAAEA
O00141_S397
MTVKTEAAKGTLTYSRMRGMVAILIAFMKQRRMGLNDFIQKIANNS...
4
O00141_O00141_S404
O00141
O00141
S404
Sugiyama
SGK1 SGK
MTVKTEAAKGTLTYSRMRGMVAILIAFMKQRRMGLNDFIQKIANNS...
404
HFDPEFTEEPVPNsIGKsPDsVLVTAsVKEAAEAFLGFsYA
O00141_S404
MTVKTEAAKGTLTYSRMRGMVAILIAFMKQRRMGLNDFIQKIANNS...
Cut sequence
To trim a sequence to a window around its center position, use `cut_seq`:
# Trim a single sequence string with the -5 / 4 window arguments.
cut_seq('AAkUuPSFSTtH', -5, 4)
# Apply the same window to every entry of the site_seq column.
df['site_seq'].apply(lambda site: cut_seq(site, -5, 4))
0 GSRLLSMVPG
1 DEKGDSNDDY
2 SAGLLSDEDC
3 DHLFWSEETK
4 RFTEYSMTSS
Name: site_seq, dtype: object
Check site
# Overwrite the site_seq column with whatever check_seq_df returns for it.
# NOTE(review): presumably this validates each site sequence — confirm against katlas.core.
df['site_seq' ] = check_seq_df(df,'site_seq' )