import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from katlas.core import *
from katlas.plot import *
from scipy.stats import spearmanr, pearsonr
import os
from PIL import Image
from tqdm import tqdm
# Plot heatmap and logo of PSPA
# Setup
# Plotting defaults: 200 dpi for displayed and saved figures,
# seaborn "notebook" context and "ticks" style.
sns.set(rc={"figure.dpi": 200, 'savefig.dpi': 200})
sns.set_context('notebook')
sns.set_style("ticks")
# Load data
# Normalized PSPA table from katlas (indexed by kinase: rows are looked up
# with df.loc[kinase] further down).
df = Data.get_pspa_all_norm()

# Kinase annotation table, restricted to rows whose `pseudo` value is the string "0".
info = Data.get_kinase_info().query('pseudo=="0"')

# Raw PSPA values read from a local CSV, indexed by its `kinase` column.
raw = pd.read_csv('raw/pspa_st_raw.csv').set_index('kinase')
def logo_func(df:pd.DataFrame, # a dataframe that contains ratios for each amino acid at each position
              title:str='logo', # title of the motif logo
              figsize=(7,2.5), # figure size forwarded to logomaker.Logo
             ):
    "Use logomaker to plot a motif logo given a df matrix"
    # Color scheme of the amino acids: each key is a group of one-letter
    # codes that share the color given as value.
    aa = {
        'AG': '#037f04',
        'DEsty': '#da143e', # sty seems to be the same color as big ST&Y even though we set it here
        'F': '#84380b',
        'HQN': '#8d2be1',
        'LMIFWTVC': '#d9a41c',
        'P': '#000000',
        'RK': '#0000ff',
        'ST': '#8d008d', # STY overwrites the previous s,t,y as logomaker does not distinguish capital and lower case
        'Y': '#84380b',
        # some old settings
        # 'st':'#8d2be1',
        # 'y':'#8d2be1'
        # 'pS/pT':'#8d2be1',# logomaker does not support double letters like pS or pT
        # 'pY':'#8d2be1'
    }

    # Use logomaker to plot; flip_below=False is passed through unchanged.
    logo = logomaker.Logo(df, color_scheme=aa, flip_below=False, figsize=figsize)

    # Integer-formatted x tick labels, no y ticks, and the requested title.
    logo.style_xticks(fmt='%d')
    logo.ax.set_yticks([])
    logo.ax.set_title(title)
import logomaker
def get_logo(df: pd.DataFrame, # stacked Dataframe with kinase as index, substrates as columns
             kinase: str, # a specific kinase name in index
             figsize=(7,2.5), # figure size forwarded to logo_func
            ):
    "Given stacked df (index as kinase, columns as substrates), get a specific kinase's logo"
    # get raw (un-normalized) kinase values to calculate S/T
    # NOTE(review): get_one_kinase is assumed to return a frame with one row
    # per position and one column per amino acid -- confirm against katlas.
    pp = get_one_kinase(df, kinase, normalize=False)

    # get S/T ratio value: column totals for S and T
    ss = pp['S'].sum()
    st = pp['T'].sum()

    # each total is weighted 0.75 and reduced by 0.25 of the other one
    S_ctrl = 0.75*ss - 0.25*st
    T_ctrl = 0.75*st - 0.25*ss

    # scale both by the larger value, then turn them into fractions summing to 1
    largest = max(S_ctrl, T_ctrl)
    S0 = S_ctrl / largest
    T0 = T_ctrl / largest

    S_ratio = S0/(S0+T0)
    T_ratio = T0/(S0+T0)

    # get normalized kinase
    norm_p = get_one_kinase(df, kinase, normalize=True)

    # calculate ratio, divide values by median, followed by log2 transformation
    ratio = norm_p.apply(lambda r: r/r.median(), axis=1)
    ratio = np.log2(ratio)

    # m is the largest per-row sum of positive values, i.e. the tallest stack
    m = ratio.apply(lambda row: row[row > 0].sum(), axis=1).max()

    # extra row labelled 0 holding only S and T; their heights add up to m
    new_row = pd.DataFrame({'S': S_ratio*m, 'T': T_ratio*m}, index=[0])

    # every cell left empty by the concat is set to 0
    ratio2 = pd.concat([ratio, new_row], ignore_index=False).fillna(0)

    # plot logo
    logo_func(ratio2, kinase, figsize)
def get_tyr_logo(df_norm, # stacked dataframe (kinase as index); the caller passes the normalized data
                 kinase, # a specific kinase name in index
                 figsize=(7,2.5), # figure size forwarded to logo_func; same default as get_logo
                ):
    "Plot the motif logo of one kinase, with row 0 replaced by a single 'Y'"
    # values are taken as-is (normalize=False)
    norm_p = get_one_kinase(df_norm, kinase, normalize=False)

    # divide each row by its median, followed by log2 transformation
    ratio = norm_p.apply(lambda r: r/r.median(), axis=1)
    ratio = np.log2(ratio)

    # remove the existing row labelled 0; it is replaced below
    ratio = ratio.drop([0])

    # m is the largest per-row sum of positive values, i.e. the tallest stack
    m = ratio.apply(lambda row: row[row > 0].sum(), axis=1).max()

    # new row 0 holds only Y, with height m
    new_row = pd.DataFrame({'Y': m}, index=[0])

    # every cell left empty by the concat is set to 0
    ratio2 = pd.concat([ratio, new_row], ignore_index=False).fillna(0)

    logo_func(ratio2, kinase, figsize)
## Test the logo function
# for k in Tyr[:3]:
# get_tyr_logo(df,k)
# plt.show()
# plt.close()
# Kinase names from the annotation table whose group is not "TK".
ST = info[info.group!="TK"].kinase
# Generate all figures
# Uncomment plt.savefig to save figures
def prepare_matrix(df, kinase, aa_order=None):
    """Reshape one kinase row of `df` into an amino-acid x position matrix.

    The columns of `df` are named ``<position><aa>``: everything but the last
    character is parsed as an integer position and the last character is the
    amino acid (e.g. ``-5P``, ``0Y``).

    Parameters
    ----------
    df : DataFrame indexed by kinase.
    kinase : row label to extract.
    aa_order : row order of the result; defaults to the 23-letter order
        P, G, A, C, S, T, V, I, L, M, F, Y, W, H, K, R, Q, N, D, E, s, t, y.

    Returns
    -------
    DataFrame with amino acids as rows (in `aa_order`) and the positions as
    ascending columns, without position 0.

    Raises
    ------
    KeyError if the kinase has no position-0 entry to drop.
    """
    # None sentinel instead of a list default, so no list object is shared
    # between calls.
    if aa_order is None:
        aa_order = ['P', 'G', 'A', 'C', 'S', 'T', 'V', 'I', 'L', 'M', 'F', 'Y',
                    'W', 'H', 'K', 'R', 'Q', 'N', 'D', 'E', 's', 't', 'y']

    # long form: one line per non-missing '<position><aa>' entry of the kinase
    d = df.loc[kinase].dropna()
    d = d.to_frame().reset_index(names='position_aa')

    # split the label into amino acid (last char) and integer position (the rest)
    d['aa'] = d.position_aa.str[-1]
    d['position'] = d.position_aa.str[:-1].astype(int)

    # wide form: amino acids as rows, positions as columns
    wide_form = d.pivot(index='aa', columns='position', values=kinase)

    # position 0 is left out of the matrix
    matrix = wide_form.drop(columns=[0])

    # fixed row order, ascending positions
    column_order = matrix.columns.sort_values()
    matrix = matrix.reindex(index=aa_order, columns=column_order)
    return matrix
# Plotting defaults for the figure generation below: 200 dpi for displayed
# and saved figures, seaborn "notebook" context and "ticks" style.
sns.set(rc={"figure.dpi": 200, 'savefig.dpi': 200})
sns.set_context('notebook')
sns.set_style("ticks")
# Split the kinases by the value of the '0Y' column: 0 -> ST, 1 -> Tyr.
ST = df[df['0Y']==0].index
Tyr = df[df['0Y']==1].index
# For every kinase in ST: save a heatmap, then a motif logo, one PNG each.
for k in ST:
    # heatmap from the normalized table
    matrix = prepare_matrix(df, k)
    plot_heatmap(matrix, title=k, figsize=(5.2,10))
    plt.savefig(f'PSPA_heatmaps/heatmap/{k}.png', bbox_inches='tight', pad_inches=0)
    # plt.show()
    plt.close()

    # logo
    get_logo(raw, k, (5.1,2.5)) # use raw data
    plt.savefig(f'PSPA_heatmaps/logo/{k}.png', bbox_inches='tight', pad_inches=0.3)
    # plt.show()
    plt.close()
    # break
# For every kinase in Tyr: save a heatmap, then a motif logo, one PNG each.
for k in Tyr:
    # heatmap from the normalized table
    matrix = prepare_matrix(df, k)
    plot_heatmap(matrix, title=k, figsize=(5.5,10))
    plt.savefig(f'PSPA_heatmaps/heatmap/{k}.png', bbox_inches='tight', pad_inches=0)
    # plt.show()
    plt.close()

    # logo
    get_tyr_logo(df, k, (5.3,2.5)) # use normalized data
    plt.savefig(f'PSPA_heatmaps/logo/{k}.png', bbox_inches='tight', pad_inches=0.3)
    # plt.show()
    plt.close()
    # break
# Combine figures for pdf
def combine_images_vertically(image_paths, output_path):
    """Stack the given images top to bottom and save the result.

    Parameters
    ----------
    image_paths : paths of the images, listed from top to bottom.
    output_path : path the combined image is saved to.

    The canvas is as wide as the widest image and as tall as the sum of all
    heights; every image is pasted at x=0.

    Raises
    ------
    ValueError if `image_paths` is empty.
    """
    images = []
    for image_path in image_paths:
        # convert() returns a new image, so the file can be closed right away
        with Image.open(image_path) as source:
            images.append(source.convert('RGBA'))

    if not images:
        raise ValueError("image_paths must contain at least one image")

    total_width = max(image.width for image in images)
    total_height = sum(image.height for image in images)

    combined_image = Image.new('RGBA', (total_width, total_height))

    y_offset = 0
    for image in images:
        # the image is also passed as the paste mask
        combined_image.paste(image, (0, y_offset), image)
        y_offset += image.height

    combined_image.save(output_path)
# Uncomment below to run
# For every kinase, stack its logo on top of its heatmap and save the result.
folders = ["PSPA_heatmaps/logo", "PSPA_heatmaps/heatmap"]
for k in tqdm(df.index, total=len(df)):
    filename = f"{k}.png"
    # same file name in both folders, logo first
    image_paths = [os.path.join(folder, filename) for folder in folders]
    output_path = f"PSPA_heatmaps/combine/{k}.png"

    combine_images_vertically(image_paths, output_path)
    # break
# Output: 100%|██████████| 396/396 [01:14<00:00, 5.31it/s]
# folders = ["PSPA_heatmaps/logo", "PSPA_heatmaps/heatmap"]
# for k in tqdm(Tyr,total=len(Tyr)):
# filename = f"{k}.png"
# image_paths = [os.path.join(folder, filename) for folder in folders]
# output_path = f"PSPA_heatmaps/combine/{k}.png"
# combine_images_vertically(image_paths, output_path)
# break