import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from katlas.core import *
from katlas.plot import *
from scipy.stats import spearmanr, pearsonr
import os
from PIL import Image
from tqdm import tqdm

# Plot heatmap and logo of PSPA
# -----------------------------

# Setup
# Render and save every figure at 200 dpi, using seaborn's "notebook" context
# and "ticks" style.
sns.set(rc={"figure.dpi":200, 'savefig.dpi':200})
sns.set_context('notebook')
sns.set_style("ticks")

# Load data
# `df` is the table the heatmaps (and the tyrosine logos) are drawn from;
# `raw` is the table the Ser/Thr logos are drawn from.
df = Data.get_pspa_all_norm()
# Keep only kinases whose `pseudo` field is the string "0".
info = Data.get_kinase_info().query('pseudo=="0"')
raw = pd.read_csv('raw/pspa_st_raw.csv').set_index('kinase')

# logomaker is only needed by the logo helpers below.
import logomaker


def logo_func(df: pd.DataFrame,  # a dataframe that contains ratios for each amino acid at each position
              title: str = 'logo',  # title of the motif logo
              figsize=(7, 2.5),  # (width, height) handed to logomaker.Logo
              ):
    "Use logomaker to plot a motif logo from a df matrix (rows: positions, columns: amino acids)"
    # Indicates color scheme of the amino acid
    aa = {
        'AG': '#037f04',
        'DEsty': '#da143e',  # sty seems to be the same color as big ST&Y even though we set it here
        'F': '#84380b',
        'HQN': '#8d2be1',
        'LMIFWTVC': '#d9a41c',
        'P': '#000000',
        'RK': '#0000ff',
        'ST': '#8d008d',  # STY overwrites the previous s,t,y as logomaker does not distinguish capital and lower case
        'Y': '#84380b',
        # some old settings
        # 'st':'#8d2be1',
        # 'y':'#8d2be1'
        # 'pS/pT':'#8d2be1',# logomaker does not support double letters like pS or pT
        # 'pY':'#8d2be1'
    }
    # NOTE(review): 'F' and 'T' each appear in two keys above; which colour
    # wins depends on how logomaker expands the dict -- confirm if it matters.

    # Use logomaker to plot
    logo = logomaker.Logo(df, color_scheme=aa, flip_below=False, figsize=figsize)  # 5.5,2.5
    logo.style_xticks(fmt='%d')  # integer position labels on the x axis
    logo.ax.set_yticks([])  # hide the y axis ticks
    logo.ax.set_title(title)


def get_logo(df: pd.DataFrame,  # stacked Dataframe with kinase as index, substrates as columns
             kinase: str,  # a specific kinase name in index
             figsize=(7, 2.5),  # (width, height) of the logo figure
             ):
    "Given stacked df (index as kinase, columns as substrates), get a specific kinase's logo"
    # get raw kinase to calculate S/T
    # NOTE(review): get_one_kinase comes from katlas; the code below relies on
    # it returning one row per position with amino acids as columns -- confirm.
    pp = get_one_kinase(df, kinase, normalize=False)

    # get S/T ratio value
    ss = pp['S'].sum()
    st = pp['T'].sum()
    # NOTE(review): the 0.75 / 0.25 weights are kept exactly as given; their
    # origin is not visible in this file.
    S_ctrl = 0.75*ss - 0.25*st
    T_ctrl = 0.75*st - 0.25*ss
    S0 = S_ctrl / max(S_ctrl, T_ctrl)
    T0 = T_ctrl / max(S_ctrl, T_ctrl)
    # S_ratio + T_ratio == 1, so the two letters share one stack at position 0.
    S_ratio = S0/(S0+T0)
    T_ratio = T0/(S0+T0)

    # get normalized kinase
    norm_p = get_one_kinase(df, kinase, normalize=True)

    # calculate ratio, divide values by median, followed by log2 transformation
    ratio = norm_p.apply(lambda r: r/r.median(), axis=1)
    ratio = np.log2(ratio)

    # Tallest positive stack among all positions; the S/T stack at position 0
    # is scaled to exactly this height.
    m = ratio.apply(lambda row: row[row > 0].sum(), axis=1).max()
    new_row = pd.DataFrame({'S': S_ratio*m, 'T': T_ratio*m}, index=[0])
    # Every other amino acid in the appended position-0 row becomes 0.
    ratio2 = pd.concat([ratio, new_row], ignore_index=False).fillna(0)

    # plot logo
    logo_func(ratio2, kinase, figsize)


def get_tyr_logo(df_norm,  # Dataframe with kinase as index (the caller passes normalized data)
                 kinase,  # a specific kinase name in index
                 figsize=(7, 2.5),  # (width, height) of the logo figure; same default as get_logo
                 ):
    "Plot the logo of a tyrosine kinase, with a full-height Y at position 0"
    norm_p = get_one_kinase(df_norm, kinase, normalize=False)
    # divide values by the per-position median, followed by log2 transformation
    ratio = norm_p.apply(lambda r: r/r.median(), axis=1)
    ratio = np.log2(ratio)
    # Discard the existing position-0 row; it is replaced by a Y-only row below.
    ratio = ratio.drop([0])
    # Tallest positive stack among the remaining positions.
    m = ratio.apply(lambda row: row[row > 0].sum(), axis=1).max()
    new_row = pd.DataFrame({'Y': 1*m}, index=[0])
    ratio2 = pd.concat([ratio, new_row], ignore_index=False).fillna(0)
    logo_func(ratio2, kinase, figsize)


## Test the logo function
# for k in Tyr[:3]:
#     get_tyr_logo(df,k)
#     plt.show()
#     plt.close()

# Kinase names of every entry in `info` whose group is not "TK".
ST = info[info.group!="TK"].kinase

# Generate all figures
# Uncomment plt.savefig to save figures
def prepare_matrix(df, kinase, aa_order=None):
    """Reshape one kinase's row of `df` into an amino-acid x position matrix.

    The column labels of `df` are parsed as ``<position><aa>``: the last
    character is the amino-acid letter and everything before it is the integer
    position (for example ``'-5P'`` or ``'0Y'``).

    Parameters
    ----------
    df : pd.DataFrame
        Table with kinases as the index and ``<position><aa>`` columns.
    kinase : str
        Index label of the row to reshape.
    aa_order : list of str, optional
        Row order of the returned matrix. Defaults to
        P, G, A, C, S, T, V, I, L, M, F, Y, W, H, K, R, Q, N, D, E, s, t, y.

    Returns
    -------
    pd.DataFrame
        Rows are amino acids in `aa_order`, columns are positions in ascending
        order with position 0 removed. Amino acids listed in `aa_order` but
        absent from the data appear as all-NaN rows.

    Raises
    ------
    KeyError
        If `kinase` is not in the index, or if the row has no position-0 column.
    """
    # A list literal as a default argument is shared between calls; build it here.
    if aa_order is None:
        aa_order = ['P', 'G', 'A', 'C', 'S', 'T', 'V', 'I', 'L', 'M', 'F', 'Y',
                    'W', 'H', 'K', 'R', 'Q', 'N', 'D', 'E', 's', 't', 'y']

    # One kinase as a long table: one record per non-missing <position><aa> column.
    d = df.loc[kinase].dropna()
    d = d.to_frame().reset_index(names='position_aa')

    # Split the label into its amino-acid letter and its integer position.
    d['aa'] = d.position_aa.str[-1]
    d['position'] = d.position_aa.str[:-1].astype(int)

    # The value column is named after the kinase (the name of the selected row).
    wide_form = d.pivot(index='aa', columns='position', values=kinase)

    # Position 0 is left out of the heatmap matrix.
    matrix = wide_form.drop(columns=[0])
    column_order = matrix.columns.sort_values()
    matrix = matrix.reindex(index=aa_order, columns=column_order)
    return matrix


# Re-apply the 200 dpi figure settings before generating the figures.
sns.set(rc={"figure.dpi":200, 'savefig.dpi':200})
sns.set_context('notebook')
sns.set_style("ticks")

# Split the kinases by the value of their '0Y' column: 0 goes to the Ser/Thr
# loop below, 1 goes to the tyrosine loop.
ST = df[df['0Y']==0].index
Tyr = df[df['0Y']==1].index

# plt.savefig does not create missing folders, so make sure they exist first.
for out_dir in ('PSPA_heatmaps/heatmap', 'PSPA_heatmaps/logo'):
    os.makedirs(out_dir, exist_ok=True)

# Ser/Thr kinases: heatmap from the normalized table, logo from the raw table.
for k in ST:
    matrix = prepare_matrix(df, k)
    plot_heatmap(matrix, title=k, figsize=(5.2, 10))
    plt.savefig(f'PSPA_heatmaps/heatmap/{k}.png', bbox_inches='tight', pad_inches=0)
    # plt.show()
    plt.close()

    get_logo(raw, k, (5.1, 2.5))  # use raw data
    plt.savefig(f'PSPA_heatmaps/logo/{k}.png', bbox_inches='tight', pad_inches=0.3)
    # plt.show()
    plt.close()
    # break

# Tyrosine kinases: heatmap and logo both come from the normalized table.
for k in Tyr:
    matrix = prepare_matrix(df, k)
    plot_heatmap(matrix, title=k, figsize=(5.5, 10))
    plt.savefig(f'PSPA_heatmaps/heatmap/{k}.png', bbox_inches='tight', pad_inches=0)
    # plt.show()
    plt.close()

    get_tyr_logo(df, k, (5.3, 2.5))  # use normalized data
    plt.savefig(f'PSPA_heatmaps/logo/{k}.png', bbox_inches='tight', pad_inches=0.3)
    # plt.show()
    plt.close()
    # break

# Combine figures for pdf
def combine_images_vertically(image_paths, output_path):
    """Stack images top to bottom and save the result to `output_path`.

    Parameters
    ----------
    image_paths : iterable of str
        Paths of the images to stack, in top-to-bottom order.
    output_path : str
        Where the combined image is written.

    Raises
    ------
    ValueError
        If `image_paths` is empty.

    Notes
    -----
    Every image is converted to RGBA and pasted at the left edge (x = 0). The
    canvas is as wide as the widest image and as tall as all images together;
    the area beside narrower images keeps the canvas's initial fill.
    """
    images = []
    for image_path in image_paths:
        # `convert` returns an independent copy, so the file can be closed at once.
        with Image.open(image_path) as source:
            images.append(source.convert('RGBA'))

    if not images:
        raise ValueError("image_paths must contain at least one image")

    total_width = max(image.width for image in images)
    total_height = sum(image.height for image in images)
    combined_image = Image.new('RGBA', (total_width, total_height))

    y_offset = 0
    for image in images:
        # The image doubles as its own mask so its alpha channel is respected.
        combined_image.paste(image, (0, y_offset), image)
        y_offset += image.height

    combined_image.save(output_path)


# Uncomment below to run
# For every kinase, stack its logo on top of its heatmap into one image.
folders = ["PSPA_heatmaps/logo", "PSPA_heatmaps/heatmap"]
# Image.save does not create missing folders, so make sure the target exists.
os.makedirs("PSPA_heatmaps/combine", exist_ok=True)
for k in tqdm(df.index,total=len(df)):
    filename = f"{k}.png"
    image_paths = [os.path.join(folder, filename) for folder in folders]
    output_path = f"PSPA_heatmaps/combine/{k}.png"
    combine_images_vertically(image_paths, output_path)
    # break
# Recorded output of a previous run:
# 100%|██████████| 396/396 [01:14<00:00, 5.31it/s]
# folders = ["PSPA_heatmaps/logo", "PSPA_heatmaps/heatmap"]
# for k in tqdm(Tyr,total=len(Tyr)):
# filename = f"{k}.png"
# image_paths = [os.path.join(folder, filename) for folder in folders]
# output_path = f"PSPA_heatmaps/combine/{k}.png"
# combine_images_vertically(image_paths, output_path)
# break