Plot heatmap and logo of PSPA

Setup

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from katlas.core import *
from katlas.plot import *

from scipy.stats import spearmanr, pearsonr

import os
from PIL import Image
from tqdm import tqdm
# Global plotting defaults: figure.dpi and savefig.dpi are both set to 200,
# then the 'notebook' context and the 'ticks' style are applied on top.
sns.set(rc={"figure.dpi":200, 'savefig.dpi':200})
sns.set_context('notebook')
sns.set_style("ticks")

Load data

# PSPA table returned by the katlas loader (normalized, per the loader name);
# the loops below index it by kinase name and read its '0Y' column.
df = Data.get_pspa_all_norm()
# Kinase annotation table, keeping only rows whose 'pseudo' column equals the string "0"
info= Data.get_kinase_info().query('pseudo=="0"')
# Table read from a local CSV and indexed by its 'kinase' column; passed to get_logo below
raw = pd.read_csv('raw/pspa_st_raw.csv').set_index('kinase')
def logo_func(df: pd.DataFrame,  # matrix holding the ratio of each amino acid at each position
              title: str = 'logo',  # text placed above the motif logo
              figsize=(7, 2.5),  # figure size forwarded to logomaker.Logo
              ):
    "Draw a motif logo from the `df` matrix with logomaker, using a fixed amino-acid palette"

    # Each key is a group of one-letter residue codes that share one color.
    # Notes kept from the original author:
    #   - logomaker does not distinguish upper and lower case, so the 'ST' and 'Y'
    #     entries end up deciding the color of s/t/y even though 'DEsty' lists them
    #   - multi-character symbols such as 'pS', 'pT' or 'pY' are not supported as keys
    palette = dict(
        AG='#037f04',
        DEsty='#da143e',
        F='#84380b',
        HQN='#8d2be1',
        LMIFWTVC='#d9a41c',
        P='#000000',
        RK='#0000ff',
        ST='#8d008d',
        Y='#84380b',
    )

    # Letters with negative values are not flipped upside down (flip_below=False)
    motif = logomaker.Logo(df, color_scheme=palette, flip_below=False, figsize=figsize)

    # Integer position labels on x, no ticks on y, title on top
    motif.style_xticks(fmt='%d')
    axis = motif.ax
    axis.set_yticks([])
    axis.set_title(title)
import logomaker
def get_logo(df: pd.DataFrame,  # stacked dataframe: kinases as index, substrates as columns
             kinase: str,  # name of one kinase found in the index
             figsize=(7, 2.5),  # figure size forwarded to logo_func
             ):
    "Plot the motif logo of one kinase from a stacked df, with an extra S/T stack at position 0"

    # Un-normalized matrix of this kinase, used only to estimate the S vs T preference
    raw_matrix = get_one_kinase(df, kinase, normalize=False)
    s_total = raw_matrix['S'].sum()
    t_total = raw_matrix['T'].sum()

    # Corrected S and T signals
    s_ctrl = 0.75 * s_total - 0.25 * t_total
    t_ctrl = 0.75 * t_total - 0.25 * s_total

    # Scale by the larger of the two, then turn them into fractions that sum to 1
    ctrl_max = max(s_ctrl, t_ctrl)
    s_scaled = s_ctrl / ctrl_max
    t_scaled = t_ctrl / ctrl_max
    scaled_sum = s_scaled + t_scaled
    s_fraction = s_scaled / scaled_sum
    t_fraction = t_scaled / scaled_sum

    # Normalized matrix: divide every row by its median, then take log2
    norm_matrix = get_one_kinase(df, kinase, normalize=True)
    log_ratio = np.log2(norm_matrix.apply(lambda values: values / values.median(), axis=1))

    # Largest per-row sum of the positive entries; sets the height of the S/T stack
    tallest = log_ratio.where(log_ratio > 0).sum(axis=1).max()

    # Row labelled 0 carrying only S and T; every other letter is filled with 0
    center_row = pd.DataFrame({'S': s_fraction * tallest, 'T': t_fraction * tallest}, index=[0])
    logo_matrix = pd.concat([log_ratio, center_row], ignore_index=False).fillna(0)

    logo_func(logo_matrix, kinase, figsize)
def get_tyr_logo(df_norm,  # stacked dataframe passed to get_one_kinase (kinases as index)
                 kinase,  # name of one kinase found in the index
                 figsize=(7, 2.5),  # figure size forwarded to logo_func; same default as get_logo
                 ):
    "Plot the motif logo of one kinase with a single 'Y' stack placed at position 0"

    # The matrix is taken as-is (normalize=False); the caller passes already-normalized data
    norm_p = get_one_kinase(df_norm, kinase, normalize=False)

    # Divide every row by its median, then take log2
    ratio = norm_p.apply(lambda r: r / r.median(), axis=1)
    ratio = np.log2(ratio)

    # Remove the existing row labelled 0 so it can be replaced by the 'Y' row below
    ratio = ratio.drop([0])

    # Largest per-row sum of the positive entries; used as the height of the 'Y' letter
    m = ratio.apply(lambda row: row[row > 0].sum(), axis=1).max()
    new_row = pd.DataFrame({'Y': 1 * m}, index=[0])

    # Every other letter in the new row is filled with 0
    ratio2 = pd.concat([ratio, new_row], ignore_index=False).fillna(0)
    logo_func(ratio2, kinase, figsize)
## Test the logo function
# for k in Tyr[:3]:
#     get_tyr_logo(df,k)
#     plt.show()
#     plt.close()
# Kinases whose 'group' annotation is not "TK".
# NOTE: ST is reassigned further down from the '0Y' column of df before the plotting loop uses it.
ST = info[info.group!="TK"].kinase

Generate all figures

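Each figure is written to disk with plt.savefig; comment those lines out and uncomment plt.show to preview the figures without saving them.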

def prepare_matrix(df, kinase, aa_order=None):
    """Reshape one kinase row into an amino-acid x position matrix.

    Column labels of `df` are parsed as ``<position><aa>`` (e.g. ``-5P``, ``0S``):
    the last character is the amino acid and the rest is the integer position.

    Parameters
    ----------
    df : DataFrame whose index contains `kinase`.
    kinase : index label of the row to reshape.
    aa_order : row order of the returned matrix; letters absent from the data
        become all-NaN rows. Defaults to the 20 amino acids followed by
        's', 't', 'y'.

    Returns
    -------
    DataFrame with amino acids as rows (in `aa_order`) and sorted positions
    as columns, with position 0 removed when present.
    """
    # A None sentinel avoids sharing one mutable default list across calls
    if aa_order is None:
        aa_order = ['P', 'G', 'A', 'C', 'S', 'T', 'V', 'I', 'L', 'M', 'F', 'Y',
                    'W', 'H', 'K', 'R', 'Q', 'N', 'D', 'E', 's', 't', 'y']

    # Long form: one line per non-missing '<position><aa>' entry of this kinase
    long_form = df.loc[kinase].dropna().rename_axis('position_aa').reset_index()
    long_form['aa'] = long_form.position_aa.str[-1]
    long_form['position'] = long_form.position_aa.str[:-1].astype(int)

    wide_form = long_form.pivot(index='aa', columns='position', values=kinase)

    # Position 0 is left out of the matrix; tolerate data that has no such column
    matrix = wide_form.drop(columns=[0], errors='ignore')

    column_order = matrix.columns.sort_values()
    return matrix.reindex(index=list(aa_order), columns=column_order)
# Plot defaults: figure.dpi and savefig.dpi set to 200, 'notebook' context, 'ticks' style
sns.set(rc={"figure.dpi":200, 'savefig.dpi':200})
sns.set_context('notebook')
sns.set_style("ticks")

# Split the kinases by the value stored in the '0Y' column of df
ST = df[df['0Y']==0].index
Tyr = df[df['0Y']==1].index

# plt.savefig does not create missing folders, so make sure both targets exist
for out_dir in ('PSPA_heatmaps/heatmap', 'PSPA_heatmaps/logo'):
    os.makedirs(out_dir, exist_ok=True)

# Kinases with '0Y' == 0: heatmap from df, logo from the raw table
for k in ST:
    matrix=prepare_matrix(df,k)
    plot_heatmap(matrix,title=k,figsize=(5.2,10))
    plt.savefig(f'PSPA_heatmaps/heatmap/{k}.png',bbox_inches='tight', pad_inches=0)
    # plt.show()
    plt.close()
    
    get_logo(raw, k,(5.1,2.5)) # use raw data
    plt.savefig(f'PSPA_heatmaps/logo/{k}.png',bbox_inches='tight', pad_inches=0.3)
    # plt.show()
    plt.close()
    # break

# Kinases with '0Y' == 1: heatmap and logo both from df
for k in Tyr:
    matrix=prepare_matrix(df,k)
    plot_heatmap(matrix,title=k,figsize=(5.5,10))
    plt.savefig(f'PSPA_heatmaps/heatmap/{k}.png',bbox_inches='tight', pad_inches=0)
    # plt.show()
    plt.close()
    
    get_tyr_logo(df, k,(5.3,2.5)) # use normalized data
    plt.savefig(f'PSPA_heatmaps/logo/{k}.png',bbox_inches='tight', pad_inches=0.3)
    # plt.show()
    plt.close()
    # break

Combine figures for pdf

def combine_images_vertically(image_paths, output_path):
    """Stack images top-to-bottom into one RGBA image and save it.

    Parameters
    ----------
    image_paths : iterable of paths to the images, listed from top to bottom.
    output_path : destination file of the combined image.

    Raises
    ------
    ValueError
        If `image_paths` is empty.

    The canvas is as wide as the widest image and as tall as the sum of all
    heights. Each image is pasted at the left edge, so narrower images leave
    transparent space on their right.
    """
    paths = list(image_paths)
    if not paths:
        raise ValueError("image_paths must contain at least one image")

    images = []
    for path in paths:
        # convert() returns an independent copy, so the file can be closed right away
        with Image.open(path) as source:
            images.append(source.convert('RGBA'))

    canvas_width = max(image.width for image in images)
    canvas_height = sum(image.height for image in images)
    canvas = Image.new('RGBA', (canvas_width, canvas_height))

    top = 0
    for image in images:
        # The image is also passed as the mask so that its own alpha channel is respected
        canvas.paste(image, (0, top), image)
        top += image.height

    canvas.save(output_path)

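Run the loop below to combine the logo and heatmap of every kinase; the commented-out block after it does the same for the kinases in `Tyr` only.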

# For every kinase in df, stack its logo image above its heatmap image
# (the order of `folders` is the top-to-bottom order) and save the combined file.
folders = ["PSPA_heatmaps/logo", "PSPA_heatmaps/heatmap"]
for k in tqdm(df.index,total=len(df)):
    # The same file name is looked up in each folder
    filename = f"{k}.png"
    image_paths = [os.path.join(folder, filename) for folder in folders]
    output_path = f"PSPA_heatmaps/combine/{k}.png"
    
    combine_images_vertically(image_paths, output_path)
    # break
100%|██████████| 396/396 [01:14<00:00,  5.31it/s]
# folders = ["PSPA_heatmaps/logo", "PSPA_heatmaps/heatmap"]
# for k in tqdm(Tyr,total=len(Tyr)):
#     filename = f"{k}.png"
#     image_paths = [os.path.join(folder, filename) for folder in folders]
#     output_path = f"PSPA_heatmaps/combine/{k}.png"
    
#     combine_images_vertically(image_paths, output_path)
#     break