Reactome pathway analysis

Setup

!pip install reactome2py
Requirement already satisfied: reactome2py in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (3.0.0)
Requirement already satisfied: requests in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from reactome2py) (2.32.3)
Requirement already satisfied: pandas>=0.24.2 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from reactome2py) (2.1.4)
Requirement already satisfied: json5>=0.8.4 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from reactome2py) (0.9.25)
Requirement already satisfied: numpy<2,>=1.22.4 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from pandas>=0.24.2->reactome2py) (1.26.2)
Requirement already satisfied: python-dateutil>=2.8.2 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from pandas>=0.24.2->reactome2py) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from pandas>=0.24.2->reactome2py) (2024.1)
Requirement already satisfied: tzdata>=2022.1 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from pandas>=0.24.2->reactome2py) (2024.1)
Requirement already satisfied: charset-normalizer<4,>=2 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from requests->reactome2py) (3.3.2)
Requirement already satisfied: idna<4,>=2.5 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from requests->reactome2py) (3.7)
Requirement already satisfied: urllib3<3,>=1.21.1 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from requests->reactome2py) (2.2.1)
Requirement already satisfied: certifi>=2017.4.17 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from requests->reactome2py) (2024.6.2)
Requirement already satisfied: six>=1.5 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas>=0.24.2->reactome2py) (1.16.0)
from reactome2py import analysis
from matplotlib import pyplot as plt
from pandas import json_normalize
import os, pandas as pd, numpy as np, seaborn as sns
from tqdm import tqdm
from PIL import Image

Utils

def get_reactome_raw(gene_list):
    
    "Reactome pathway analysis for a given gene set; returns raw output in dataframe."
    
    gene_str = ','.join(gene_list)
    
    # set page size and page to -1 ensures to display all pathway results, sort by pvalue instead of fdr, projection set to True is consistent with official web
    result = analysis.identifiers(gene_str, page_size='-1', page='-1', sort_by='ENTITIES_PVALUE',projection=True)
    
    out = json_normalize(result['pathways'])
    
    return out

Example:

# get gene list
genes = pd.read_csv('raw/AKT1.csv')['gene'].tolist()

get_reactome_raw(genes).head()
stId dbId name llp inDisease species.dbId species.taxId species.name entities.resource entities.total entities.found entities.ratio entities.pValue entities.fdr entities.exp reactions.resource reactions.total reactions.found reactions.ratio
0 R-HSA-111465 111465 Apoptotic cleavage of cellular proteins True False 48887 9606 Homo sapiens TOTAL 38 33 0.002410 0.000122 0.294802 [] TOTAL 38 38 0.002490
1 R-HSA-5674400 5674400 Constitutive Signaling by AKT1 E17K in Cancer True True 48887 9606 Homo sapiens TOTAL 29 25 0.001839 0.000828 0.877515 [] TOTAL 18 18 0.001179
2 R-HSA-75153 75153 Apoptotic execution phase True False 48887 9606 Homo sapiens TOTAL 54 39 0.003424 0.001119 0.877515 [] TOTAL 57 57 0.003735
3 R-HSA-3108232 3108232 SUMO E3 ligases SUMOylate target proteins False False 48887 9606 Homo sapiens TOTAL 184 104 0.011668 0.002038 0.877515 [] TOTAL 132 102 0.008649
4 R-HSA-2990846 2990846 SUMOylation False False 48887 9606 Homo sapiens TOTAL 193 107 0.012238 0.003148 0.877515 [] TOTAL 141 111 0.009239
def plot_path(react_out, top_n=10,max_label_length=80):

    "Plot the output of get_reactome."
    
    # Extract the data and reverse it
    data = react_out.head(top_n).set_index('name')['-log10_pValue'].iloc[::-1]
    
    # Truncate labels if they are too long
    truncated_labels = [label[:max_label_length] + '...' if len(label) > max_label_length else label for label in data.index]
    data.index = truncated_labels

    # Calculate the required width: base width + additional width for the longest label
    base_width = 2
    max_label_length = max(data.index, key=len)
    additional_width = len(max_label_length) * 0.1  # Adjust scaling factor as needed
    
    figsize = (base_width + additional_width, 3*top_n/10)  # Adjust height as necessary

    # Plotting
    data.plot.barh(figsize=figsize)
    plt.ylabel('')
    plt.xlabel('-log10(p)')
    plt.tight_layout()
def plot_path(react_out, ref_df=None, top_n=10, max_label_length=80 ):
    """
    Plot the output of get_reactome.
    If ref_df is provided, bars corresponding to pathways in ref_df are shown in dark red.
    """
    import matplotlib.pyplot as plt

    # Take top_n rows
    subset = react_out.head(top_n)

    # Determine bar colors: if ref_df is provided, highlight matching pathways
    if ref_df is not None:
        ref_ids = set(ref_df['reactome_id'])
        colors = ['darkred' if rid in ref_ids else 'C0' for rid in subset['reactome_id']]
    else:
        colors = 'C0'

    # Reverse order for horizontal bar plot
    data = subset.set_index('name')['-log10_pValue'].iloc[::-1]
    # If colors is a list, reverse it to match the data order
    if isinstance(colors, list):
        colors = list(reversed(colors))

    # Truncate labels if too long
    truncated_labels = [label[:max_label_length] + '...' if len(label) > max_label_length else label for label in data.index]
    data.index = truncated_labels

    # Calculate figure width based on label length
    base_width = 2
    max_label = max(data.index, key=len)
    additional_width = len(max_label) * 0.1  # adjust scaling factor as needed
    figsize = (base_width + additional_width, 3 * top_n / 10)

    data.plot.barh(figsize=figsize, color=colors)
    plt.ylabel('')
    plt.xlabel('-log10(p)')
    plt.tight_layout()
plot_path(out_df,ref_df)
plt.title('AKT1');

def get_reactome(gene_list, plot=True):
    
    "Given a gene list, get the processed output of reactome; output contains additional -log10(p)"
    out = get_reactome_raw(gene_list)
    out = out[['stId','name','entities.pValue']]
    out.columns = ['reactome_id','name','pValue']
    out['-log10_pValue'] = -np.log10(out['pValue']).round(3)

    if plot:
        plot_path
    
    return out
out = get_reactome(genes)
out.head()
reactome_id name pValue -log10_pValue
0 R-HSA-111465 Apoptotic cleavage of cellular proteins 0.000122 3.914
1 R-HSA-5674400 Constitutive Signaling by AKT1 E17K in Cancer 0.000828 3.082
2 R-HSA-75153 Apoptotic execution phase 0.001119 2.951
3 R-HSA-3108232 SUMO E3 ligases SUMOylate target proteins 0.002038 2.691
4 R-HSA-2990846 SUMOylation 0.003148 2.502
out
reactome_id name pValue -log10_pValue
0 R-HSA-111465 Apoptotic cleavage of cellular proteins 0.000122 3.914
1 R-HSA-5674400 Constitutive Signaling by AKT1 E17K in Cancer 0.000828 3.082
2 R-HSA-75153 Apoptotic execution phase 0.001119 2.951
3 R-HSA-3108232 SUMO E3 ligases SUMOylate target proteins 0.002038 2.691
4 R-HSA-2990846 SUMOylation 0.003148 2.502
... ... ... ... ...
2240 R-HSA-2142753 Arachidonate metabolism 1.000000 -0.000
2241 R-HSA-8956319 Nucleotide catabolism 1.000000 -0.000
2242 R-HSA-446193 Biosynthesis of the N-glycan precursor (dolich... 1.000000 -0.000
2243 R-HSA-1660662 Glycosphingolipid metabolism 1.000000 -0.000
2244 R-HSA-425397 Transport of vitamins, nucleosides, and relate... 1.000000 -0.000

2245 rows × 4 columns

sns.set(rc={"figure.dpi":300, 'savefig.dpi':300})
sns.set_context('notebook')
sns.set_style("ticks")
plot_path(out)
plt.title('AKT1');

Reference

Download from Reactome/Download_data: https://reactome.org/download-data

Download UniProt to All pathways

def get_uniprot_ref(uniprot_id):
    ref = pd.read_excel('raw/reactome_refer_subpathways.xlsx')
    uniprot_ref = ref[ref.uniprot==uniprot_id].copy()
    uniprot_ref=uniprot_ref.drop_duplicates('reactome_id').reset_index(drop=True)
    col = ['uniprot','reactome_id','pathway','type','species']
    return uniprot_ref[col]
ref_akt = get_uniprot_ref('P31749')
ref_akt
uniprot reactome_id pathway type species
0 P31749 R-HSA-111447 Activation of BAD and translocation to mitocho... IEA Homo sapiens
1 P31749 R-HSA-1257604 PIP3 activates AKT signaling TAS Homo sapiens
2 P31749 R-HSA-1358803 Downregulation of ERBB2:ERBB3 signaling TAS Homo sapiens
3 P31749 R-HSA-1445148 Translocation of SLC2A4 (GLUT4) to the plasma ... IEA Homo sapiens
4 P31749 R-HSA-1474151 Tetrahydrobiopterin (BH4) synthesis, recycling... TAS Homo sapiens
5 P31749 R-HSA-165159 MTOR signalling TAS Homo sapiens
6 P31749 R-HSA-198323 AKT phosphorylates targets in the cytosol TAS Homo sapiens
7 P31749 R-HSA-198693 AKT phosphorylates targets in the nucleus TAS Homo sapiens
8 P31749 R-HSA-199418 Negative regulation of the PI3K/AKT network TAS Homo sapiens
9 P31749 R-HSA-203615 eNOS activation TAS Homo sapiens
10 P31749 R-HSA-211163 AKT-mediated inactivation of FOXO1A TAS Homo sapiens
11 P31749 R-HSA-354192 Integrin signaling TAS Homo sapiens
12 P31749 R-HSA-3769402 Deactivation of the beta-catenin transactivati... TAS Homo sapiens
13 P31749 R-HSA-389357 CD28 dependent PI3K/Akt signaling TAS Homo sapiens
14 P31749 R-HSA-389513 Co-inhibition by CTLA4 TAS Homo sapiens
15 P31749 R-HSA-392451 G beta:gamma signalling through PI3Kgamma TAS Homo sapiens
16 P31749 R-HSA-450385 Butyrate Response Factor 1 (BRF1) binds and de... TAS Homo sapiens
17 P31749 R-HSA-450604 KSRP (KHSRP) binds and destabilizes mRNA TAS Homo sapiens
18 P31749 R-HSA-5218920 VEGFR2 mediated vascular permeability TAS Homo sapiens
19 P31749 R-HSA-5628897 TP53 Regulates Metabolic Genes TAS Homo sapiens
20 P31749 R-HSA-5674400 Constitutive Signaling by AKT1 E17K in Cancer TAS Homo sapiens
21 P31749 R-HSA-6785807 Interleukin-4 and Interleukin-13 signaling TAS Homo sapiens
22 P31749 R-HSA-6804757 Regulation of TP53 Degradation TAS Homo sapiens
23 P31749 R-HSA-6804758 Regulation of TP53 Activity through Acetylation TAS Homo sapiens
24 P31749 R-HSA-6804759 Regulation of TP53 Activity through Associatio... TAS Homo sapiens
25 P31749 R-HSA-6811558 PI5P, PP2A and IER3 Regulate PI3K/AKT Signaling TAS Homo sapiens
26 P31749 R-HSA-69202 Cyclin E associated events during G1/S transit... TAS Homo sapiens
27 P31749 R-HSA-69656 Cyclin A:Cdk2-associated events at S phase entry TAS Homo sapiens
28 P31749 R-HSA-8849469 PTK6 Regulates RTKs and Their Effectors AKT1 a... TAS Homo sapiens
29 P31749 R-HSA-8876198 RAB GEFs exchange GTP for GDP on RABs TAS Homo sapiens
30 P31749 R-HSA-8941332 RUNX2 regulates genes involved in cell migration IEA Homo sapiens
31 P31749 R-HSA-8948751 Regulation of PTEN stability and activity TAS Homo sapiens
32 P31749 R-HSA-9009391 Extra-nuclear estrogen signaling TAS Homo sapiens
33 P31749 R-HSA-9604323 Negative regulation of NOTCH4 signaling TAS Homo sapiens
34 P31749 R-HSA-9607240 FLT3 Signaling TAS Homo sapiens
35 P31749 R-HSA-9614399 Regulation of localization of FOXO transcripti... TAS Homo sapiens
36 P31749 R-HSA-9634638 Estrogen-dependent nuclear events downstream o... TAS Homo sapiens
37 P31749 R-HSA-9755511 KEAP1-NFE2L2 pathway TAS Homo sapiens
38 P31749 R-HSA-9755779 SARS-CoV-2 targets host intracellular signalli... TAS Homo sapiens
39 P31749 R-HSA-9841251 Mitochondrial unfolded protein response (UPRmt) TAS Homo sapiens
40 P31749 R-HSA-9856530 High laminar flow shear stress activates signa... TAS Homo sapiens
41 P31749 R-HSA-9856532 Mechanical load activates signaling by PIEZO1 ... IEA Homo sapiens
out_df = out.copy()
ref_df = ref_akt.copy()
def get_rank(r_id):
    ranks = out_df.index[out_df['reactome_id'] == r_id].tolist()
    return ranks[0] + 1 if ranks else None  # Return None if not found

# Apply the function to ref_df
ref_df['rank_in_out'] = ref_df['reactome_id'].apply(get_rank)
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score

# Assuming ref_df and out_df are your dataframes
ref_ids = set(ref_df['reactome_id'])
out_df['is_ref'] = out_df['reactome_id'].isin(ref_ids)

# Define predicted positives using a pValue threshold
threshold = 0.05
out_df['predicted'] = out_df['pValue'] <= threshold

# Calculate metrics
# precision = precision_score(out_df['is_ref'], out_df['predicted'])
# recall = recall_score(out_df['is_ref'], out_df['predicted'])
# f1 = f1_score(out_df['is_ref'], out_df['predicted'])
roc_auc = roc_auc_score(out_df['is_ref'], out_df['-log10_pValue'])

# print("Precision:", precision)
# print("Recall:", recall)
# print("F1 Score:", f1)
print("ROC-AUC:", roc_auc)

import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

# Compute false positive rate (fpr) and true positive rate (tpr)
fpr, tpr, thresholds = roc_curve(out_df['is_ref'], out_df['-log10_pValue'])

# Calculate the area under the curve (AUC)
roc_auc = auc(fpr, tpr)

# Create the ROC curve plot
plt.figure()
plt.plot(fpr, tpr, lw=2, label=f'ROC curve (area = {roc_auc:0.2f})')
plt.plot([0, 1], [0, 1], lw=2, linestyle='--', color='gray')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.show()
ROC-AUC: 0.6408955320666624

Run

from katlas.core import *
from katlas.plot import *

For the original human phosphoproteome (all-uppercase):

site= Data.get_combine_site_psp_ochoa()

For phosphorylated human phosphoproteome, uncheck below:

# site = Data.get_combine_site_phosphorylated()

For PSP site, uncheck below:

# site = Data.get_psp_human_site()

# site['acceptor'] = site.site_seq.str[7]

# # remove none sty-phosphoacceptor sequence
# site = site['acceptor'].isin(['s','t','y'])]

# # convert lowercase other than s,t,y to capital; convert rare aa to _
# site['site_seq'] = site.site_seq.apply(convert_string)

# site = site.reset_index(drop=True)

# site.to_parquet('PSP_human_processed_sites.parquet')

# site = pd.read_parquet('PSP_human_processed_sites.parquet')

As the original phosphoproteome is an all-uppercase dataset, we choose param to be param_CDDM_upper

param = param_CDDM_upper

## for phosphorylated dataset, we will choose the standard param
# param = param_CDDM
## or
# param = param_PSPA
ref = param['ref']
results = predict_kinase_df(site,'site_seq',**param)
input dataframe has a length 121419
Preprocessing
Finish preprocessing
Merging reference
Finish merging
result_df = pd.concat([site,results],axis=1)

Plot score distribution

result_df['acceptor'] =  result_df.site_seq.str[7].str.upper()
palette = get_color_dict(['S','T','Y'],'tab20')

hist_params = {'element':'poly',
              'edgecolor': None,
              'alpha':0.5,
              'bins':100,
              'kde':True,
              'palette':palette}
palette
{'S': (0.12156862745098039, 0.4666666666666667, 0.7058823529411765),
 'T': (0.6823529411764706, 0.7803921568627451, 0.9098039215686274),
 'Y': (1.0, 0.4980392156862745, 0.054901960784313725)}
def plot_hist(df,colname,sty_thr=None,hue='acceptor'):
    
    "Plot histogram of a column (kinase). "
    
    plt.figure(figsize=(6,2))
    
    sns.histplot(data=df,x=colname,hue=hue,**hist_params)
    
    plt.xlabel('')
    plt.title(colname)

    if sty_thr:
        for acceptor,thr in sty_thr.items():
            if thr is not None:
                plt.axvline(thr,color=palette[acceptor])

Ratio method to determine threshold

  • Step1: Locate the max phosphoacceptor in the STY ratio of a kinase
  • Step2: Get top 2% sites for that phosphoacceptor, and get the number of sites.
  • Step3: Use the number and STY ratio to calculate the number of sites for other phosphoacceptor.
def get_ratio(df, kinase, ref, pct=0.98):
    # Copy relevant columns and explode gene column
    data = df[['gene', 'acceptor', kinase]].copy()
    data['gene'] = data.gene.str.split('|')
    data = data.explode('gene')

    # Get STY ratio from reference data
    row = ref[['0S', '0T', '0Y']].loc[kinase]
    if row['0Y'] > 0.8: # apply to pspa
        ratio = {'S': 0, 'T': 0, 'Y': 1}
    elif 0.1 < row['0Y'] < 0.8: # does not apply to pspa
        ratio = {'S': row['0S'], 'T': row['0T'], 'Y': row['0Y']}
    else: # apply to pspa
        total = row['0S'] + row['0T']
        ratio = {'S': row['0S'] / total, 'T': row['0T'] / total, 'Y': 0}

    # Calculate thresholds and get genes
    sty_thr = {'S': None, 'T': None, 'Y': None}
    g_list = []

    # Identify the largest acceptor and calculate its total for top percentile
    max_acceptor = max(ratio, key=ratio.get)
    max_data = data[data['acceptor'] == max_acceptor]

    thr = max_data[kinase].quantile(pct)
    top_max_data = max_data[max_data[kinase]>thr]
    top_total = len(top_max_data)
    
    threshold_max = top_max_data[kinase].min()

    # Scale other acceptors and get the threshold
    for acceptor in ['S', 'T', 'Y']:
        if ratio[acceptor] != 0:
            scaled_data = data[data['acceptor'] == acceptor]
            scaled_data = scaled_data.sort_values(kinase, ascending=False)
            n_top = int(top_total * (ratio[acceptor] / ratio[max_acceptor]))
            top_scaled_data = scaled_data.head(n_top)
            

            g_list.append(top_scaled_data['gene'])
            sty_thr[acceptor] = top_scaled_data[kinase].min()

    genes = pd.concat(g_list).drop_duplicates().dropna().tolist()

    return sty_thr, genes

Pipeline

  1. Get threshold and genes within the percentiile
k = 'ATM'
sty_threshold,genes = get_ratio(result_df,k,ref=ref)
sty_threshold
{'S': 2.6404023, 'T': 1.9869915, 'Y': None}
  1. with threshold, plot score distribute
sns.set(rc={"figure.dpi":300, 'savefig.dpi':300})
sns.set_context('notebook')
sns.set_style("ticks")
plot_hist(result_df,k,sty_threshold)

  1. run reactoe pathway analysis
out = get_reactome(genes)
# exclude paths that has no variance across kinases
exclude=['GTPase','SUMO']
for n in exclude:
    out = out[~out.name.str.contains(n)]
  1. plot path
plot_path(out,10)
plt.title(k);

Run all kinases and save images

from pathlib import Path
from fastcore.xtras import *
def get_img(df, kinase_list, ref, SAVE=False,save_folder='cddm',exclude=['GTPase','SUMO'],pct=0.98):

    paths = []
    for kinase in tqdm(kinase_list):

        sty_thr,genes = get_ratio(df,kinase,ref=ref,pct=pct)

        out = get_reactome(genes)

        out['kinase'] = kinase
        paths.append(out)

        # plot score distribution
        plot_hist(df,kinase,sty_thr)

        distribute_figname = Path(f'{save_folder}/score_distribute/{kinase}.png')
        distribute_figname.parent.mkdir(parents=True,exist_ok=True)
        
        plt.savefig(distribute_figname,bbox_inches='tight', pad_inches=0.3) if SAVE else plt.show()
        plt.close()


        # exclude paths that has no variance across kinases
        out_cut = out.copy()
        for n in exclude:
            out_cut = out_cut[~out_cut.name.str.contains(n)]
            
        # plot pathway bargraph
        plot_path(out_cut,10)
        
        # title_acceptor ='/'.join(acceptors)
        # plt.title(f'{kinase}, top 2% substrates in {title_acceptor} sites')

        path_figname = Path(f'{save_folder}/path_fig/{kinase}.png')
        path_figname.parent.mkdir(parents=True,exist_ok=True)
        
        plt.savefig(path_figname,bbox_inches='tight', pad_inches=0) if SAVE else plt.show()
        plt.close()
    return paths
paths = get_img(result_df,['ATM','AKT1','EGFR'],ref,SAVE=False,save_folder='test')
  0%|          | 0/3 [00:00<?, ?it/s]

 33%|███▎      | 1/3 [00:01<00:02,  1.39s/it]

 67%|██████▋   | 2/3 [00:02<00:01,  1.32s/it]

100%|██████████| 3/3 [00:03<00:00,  1.32s/it]

Uncheck below to generate figures for all of the kinases:

# paths = get_img(result_df,ref.index,ref,SAVE=True,save_folder='test')

The above line will create two folders under the {save_folder}: - path_fig folder contains the pathway analysis. - score_distribute folder contains histogram of score distribution.

Combine images for pdf

def combine_images_vertically(image_paths, output_path):
    # Open images and convert them to 'RGBA' for uniformity
    images = [Image.open(image_path).convert('RGBA') for image_path in image_paths]
    
    # Calculate the total width as the maximum width of any image
    total_width = max(image.width for image in images)
    # Calculate the total height by summing the heights of all images
    total_height = sum(image.height for image in images)
    
    # set the background white board size
    # total_width, total_height = 2010,1200
    total_width, total_height = 3000,1750

    # Create a new image with a white background in 'RGBA' mode
    combined_image = Image.new('RGBA', (total_width, total_height), (255, 255, 255, 255))

    # Initialize the y_offset to start pasting images from the top
    y_offset = 0
    for image in images:
        # Calculate the x position to center the image
        x_offset = (total_width - image.width) // 2
        # Paste the current image into the combined image
        combined_image.paste(image, (x_offset, y_offset), image)
        # Update the y_offset to move to the next position for the next image
        y_offset += image.height

    # Save the combined image to the specified output path
    combined_image.save(output_path)

Uncheck below to merge the pathway figures and score distribution figures together:

# folders = ["test/score_distribute", "test/path_fig"]
# for k in tqdm(ref.index):
#     filename = f"{k}.png"
#     image_paths = [os.path.join(folder, filename) for folder in folders]
#     output_path = Path(f"test/combine/{k}.png")
#     output_path.parent.mkdir(parents=True,exist_ok=True)
    
#     combine_images_vertically(image_paths, output_path)

Save path in csv

Uncheck below to save pathway csv:

# path_data = pd.concat(paths)

# path_data = path_data[['kinase','name','pValue','-log10_pValue']]

# path_data.columns = ['kinase','Reactome_pathway','pValue','neg_log10_pValue']

# path_data.to_csv('pathway_analysis.csv',index=False)