Reactome pathway analysis

Setup

!pip install reactome2py

Requirement already satisfied: reactome2py in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (3.0.0)
Requirement already satisfied: requests in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from reactome2py) (2.32.3)
Requirement already satisfied: pandas>=0.24.2 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from reactome2py) (2.1.4)
Requirement already satisfied: json5>=0.8.4 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from reactome2py) (0.9.25)
Requirement already satisfied: numpy<2,>=1.22.4 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from pandas>=0.24.2->reactome2py) (1.26.2)
Requirement already satisfied: python-dateutil>=2.8.2 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from pandas>=0.24.2->reactome2py) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from pandas>=0.24.2->reactome2py) (2024.1)
Requirement already satisfied: tzdata>=2022.1 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from pandas>=0.24.2->reactome2py) (2024.1)
Requirement already satisfied: charset-normalizer<4,>=2 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from requests->reactome2py) (3.3.2)
Requirement already satisfied: idna<4,>=2.5 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from requests->reactome2py) (3.7)
Requirement already satisfied: urllib3<3,>=1.21.1 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from requests->reactome2py) (2.2.1)
Requirement already satisfied: certifi>=2017.4.17 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from requests->reactome2py) (2024.6.2)
Requirement already satisfied: six>=1.5 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas>=0.24.2->reactome2py) (1.16.0)

from reactome2py import analysis
from matplotlib import pyplot as plt
from pandas import json_normalize
import os, pandas as pd, numpy as np, seaborn as sns
from tqdm import tqdm
from PIL import Image

Utils

def get_reactome_raw(gene_list):
    
    "Reactome pathway analysis for a given gene set; returns raw output in dataframe."
    
    gene_str = ','.join(gene_list)
    
    # set page size and page to -1 ensures to display all pathway results, sort by pvalue instead of fdr, projection set to True is consistent with official web
    result = analysis.identifiers(gene_str, page_size='-1', page='-1', sort_by='ENTITIES_PVALUE',projection=True)
    
    out = json_normalize(result['pathways'])
    
    return out

Example:

# get gene list
genes = pd.read_csv('raw/AKT1.csv')['gene'].tolist()

get_reactome_raw(genes).head()

	stId	dbId	name	llp	inDisease	species.dbId	species.taxId	species.name	entities.resource	entities.total	entities.found	entities.ratio	entities.pValue	entities.fdr	entities.exp	reactions.resource	reactions.total	reactions.found	reactions.ratio
0	R-HSA-111465	111465	Apoptotic cleavage of cellular proteins	True	False	48887	9606	Homo sapiens	TOTAL	38	33	0.002410	0.000122	0.294802	[]	TOTAL	38	38	0.002490
1	R-HSA-5674400	5674400	Constitutive Signaling by AKT1 E17K in Cancer	True	True	48887	9606	Homo sapiens	TOTAL	29	25	0.001839	0.000828	0.877515	[]	TOTAL	18	18	0.001179
2	R-HSA-75153	75153	Apoptotic execution phase	True	False	48887	9606	Homo sapiens	TOTAL	54	39	0.003424	0.001119	0.877515	[]	TOTAL	57	57	0.003735
3	R-HSA-3108232	3108232	SUMO E3 ligases SUMOylate target proteins	False	False	48887	9606	Homo sapiens	TOTAL	184	104	0.011668	0.002038	0.877515	[]	TOTAL	132	102	0.008649
4	R-HSA-2990846	2990846	SUMOylation	False	False	48887	9606	Homo sapiens	TOTAL	193	107	0.012238	0.003148	0.877515	[]	TOTAL	141	111	0.009239

def plot_path(react_out, top_n=10,max_label_length=80):

    "Plot the output of get_reactome."
    
    # Extract the data and reverse it
    data = react_out.head(top_n).set_index('name')['-log10_pValue'].iloc[::-1]
    
    # Truncate labels if they are too long
    truncated_labels = [label[:max_label_length] + '...' if len(label) > max_label_length else label for label in data.index]
    data.index = truncated_labels

    # Calculate the required width: base width + additional width for the longest label
    base_width = 2
    max_label_length = max(data.index, key=len)
    additional_width = len(max_label_length) * 0.1  # Adjust scaling factor as needed
    
    figsize = (base_width + additional_width, 3*top_n/10)  # Adjust height as necessary

    # Plotting
    data.plot.barh(figsize=figsize)
    plt.ylabel('')
    plt.xlabel('-log10(p)')
    plt.tight_layout()

def plot_path(react_out, ref_df=None, top_n=10, max_label_length=80 ):
    """
    Plot the output of get_reactome.
    If ref_df is provided, bars corresponding to pathways in ref_df are shown in dark red.
    """
    import matplotlib.pyplot as plt

    # Take top_n rows
    subset = react_out.head(top_n)

    # Determine bar colors: if ref_df is provided, highlight matching pathways
    if ref_df is not None:
        ref_ids = set(ref_df['reactome_id'])
        colors = ['darkred' if rid in ref_ids else 'C0' for rid in subset['reactome_id']]
    else:
        colors = 'C0'

    # Reverse order for horizontal bar plot
    data = subset.set_index('name')['-log10_pValue'].iloc[::-1]
    # If colors is a list, reverse it to match the data order
    if isinstance(colors, list):
        colors = list(reversed(colors))

    # Truncate labels if too long
    truncated_labels = [label[:max_label_length] + '...' if len(label) > max_label_length else label for label in data.index]
    data.index = truncated_labels

    # Calculate figure width based on label length
    base_width = 2
    max_label = max(data.index, key=len)
    additional_width = len(max_label) * 0.1  # adjust scaling factor as needed
    figsize = (base_width + additional_width, 3 * top_n / 10)

    data.plot.barh(figsize=figsize, color=colors)
    plt.ylabel('')
    plt.xlabel('-log10(p)')
    plt.tight_layout()

plot_path(out_df,ref_df)
plt.title('AKT1');

def get_reactome(gene_list, plot=True):
    
    "Given a gene list, get the processed output of reactome; output contains additional -log10(p)"
    out = get_reactome_raw(gene_list)
    out = out[['stId','name','entities.pValue']]
    out.columns = ['reactome_id','name','pValue']
    out['-log10_pValue'] = -np.log10(out['pValue']).round(3)

    if plot:
        plot_path
    
    return out

out = get_reactome(genes)
out.head()

	reactome_id	name	pValue	-log10_pValue
0	R-HSA-111465	Apoptotic cleavage of cellular proteins	0.000122	3.914
1	R-HSA-5674400	Constitutive Signaling by AKT1 E17K in Cancer	0.000828	3.082
2	R-HSA-75153	Apoptotic execution phase	0.001119	2.951
3	R-HSA-3108232	SUMO E3 ligases SUMOylate target proteins	0.002038	2.691
4	R-HSA-2990846	SUMOylation	0.003148	2.502

out

	reactome_id	name	pValue	-log10_pValue
0	R-HSA-111465	Apoptotic cleavage of cellular proteins	0.000122	3.914
1	R-HSA-5674400	Constitutive Signaling by AKT1 E17K in Cancer	0.000828	3.082
2	R-HSA-75153	Apoptotic execution phase	0.001119	2.951
3	R-HSA-3108232	SUMO E3 ligases SUMOylate target proteins	0.002038	2.691
4	R-HSA-2990846	SUMOylation	0.003148	2.502
...	...	...	...	...
2240	R-HSA-2142753	Arachidonate metabolism	1.000000	-0.000
2241	R-HSA-8956319	Nucleotide catabolism	1.000000	-0.000
2242	R-HSA-446193	Biosynthesis of the N-glycan precursor (dolich...	1.000000	-0.000
2243	R-HSA-1660662	Glycosphingolipid metabolism	1.000000	-0.000
2244	R-HSA-425397	Transport of vitamins, nucleosides, and relate...	1.000000	-0.000

2245 rows × 4 columns

sns.set(rc={"figure.dpi":300, 'savefig.dpi':300})
sns.set_context('notebook')
sns.set_style("ticks")

plot_path(out)
plt.title('AKT1');

Reference

Download from Reactome/Download_data: https://reactome.org/download-data

Download UniProt to All pathways

def get_uniprot_ref(uniprot_id):
    ref = pd.read_excel('raw/reactome_refer_subpathways.xlsx')
    uniprot_ref = ref[ref.uniprot==uniprot_id].copy()
    uniprot_ref=uniprot_ref.drop_duplicates('reactome_id').reset_index(drop=True)
    col = ['uniprot','reactome_id','pathway','type','species']
    return uniprot_ref[col]

ref_akt = get_uniprot_ref('P31749')

ref_akt

	uniprot	reactome_id	pathway	type	species
0	P31749	R-HSA-111447	Activation of BAD and translocation to mitocho...	IEA	Homo sapiens
1	P31749	R-HSA-1257604	PIP3 activates AKT signaling	TAS	Homo sapiens
2	P31749	R-HSA-1358803	Downregulation of ERBB2:ERBB3 signaling	TAS	Homo sapiens
3	P31749	R-HSA-1445148	Translocation of SLC2A4 (GLUT4) to the plasma ...	IEA	Homo sapiens
4	P31749	R-HSA-1474151	Tetrahydrobiopterin (BH4) synthesis, recycling...	TAS	Homo sapiens
5	P31749	R-HSA-165159	MTOR signalling	TAS	Homo sapiens
6	P31749	R-HSA-198323	AKT phosphorylates targets in the cytosol	TAS	Homo sapiens
7	P31749	R-HSA-198693	AKT phosphorylates targets in the nucleus	TAS	Homo sapiens
8	P31749	R-HSA-199418	Negative regulation of the PI3K/AKT network	TAS	Homo sapiens
9	P31749	R-HSA-203615	eNOS activation	TAS	Homo sapiens
10	P31749	R-HSA-211163	AKT-mediated inactivation of FOXO1A	TAS	Homo sapiens
11	P31749	R-HSA-354192	Integrin signaling	TAS	Homo sapiens
12	P31749	R-HSA-3769402	Deactivation of the beta-catenin transactivati...	TAS	Homo sapiens
13	P31749	R-HSA-389357	CD28 dependent PI3K/Akt signaling	TAS	Homo sapiens
14	P31749	R-HSA-389513	Co-inhibition by CTLA4	TAS	Homo sapiens
15	P31749	R-HSA-392451	G beta:gamma signalling through PI3Kgamma	TAS	Homo sapiens
16	P31749	R-HSA-450385	Butyrate Response Factor 1 (BRF1) binds and de...	TAS	Homo sapiens
17	P31749	R-HSA-450604	KSRP (KHSRP) binds and destabilizes mRNA	TAS	Homo sapiens
18	P31749	R-HSA-5218920	VEGFR2 mediated vascular permeability	TAS	Homo sapiens
19	P31749	R-HSA-5628897	TP53 Regulates Metabolic Genes	TAS	Homo sapiens
20	P31749	R-HSA-5674400	Constitutive Signaling by AKT1 E17K in Cancer	TAS	Homo sapiens
21	P31749	R-HSA-6785807	Interleukin-4 and Interleukin-13 signaling	TAS	Homo sapiens
22	P31749	R-HSA-6804757	Regulation of TP53 Degradation	TAS	Homo sapiens
23	P31749	R-HSA-6804758	Regulation of TP53 Activity through Acetylation	TAS	Homo sapiens
24	P31749	R-HSA-6804759	Regulation of TP53 Activity through Associatio...	TAS	Homo sapiens
25	P31749	R-HSA-6811558	PI5P, PP2A and IER3 Regulate PI3K/AKT Signaling	TAS	Homo sapiens
26	P31749	R-HSA-69202	Cyclin E associated events during G1/S transit...	TAS	Homo sapiens
27	P31749	R-HSA-69656	Cyclin A:Cdk2-associated events at S phase entry	TAS	Homo sapiens
28	P31749	R-HSA-8849469	PTK6 Regulates RTKs and Their Effectors AKT1 a...	TAS	Homo sapiens
29	P31749	R-HSA-8876198	RAB GEFs exchange GTP for GDP on RABs	TAS	Homo sapiens
30	P31749	R-HSA-8941332	RUNX2 regulates genes involved in cell migration	IEA	Homo sapiens
31	P31749	R-HSA-8948751	Regulation of PTEN stability and activity	TAS	Homo sapiens
32	P31749	R-HSA-9009391	Extra-nuclear estrogen signaling	TAS	Homo sapiens
33	P31749	R-HSA-9604323	Negative regulation of NOTCH4 signaling	TAS	Homo sapiens
34	P31749	R-HSA-9607240	FLT3 Signaling	TAS	Homo sapiens
35	P31749	R-HSA-9614399	Regulation of localization of FOXO transcripti...	TAS	Homo sapiens
36	P31749	R-HSA-9634638	Estrogen-dependent nuclear events downstream o...	TAS	Homo sapiens
37	P31749	R-HSA-9755511	KEAP1-NFE2L2 pathway	TAS	Homo sapiens
38	P31749	R-HSA-9755779	SARS-CoV-2 targets host intracellular signalli...	TAS	Homo sapiens
39	P31749	R-HSA-9841251	Mitochondrial unfolded protein response (UPRmt)	TAS	Homo sapiens
40	P31749	R-HSA-9856530	High laminar flow shear stress activates signa...	TAS	Homo sapiens
41	P31749	R-HSA-9856532	Mechanical load activates signaling by PIEZO1 ...	IEA	Homo sapiens

out_df = out.copy()
ref_df = ref_akt.copy()

def get_rank(r_id):
    ranks = out_df.index[out_df['reactome_id'] == r_id].tolist()
    return ranks[0] + 1 if ranks else None  # Return None if not found

# Apply the function to ref_df
ref_df['rank_in_out'] = ref_df['reactome_id'].apply(get_rank)

import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score

# Assuming ref_df and out_df are your dataframes
ref_ids = set(ref_df['reactome_id'])
out_df['is_ref'] = out_df['reactome_id'].isin(ref_ids)

# Define predicted positives using a pValue threshold
threshold = 0.05
out_df['predicted'] = out_df['pValue'] <= threshold

# Calculate metrics
# precision = precision_score(out_df['is_ref'], out_df['predicted'])
# recall = recall_score(out_df['is_ref'], out_df['predicted'])
# f1 = f1_score(out_df['is_ref'], out_df['predicted'])
roc_auc = roc_auc_score(out_df['is_ref'], out_df['-log10_pValue'])

# print("Precision:", precision)
# print("Recall:", recall)
# print("F1 Score:", f1)
print("ROC-AUC:", roc_auc)

import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

# Compute false positive rate (fpr) and true positive rate (tpr)
fpr, tpr, thresholds = roc_curve(out_df['is_ref'], out_df['-log10_pValue'])

# Calculate the area under the curve (AUC)
roc_auc = auc(fpr, tpr)

# Create the ROC curve plot
plt.figure()
plt.plot(fpr, tpr, lw=2, label=f'ROC curve (area = {roc_auc:0.2f})')
plt.plot([0, 1], [0, 1], lw=2, linestyle='--', color='gray')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.show()

ROC-AUC: 0.6408955320666624

Run

from katlas.core import *
from katlas.plot import *

For the original human phosphoproteome (all-uppercase):

site= Data.get_combine_site_psp_ochoa()

For phosphorylated human phosphoproteome, uncheck below:

# site = Data.get_combine_site_phosphorylated()

For PSP site, uncheck below:

# site = Data.get_psp_human_site()

# site['acceptor'] = site.site_seq.str[7]

# # remove none sty-phosphoacceptor sequence
# site = site['acceptor'].isin(['s','t','y'])]

# # convert lowercase other than s,t,y to capital; convert rare aa to _
# site['site_seq'] = site.site_seq.apply(convert_string)

# site = site.reset_index(drop=True)

# site.to_parquet('PSP_human_processed_sites.parquet')

# site = pd.read_parquet('PSP_human_processed_sites.parquet')

As the original phosphoproteome is an all-uppercase dataset, we choose param to be param_CDDM_upper

param = param_CDDM_upper

## for phosphorylated dataset, we will choose the standard param
# param = param_CDDM
## or
# param = param_PSPA

ref = param['ref']

results = predict_kinase_df(site,'site_seq',**param)

input dataframe has a length 121419
Preprocessing
Finish preprocessing
Merging reference
Finish merging

result_df = pd.concat([site,results],axis=1)

Plot score distribution

result_df['acceptor'] =  result_df.site_seq.str[7].str.upper()

palette = get_color_dict(['S','T','Y'],'tab20')

hist_params = {'element':'poly',
              'edgecolor': None,
              'alpha':0.5,
              'bins':100,
              'kde':True,
              'palette':palette}

palette

{'S': (0.12156862745098039, 0.4666666666666667, 0.7058823529411765),
 'T': (0.6823529411764706, 0.7803921568627451, 0.9098039215686274),
 'Y': (1.0, 0.4980392156862745, 0.054901960784313725)}

def plot_hist(df,colname,sty_thr=None,hue='acceptor'):
    
    "Plot histogram of a column (kinase). "
    
    plt.figure(figsize=(6,2))
    
    sns.histplot(data=df,x=colname,hue=hue,**hist_params)
    
    plt.xlabel('')
    plt.title(colname)

    if sty_thr:
        for acceptor,thr in sty_thr.items():
            if thr is not None:
                plt.axvline(thr,color=palette[acceptor])

Ratio method to determine threshold

Step1: Locate the max phosphoacceptor in the STY ratio of a kinase
Step2: Get top 2% sites for that phosphoacceptor, and get the number of sites.
Step3: Use the number and STY ratio to calculate the number of sites for other phosphoacceptor.

def get_ratio(df, kinase, ref, pct=0.98):
    # Copy relevant columns and explode gene column
    data = df[['gene', 'acceptor', kinase]].copy()
    data['gene'] = data.gene.str.split('|')
    data = data.explode('gene')

    # Get STY ratio from reference data
    row = ref[['0S', '0T', '0Y']].loc[kinase]
    if row['0Y'] > 0.8: # apply to pspa
        ratio = {'S': 0, 'T': 0, 'Y': 1}
    elif 0.1 < row['0Y'] < 0.8: # does not apply to pspa
        ratio = {'S': row['0S'], 'T': row['0T'], 'Y': row['0Y']}
    else: # apply to pspa
        total = row['0S'] + row['0T']
        ratio = {'S': row['0S'] / total, 'T': row['0T'] / total, 'Y': 0}

    # Calculate thresholds and get genes
    sty_thr = {'S': None, 'T': None, 'Y': None}
    g_list = []

    # Identify the largest acceptor and calculate its total for top percentile
    max_acceptor = max(ratio, key=ratio.get)
    max_data = data[data['acceptor'] == max_acceptor]

    thr = max_data[kinase].quantile(pct)
    top_max_data = max_data[max_data[kinase]>thr]
    top_total = len(top_max_data)
    
    threshold_max = top_max_data[kinase].min()

    # Scale other acceptors and get the threshold
    for acceptor in ['S', 'T', 'Y']:
        if ratio[acceptor] != 0:
            scaled_data = data[data['acceptor'] == acceptor]
            scaled_data = scaled_data.sort_values(kinase, ascending=False)
            n_top = int(top_total * (ratio[acceptor] / ratio[max_acceptor]))
            top_scaled_data = scaled_data.head(n_top)
            

            g_list.append(top_scaled_data['gene'])
            sty_thr[acceptor] = top_scaled_data[kinase].min()

    genes = pd.concat(g_list).drop_duplicates().dropna().tolist()

    return sty_thr, genes

Pipeline

Get threshold and genes within the percentiile

k = 'ATM'

sty_threshold,genes = get_ratio(result_df,k,ref=ref)

sty_threshold

{'S': 2.6404023, 'T': 1.9869915, 'Y': None}

with threshold, plot score distribute

sns.set(rc={"figure.dpi":300, 'savefig.dpi':300})
sns.set_context('notebook')
sns.set_style("ticks")

plot_hist(result_df,k,sty_threshold)

run reactoe pathway analysis

out = get_reactome(genes)

# exclude paths that has no variance across kinases
exclude=['GTPase','SUMO']
for n in exclude:
    out = out[~out.name.str.contains(n)]

plot path

plot_path(out,10)
plt.title(k);

Run all kinases and save images

from pathlib import Path
from fastcore.xtras import *

def get_img(df, kinase_list, ref, SAVE=False,save_folder='cddm',exclude=['GTPase','SUMO'],pct=0.98):

    paths = []
    for kinase in tqdm(kinase_list):

        sty_thr,genes = get_ratio(df,kinase,ref=ref,pct=pct)

        out = get_reactome(genes)

        out['kinase'] = kinase
        paths.append(out)

        # plot score distribution
        plot_hist(df,kinase,sty_thr)

        distribute_figname = Path(f'{save_folder}/score_distribute/{kinase}.png')
        distribute_figname.parent.mkdir(parents=True,exist_ok=True)
        
        plt.savefig(distribute_figname,bbox_inches='tight', pad_inches=0.3) if SAVE else plt.show()
        plt.close()


        # exclude paths that has no variance across kinases
        out_cut = out.copy()
        for n in exclude:
            out_cut = out_cut[~out_cut.name.str.contains(n)]
            
        # plot pathway bargraph
        plot_path(out_cut,10)
        
        # title_acceptor ='/'.join(acceptors)
        # plt.title(f'{kinase}, top 2% substrates in {title_acceptor} sites')

        path_figname = Path(f'{save_folder}/path_fig/{kinase}.png')
        path_figname.parent.mkdir(parents=True,exist_ok=True)
        
        plt.savefig(path_figname,bbox_inches='tight', pad_inches=0) if SAVE else plt.show()
        plt.close()
    return paths

paths = get_img(result_df,['ATM','AKT1','EGFR'],ref,SAVE=False,save_folder='test')

  0%|          | 0/3 [00:00<?, ?it/s]

 33%|███▎      | 1/3 [00:01<00:02,  1.39s/it]

 67%|██████▋   | 2/3 [00:02<00:01,  1.32s/it]

100%|██████████| 3/3 [00:03<00:00,  1.32s/it]

Uncheck below to generate figures for all of the kinases:

# paths = get_img(result_df,ref.index,ref,SAVE=True,save_folder='test')

The above line will create two folders under the {save_folder}: - path_fig folder contains the pathway analysis. - score_distribute folder contains histogram of score distribution.

Combine images for pdf

def combine_images_vertically(image_paths, output_path):
    # Open images and convert them to 'RGBA' for uniformity
    images = [Image.open(image_path).convert('RGBA') for image_path in image_paths]
    
    # Calculate the total width as the maximum width of any image
    total_width = max(image.width for image in images)
    # Calculate the total height by summing the heights of all images
    total_height = sum(image.height for image in images)
    
    # set the background white board size
    # total_width, total_height = 2010,1200
    total_width, total_height = 3000,1750

    # Create a new image with a white background in 'RGBA' mode
    combined_image = Image.new('RGBA', (total_width, total_height), (255, 255, 255, 255))

    # Initialize the y_offset to start pasting images from the top
    y_offset = 0
    for image in images:
        # Calculate the x position to center the image
        x_offset = (total_width - image.width) // 2
        # Paste the current image into the combined image
        combined_image.paste(image, (x_offset, y_offset), image)
        # Update the y_offset to move to the next position for the next image
        y_offset += image.height

    # Save the combined image to the specified output path
    combined_image.save(output_path)

Uncheck below to merge the pathway figures and score distribution figures together:

# folders = ["test/score_distribute", "test/path_fig"]
# for k in tqdm(ref.index):
#     filename = f"{k}.png"
#     image_paths = [os.path.join(folder, filename) for folder in folders]
#     output_path = Path(f"test/combine/{k}.png")
#     output_path.parent.mkdir(parents=True,exist_ok=True)
    
#     combine_images_vertically(image_paths, output_path)

Save path in csv

Uncheck below to save pathway csv:

# path_data = pd.concat(paths)

# path_data = path_data[['kinase','name','pValue','-log10_pValue']]

# path_data.columns = ['kinase','Reactome_pathway','pValue','neg_log10_pValue']

# path_data.to_csv('pathway_analysis.csv',index=False)