pi3ks=['PIK3CA','PIK3CB','PIK3CD','PIK3CG','PIK3R1','PIK3R2','PIK3R3','PTEN','AKT1','AKT2','AKT3','MTOR','RICTOR','RPTOR','TSC1','TSC2','PDK1','IRS1','IRS2','INSR','IGF1R','GAB1','HRAS','NRAS','KRAS','EGFR','ERBB2','ERBB3','ERBB4']Pathway analysis
Setup
Reactome pathway
get_reactome_raw
get_reactome_raw (gene_list)
Reactome pathway analysis for a given gene set; returns raw output in dataframe.
raw_out = get_reactome_raw(pi3ks)
raw_out.head()| stId | dbId | name | llp | inDisease | species.dbId | species.taxId | species.name | entities.resource | entities.total | entities.found | entities.ratio | entities.pValue | entities.fdr | entities.exp | reactions.resource | reactions.total | reactions.found | reactions.ratio | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | R-HSA-1963640 | 1963640 | GRB2 events in ERBB2 signaling | True | False | 48887 | 9606 | Homo sapiens | TOTAL | 21 | 9 | 0.001304 | 1.110223e-16 | 1.221245e-15 | [] | TOTAL | 4 | 4 | 0.000255 |
| 1 | R-HSA-9665348 | 9665348 | Signaling by ERBB2 ECD mutants | True | True | 48887 | 9606 | Homo sapiens | TOTAL | 23 | 9 | 0.001428 | 1.110223e-16 | 1.221245e-15 | [] | TOTAL | 15 | 15 | 0.000957 |
| 2 | R-HSA-9664565 | 9664565 | Signaling by ERBB2 KD Mutants | True | True | 48887 | 9606 | Homo sapiens | TOTAL | 35 | 13 | 0.002174 | 1.110223e-16 | 1.221245e-15 | [] | TOTAL | 17 | 17 | 0.001085 |
| 3 | R-HSA-1227990 | 1227990 | Signaling by ERBB2 in Cancer | False | True | 48887 | 9606 | Homo sapiens | TOTAL | 36 | 13 | 0.002236 | 1.110223e-16 | 1.221245e-15 | [] | TOTAL | 62 | 62 | 0.003957 |
| 4 | R-HSA-9665686 | 9665686 | Signaling by ERBB2 TMD/JMD mutants | True | True | 48887 | 9606 | Homo sapiens | TOTAL | 30 | 10 | 0.001863 | 1.110223e-16 | 1.221245e-15 | [] | TOTAL | 13 | 13 | 0.000830 |
get_reactome
get_reactome (gene_list, p_type='FDR')
Reactome pathway analysis for a given gene set; returns formatted output in a dataframe with an additional -log10(p) column.
| Type | Default | Details | |
|---|---|---|---|
| gene_list | |||
| p_type | str | FDR | or p |
path = get_reactome(pi3ks,p_type='p')
path.head()Running pathway anlysis
Done
| name | reactome_id | p | -log10_p | |
|---|---|---|---|---|
| 0 | GRB2 events in ERBB2 signaling | R-HSA-1963640 | 1.110223e-16 | 15.955 |
| 1 | Signaling by ERBB2 ECD mutants | R-HSA-9665348 | 1.110223e-16 | 15.955 |
| 2 | Signaling by ERBB2 KD Mutants | R-HSA-9664565 | 1.110223e-16 | 15.955 |
| 3 | Signaling by ERBB2 in Cancer | R-HSA-1227990 | 1.110223e-16 | 15.955 |
| 4 | Signaling by ERBB2 TMD/JMD mutants | R-HSA-9665686 | 1.110223e-16 | 15.955 |
path = get_reactome(pi3ks,p_type='FDR')
path.head()Running pathway anlysis
Done
| name | reactome_id | FDR | -log10_FDR | |
|---|---|---|---|---|
| 0 | GRB2 events in ERBB2 signaling | R-HSA-1963640 | 1.221245e-15 | 14.913 |
| 1 | Signaling by ERBB2 ECD mutants | R-HSA-9665348 | 1.221245e-15 | 14.913 |
| 2 | Signaling by ERBB2 KD Mutants | R-HSA-9664565 | 1.221245e-15 | 14.913 |
| 3 | Signaling by ERBB2 in Cancer | R-HSA-1227990 | 1.221245e-15 | 14.913 |
| 4 | Signaling by ERBB2 TMD/JMD mutants | R-HSA-9665686 | 1.221245e-15 | 14.913 |
path[path.FDR<0.05]| name | reactome_id | FDR | -log10_FDR | |
|---|---|---|---|---|
| 0 | GRB2 events in ERBB2 signaling | R-HSA-1963640 | 1.221245e-15 | 14.913 |
| 1 | Signaling by ERBB2 ECD mutants | R-HSA-9665348 | 1.221245e-15 | 14.913 |
| 2 | Signaling by ERBB2 KD Mutants | R-HSA-9664565 | 1.221245e-15 | 14.913 |
| 3 | Signaling by ERBB2 in Cancer | R-HSA-1227990 | 1.221245e-15 | 14.913 |
| 4 | Signaling by ERBB2 TMD/JMD mutants | R-HSA-9665686 | 1.221245e-15 | 14.913 |
| ... | ... | ... | ... | ... |
| 319 | Viral Infection Pathways | R-HSA-9824446 | 3.888317e-02 | 1.410 |
| 320 | RHO GTPase cycle | R-HSA-9012999 | 3.922694e-02 | 1.406 |
| 321 | RUNX3 regulates p14-ARF | R-HSA-8951936 | 4.374857e-02 | 1.359 |
| 322 | Cellular response to chemical stress | R-HSA-9711123 | 4.654844e-02 | 1.332 |
| 323 | Signaling by Rho GTPases | R-HSA-194315 | 4.672781e-02 | 1.330 |
324 rows × 4 columns
Reference
Download from Reactome/Download_data: https://reactome.org/download-data
Download the "UniProt to All pathways" file listed under "Identifier mapping files".
For the `type` column, there are two values: IEA (Inferred from Electronic Annotation) and TAS (Traceable Author Statement, higher confidence).
ref = Data.get_reactome_pathway()
ref.head()
| uniprot | reactome_id | pathway | type | species | |
|---|---|---|---|---|---|
| 0 | A0A023GPK8 | R-DME-1500931 | Cell-Cell communication | IEA | Drosophila melanogaster |
| 1 | A0A023GPK8 | R-DME-373753 | Nephrin family interactions | IEA | Drosophila melanogaster |
| 2 | A0A023GRW3 | R-DME-72163 | mRNA Splicing - Major Pathway | IEA | Drosophila melanogaster |
| 3 | A0A023GRW3 | R-DME-72172 | mRNA Splicing | IEA | Drosophila melanogaster |
| 4 | A0A023GRW3 | R-DME-72203 | Processing of Capped Intron-Containing Pre-mRNA | IEA | Drosophila melanogaster |
query_reactome
query_reactome (uniprot_id)
Query a UniProt ID in the all-level Reactome pathway database.
uniprot='P31751' # AKT2
akt_path = query_reactome(uniprot) # AKT2
akt_path
| reactome_id | uniprot | pathway | type | species | lowest | |
|---|---|---|---|---|---|---|
| 0 | R-HSA-109581 | P31751 | Apoptosis | IEA | Homo sapiens | 0 |
| 1 | R-HSA-109606 | P31751 | Intrinsic Pathway for Apoptosis | IEA | Homo sapiens | 0 |
| 2 | R-HSA-109703 | P31751 | PKB-mediated events | IEA | Homo sapiens | 0 |
| 3 | R-HSA-109704 | P31751 | PI3K Cascade | IEA, TAS | Homo sapiens | 0 |
| 4 | R-HSA-111447 | P31751 | Activation of BAD and translocation to mitocho... | IEA | Homo sapiens | 1 |
| ... | ... | ... | ... | ... | ... | ... |
| 93 | R-HSA-9755511 | P31751 | KEAP1-NFE2L2 pathway | TAS | Homo sapiens | 1 |
| 94 | R-HSA-9755779 | P31751 | SARS-CoV-2 targets host intracellular signalli... | TAS | Homo sapiens | 1 |
| 95 | R-HSA-9824446 | P31751 | Viral Infection Pathways | TAS | Homo sapiens | 0 |
| 96 | R-HSA-9824585 | P31751 | Regulation of MITF-M-dependent genes involved ... | IEA | Homo sapiens | 1 |
| 97 | R-HSA-9856651 | P31751 | MITF-M-dependent gene expression | IEA | Homo sapiens | 0 |
98 rows × 6 columns
# lowest
akt_path[akt_path.lowest==1].shape(32, 6)
add_reactome_ref
add_reactome_ref (df, uniprot)
out = add_reactome_ref(path,uniprot)
out.shape(403, 6)
Bar plot of pathways
plot_path
plot_path (react_df, p_type='FDR', ref_id_list=None, ref_col=None, top_n=10, max_label_length=80)
Plot the output of get_reactome. If ref_id_list or ref_col is provided, bars corresponding to pathways in the reference are shown in dark red.
| Type | Default | Details | |
|---|---|---|---|
| react_df | the output df of get_reactome | ||
| p_type | str | FDR | |
| ref_id_list | NoneType | None | list of reactome_id |
| ref_col | NoneType | None | column in react_df, 1 or 0 to indicate whether it’s in ref |
| top_n | int | 10 | |
| max_label_length | int | 80 |
plot_path(out)
plt.title('PI3K Pathways');
# All level
plot_path(out,p_type='FDR',ref_id_list=akt_path.reactome_id,top_n=15)
plt.title('PI3K Pathways (with highlight as overlap with all level Reactome database)');
out.head()| name | reactome_id | FDR | -log10_FDR | ref_path | ref_path_lowest | |
|---|---|---|---|---|---|---|
| 0 | GRB2 events in ERBB2 signaling | R-HSA-1963640 | 1.221245e-15 | 14.913 | 0 | 0 |
| 1 | Signaling by ERBB2 ECD mutants | R-HSA-9665348 | 1.221245e-15 | 14.913 | 0 | 0 |
| 2 | Signaling by ERBB2 KD Mutants | R-HSA-9664565 | 1.221245e-15 | 14.913 | 0 | 0 |
| 3 | Signaling by ERBB2 in Cancer | R-HSA-1227990 | 1.221245e-15 | 14.913 | 0 | 0 |
| 4 | Signaling by ERBB2 TMD/JMD mutants | R-HSA-9665686 | 1.221245e-15 | 14.913 | 0 | 0 |
# All level, use ref_col
plot_path(out,p_type='FDR',ref_col='ref_path',top_n=15)
plt.title('PI3K Pathways (with highlight as overlap with all level Reactome database)');
# Lowest level, use ref_col
plot_path(out,p_type='FDR',ref_col='ref_path_lowest',top_n=15)
plt.title('PI3K Pathways (with highlight as overlap with lowest level Reactome database)');
Overlap
get_overlap
get_overlap (react_df, ref_id_list=None, ref_col=None, p_type='FDR', thr=0.05, plot=True, figsize=(5, 3), **kwargs)
| Type | Default | Details | |
|---|---|---|---|
| react_df | |||
| ref_id_list | NoneType | None | |
| ref_col | NoneType | None | column in react_df, 1 or 0 to indicate whether it’s in ref |
| p_type | str | FDR | |
| thr | float | 0.05 | original threshold of p value, will be log10 transformed |
| plot | bool | True | |
| figsize | tuple | (5, 3) | |
| kwargs | VAR_KEYWORD |
get_overlap(out, ref_id_list=akt_path.reactome_id,plot=True)0.8979591836734694

get_overlap(out, ref_col='ref_path')0.8979591836734694

get_overlap(out, ref_col='ref_path_lowest')0.96875

Pipeline
out = get_reactome(pi3ks,p_type='FDR')
out = add_reactome_ref(out,'P31749') # kinase uniprot
accuracy = get_overlap(out, ref_col='ref_path',plot=True) # if lowest, change all to lo