Plot

A collection of plot functions

Setup

Utils


source

set_sns

 set_sns ()

Set seaborn resolution for notebook display


source

get_color_dict

 get_color_dict (categories, palette:str='tab20')

Assign colors to a list of names (duplicates allowed); returns a dictionary mapping each unique name to its color

Type Default Details
categories list of names to assign color
palette str tab20 choose from sns.color_palette
get_color_dict(['a','a','b'])
{'a': (0.6823529411764706, 0.7803921568627451, 0.9098039215686274),
 'b': (1.0, 0.4980392156862745, 0.054901960784313725)}

Heatmap

# Load the kinase-substrate dataset
ks = Data.get_ks_dataset()
# Keep only the rows whose kinase_uniprot equals 'P00519'
ks_k = ks[ks.kinase_uniprot=='P00519']
# NOTE(review): get_prob is defined elsewhere; judging by the rendered table it
# yields per-position amino-acid probabilities from the 'site_seq' column — confirm
pssm_df = get_prob(ks_k,'site_seq')
# Preview the first rows of the resulting matrix
pssm_df.head()
Position -20 -19 -18 -17 -16 -15 -14 -13 -12 -11 ... 11 12 13 14 15 16 17 18 19 20
aa
P 0.050061 0.048691 0.062349 0.055489 0.046988 0.054753 0.064787 0.055090 0.056683 0.048272 ... 0.052728 0.051140 0.069436 0.063164 0.057716 0.056639 0.051072 0.050697 0.052163 0.060703
G 0.080586 0.080341 0.069007 0.067551 0.082530 0.070397 0.093581 0.073054 0.077566 0.072706 ... 0.099939 0.070856 0.071916 0.075672 0.071518 0.064821 0.080076 0.088720 0.062341 0.090735
A 0.080586 0.080341 0.062954 0.054282 0.075301 0.071600 0.070186 0.070060 0.065632 0.070322 ... 0.064378 0.077634 0.069436 0.072545 0.063363 0.079924 0.088272 0.087452 0.057888 0.070927
C 0.017094 0.012781 0.013317 0.019903 0.012048 0.017449 0.007798 0.014371 0.013126 0.012515 ... 0.007357 0.017868 0.014879 0.012508 0.011920 0.018880 0.019546 0.014575 0.019084 0.014058
S 0.047619 0.035910 0.046610 0.030157 0.037349 0.042720 0.041992 0.041916 0.034010 0.039333 ... 0.024525 0.036352 0.047117 0.040025 0.042033 0.040277 0.039092 0.051965 0.041349 0.039617

5 rows × 41 columns

@delegates(sns.heatmap)
def plot_heatmap_simple(matrix, # a matrix of values
                 title: str='heatmap', # title of the heatmap
                 figsize: tuple=(6,7), # (width, height); only used when no `ax` is passed
                 cmap: str='binary', # color map; the default 'binary' is a white-to-black scale
                 **kwargs, # arguments for sns.heatmap()
                 ):
    
    """Plot heatmap based on a matrix of values.

    Cells are drawn square and without annotations by default; both can be
    overridden through `square=` / `annot=`. When `ax=` is supplied the heatmap
    is drawn on that axes and no new figure is created. Returns None.
    """
    
    # `square` and `annot` are part of the delegated sns.heatmap signature, so
    # treat our values as defaults instead of hard-coding them next to **kwargs
    # (hard-coding raised "got multiple values for keyword argument").
    kwargs.setdefault('square', True)
    kwargs.setdefault('annot', False)

    # Only open a new figure when the caller did not provide an axes to draw on;
    # otherwise an empty stray figure would be created.
    if kwargs.get('ax') is None:
        plt.figure(figsize=figsize)

    # sns.heatmap returns the axes it drew on; style that axes explicitly rather
    # than relying on pyplot's notion of the "current" axes.
    heat_ax = sns.heatmap(matrix, cmap=cmap, **kwargs)
    heat_ax.set_title(title)
    heat_ax.set_ylabel('')
    heat_ax.set_xlabel('')
    # keep the row labels horizontal
    plt.setp(heat_ax.get_yticklabels(), rotation=0)
plot_heatmap_simple(pssm_df,'kinase')


source

plot_heatmap

 plot_heatmap (heatmap_df, ax=None, position_label=True, figsize=(5, 6),
               include_zero=True)

Plots a heatmap with specific formatting.

plot_heatmap(pssm_df);
# plt.savefig('plot.svg',bbox_inches='tight')

Rank

/opt/hostedtoolcache/Python/3.10.16/x64/lib/python3.10/site-packages/fastcore/docscrape.py:230: UserWarning: Unknown section See Also
  else: warn(msg)

source

plot_rank

 plot_rank (sorted_df:pandas.core.frame.DataFrame, x:str, y:str,
            n_hi:int=10, n_lo:int=10, figsize:tuple=(10, 8), data=None,
            hue=None, size=None, style=None, palette=None, hue_order=None,
            hue_norm=None, sizes=None, size_order=None, size_norm=None,
            markers=True, style_order=None, legend='auto', ax=None)

Plot rank from a sorted dataframe

Type Default Details
sorted_df DataFrame a sorted dataframe
x str column name for x axis
y str column name for y axis
n_hi int 10 if not None, show the head n names
n_lo int 10 if not None, show the tail n names
figsize tuple (10, 8) figure size
data NoneType None Input data structure. Either a long-form collection of vectors that can be
assigned to named variables or a wide-form dataset that will be internally
reshaped.
hue NoneType None Grouping variable that will produce points with different colors.
Can be either categorical or numeric, although color mapping will
behave differently in latter case.
size NoneType None Grouping variable that will produce points with different sizes.
Can be either categorical or numeric, although size mapping will
behave differently in latter case.
style NoneType None Grouping variable that will produce points with different markers.
Can have a numeric dtype but will always be treated as categorical.
palette NoneType None Method for choosing the colors to use when mapping the hue semantic.
String values are passed to :func:color_palette. List or dict values
imply categorical mapping, while a colormap object implies numeric mapping.
hue_order NoneType None Specify the order of processing and plotting for categorical levels of the
hue semantic.
hue_norm NoneType None Either a pair of values that set the normalization range in data units
or an object that will map from data units into a [0, 1] interval. Usage
implies numeric mapping.
sizes NoneType None An object that determines how sizes are chosen when size is used.
List or dict arguments should provide a size for each unique data value,
which forces a categorical interpretation. The argument may also be a
min, max tuple.
size_order NoneType None Specified order for appearance of the size variable levels,
otherwise they are determined from the data. Not relevant when the
size variable is numeric.
size_norm NoneType None Normalization in data units for scaling plot objects when the
size variable is numeric.
markers bool True Object determining how to draw the markers for different levels of the
style variable. Setting to True will use default markers, or
you can pass a list of markers or a dictionary mapping levels of the
style variable to markers. Setting to False will draw
marker-less lines. Markers are specified as in matplotlib.
style_order NoneType None Specified order for appearance of the style variable levels
otherwise they are determined from the data. Not relevant when the
style variable is numeric.
legend str auto How to draw the legend. If “brief”, numeric hue and size
variables will be represented with a sample of evenly spaced values.
If “full”, every group will get an entry in the legend. If “auto”,
choose between brief or full representation based on number of levels.
If False, no legend data is added and no legend is drawn.
ax NoneType None Pre-existing axes for the plot. Otherwise, call :func:matplotlib.pyplot.gca
internally.
Returns :class:matplotlib.axes.Axes The matplotlib axes containing the plot.
# Load pspa_st_raw.csv from GitHub and index the table by kinase.
# Alternative kept for reference (presumably the packaged equivalent — confirm):
# df = Data.get_pspa_raw().set_index('kinase')
df = pd.read_csv('https://github.com/sky1ove/katlas_raw/raw/refs/heads/main/nbs/raw/pspa_st_raw.csv').set_index('kinase')


# Take the row-wise maximum (axis 1) per kinase, store it in a 'values' column
# next to 'kinase', and sort ascending so the smallest maxima come first
sorted_df = df.max(1).reset_index(name='values').sort_values('values')
sorted_df.head()
kinase values
68 CK1G2 189898.392
294 VRK2 4191709.640
8 ALPHAK3 4573611.730
249 PRPK 8495330.790
38 CAMLCK 9413689.600
plot_rank(sorted_df,x='kinase',y='values')
plt.xlabel('kinase');

Distribution histogram


source

plot_hist

 plot_hist (df:pandas.core.frame.DataFrame, x:str, figsize:tuple=(6, 2),
            data=None, y=None, hue=None, weights=None, stat='count',
            bins='auto', binwidth=None, binrange=None, discrete=None,
            cumulative=False, common_bins=True, common_norm=True,
            multiple='layer', element='bars', fill=True, shrink=1,
            kde=False, kde_kws=None, line_kws=None, thresh=0,
            pthresh=None, pmax=None, cbar=False, cbar_ax=None,
            cbar_kws=None, palette=None, hue_order=None, hue_norm=None,
            color=None, log_scale=None, legend=True, ax=None)
Type Default Details
df DataFrame a dataframe that contain values for plot
x str column name of values
figsize tuple (6, 2)
data NoneType None Input data structure. Either a long-form collection of vectors that can be
assigned to named variables or a wide-form dataset that will be internally
reshaped.
y NoneType None
hue NoneType None Semantic variable that is mapped to determine the color of plot elements.
weights NoneType None Vector variables
stat str count Aggregate statistic to compute in each bin.

- count: show the number of observations in each bin
- frequency: show the number of observations divided by the bin width
- probability or proportion: normalize such that bar heights sum to 1
- percent: normalize such that bar heights sum to 100
- density: normalize such that the total area of the histogram equals 1
bins str auto Generic bin parameter that can be the name of a reference rule,
the number of bins, or the breaks of the bins.
Passed to :func:numpy.histogram_bin_edges.
binwidth NoneType None Width of each bin, overrides bins but can be used with
binrange.
binrange NoneType None Histogram computation parameters
discrete NoneType None If True, default to binwidth=1 and draw the bars so that they are
centered on their corresponding data points. This avoids “gaps” that may
otherwise appear when using discrete (integer) data.
cumulative bool False If True, plot the cumulative counts as bins increase.
common_bins bool True If True, use the same bins when semantic variables produce multiple
plots. If using a reference rule to determine the bins, it will be computed
with the full dataset.
common_norm bool True If True and using a normalized statistic, the normalization will apply over
the full dataset. Otherwise, normalize each histogram independently.
multiple str layer Approach to resolving multiple elements when semantic mapping creates subsets.
Only relevant with univariate data.
element str bars Visual representation of the histogram statistic.
Only relevant with univariate data.
fill bool True If True, fill in the space under the histogram.
Only relevant with univariate data.
shrink int 1 Histogram appearance parameters
kde bool False If True, compute a kernel density estimate to smooth the distribution
and show on the plot as (one or more) line(s).
Only relevant with univariate data.
kde_kws NoneType None Parameters that control the KDE computation, as in :func:kdeplot.
line_kws NoneType None Histogram smoothing with a kernel density estimate
thresh int 0 Cells with a statistic less than or equal to this value will be transparent.
Only relevant with bivariate data.
pthresh NoneType None Like thresh, but a value in [0, 1] such that cells with aggregate counts
(or other statistics, when used) up to this proportion of the total will be
transparent.
pmax NoneType None A value in [0, 1] that sets the saturation point for the colormap at a value
such that cells below constitute this proportion of the total count (or
other statistic, when used).
cbar bool False If True, add a colorbar to annotate the color mapping in a bivariate plot.
Note: Does not currently support plots with a hue variable well.
cbar_ax NoneType None Pre-existing axes for the colorbar.
cbar_kws NoneType None Bivariate histogram parameters
palette NoneType None Method for choosing the colors to use when mapping the hue semantic.
String values are passed to :func:color_palette. List or dict values
imply categorical mapping, while a colormap object implies numeric mapping.
hue_order NoneType None Specify the order of processing and plotting for categorical levels of the
hue semantic.
hue_norm NoneType None Either a pair of values that set the normalization range in data units
or an object that will map from data units into a [0, 1] interval. Usage
implies numeric mapping.
color NoneType None Hue mapping parameters
log_scale NoneType None Set axis scale(s) to log. A single value sets the data axis for any numeric
axes in the plot. A pair of values sets each axis independently.
Numeric values are interpreted as the desired base (default 10).
When None or False, seaborn defers to the existing Axes scale.
legend bool True If False, suppress the legend for semantic variables.
ax NoneType None Axes information
Returns :class:matplotlib.axes.Axes The matplotlib axes containing the plot.
# we can use the same df
sorted_df.head()
kinase values
68 CK1G2 189898.392
294 VRK2 4191709.640
8 ALPHAK3 4573611.730
249 PRPK 8495330.790
38 CAMLCK 9413689.600
plot_hist(sorted_df,'values')

Scatter plot


source

plot_2d

 plot_2d (X:pandas.core.frame.DataFrame, data=None, x=None, y=None,
          hue=None, size=None, style=None, palette=None, hue_order=None,
          hue_norm=None, sizes=None, size_order=None, size_norm=None,
          markers=True, style_order=None, legend='auto', ax=None)

Make a 2D plot from a dataframe whose first column is x and whose second column is y

Type Default Details
X DataFrame a dataframe whose first column is x and whose second column is y
data NoneType None Input data structure. Either a long-form collection of vectors that can be
assigned to named variables or a wide-form dataset that will be internally
reshaped.
x NoneType None
y NoneType None
hue NoneType None Grouping variable that will produce points with different colors.
Can be either categorical or numeric, although color mapping will
behave differently in latter case.
size NoneType None Grouping variable that will produce points with different sizes.
Can be either categorical or numeric, although size mapping will
behave differently in latter case.
style NoneType None Grouping variable that will produce points with different markers.
Can have a numeric dtype but will always be treated as categorical.
palette NoneType None Method for choosing the colors to use when mapping the hue semantic.
String values are passed to :func:color_palette. List or dict values
imply categorical mapping, while a colormap object implies numeric mapping.
hue_order NoneType None Specify the order of processing and plotting for categorical levels of the
hue semantic.
hue_norm NoneType None Either a pair of values that set the normalization range in data units
or an object that will map from data units into a [0, 1] interval. Usage
implies numeric mapping.
sizes NoneType None An object that determines how sizes are chosen when size is used.
List or dict arguments should provide a size for each unique data value,
which forces a categorical interpretation. The argument may also be a
min, max tuple.
size_order NoneType None Specified order for appearance of the size variable levels,
otherwise they are determined from the data. Not relevant when the
size variable is numeric.
size_norm NoneType None Normalization in data units for scaling plot objects when the
size variable is numeric.
markers bool True Object determining how to draw the markers for different levels of the
style variable. Setting to True will use default markers, or
you can pass a list of markers or a dictionary mapping levels of the
style variable to markers. Setting to False will draw
marker-less lines. Markers are specified as in matplotlib.
style_order NoneType None Specified order for appearance of the style variable levels
otherwise they are determined from the data. Not relevant when the
style variable is numeric.
legend str auto How to draw the legend. If “brief”, numeric hue and size
variables will be represented with a sample of evenly spaced values.
If “full”, every group will get an entry in the legend. If “auto”,
choose between brief or full representation based on number of levels.
If False, no legend data is added and no legend is drawn.
ax NoneType None Pre-existing axes for the plot. Otherwise, call :func:matplotlib.pyplot.gca
internally.
Returns :class:matplotlib.axes.Axes The matplotlib axes containing the plot.
plot_2d(_.iloc[:,:2])

Dimension reduction - 2d plot


source

plot_cluster

 plot_cluster (df:pandas.core.frame.DataFrame, method:str='pca',
               hue:str=None, complexity:int=30, palette:str='tab20',
               legend:bool=False, name_list=None, seed:int=123, s:int=50,
               **kwargs)

Given a dataframe of values, plot it in 2D. The method could be ‘pca’, ‘tsne’, or ‘umap’.

Type Default Details
df DataFrame a dataframe of values to be dimensionality-reduced
method str pca dimensionality reduction method, choose from pca, umap, and tsne
hue str None colname of color
complexity int 30 recommended: 30 for tsne, 15 for umap, none for pca
palette str tab20 color scheme, could be tab10 if there are fewer categories
legend bool False whether or not to add the legend on the side
name_list NoneType None a list of names to annotate each dot in the plot
seed int 123 seed for dimensionality reduction
s int 50 size of the dot
kwargs VAR_KEYWORD
# Load the amino-acid info table
aa = Data.get_aa_info()
aa_rdkit = get_rdkit_df(aa, 'SMILES') # get rdkit features from the SMILES column
aa_rdkit = preprocess(aa_rdkit) # remove similar columns
# Load the same info table again under the name `info`
info=Data.get_aa_info()
removing columns: {'fr_C_S', 'fr_lactam', 'SlogP_VSA7', 'fr_Nhpyrrole', 'fr_pyridine', 'fr_benzene', 'SlogP_VSA10', 'fr_ketone_Topliss', 'fr_dihydropyridine', 'fr_nitro', 'NumSaturatedCarbocycles', 'fr_C_O_noCOO', 'fr_oxime', 'fr_thiazole', 'fr_Ar_COO', 'fr_Imine', 'fr_urea', 'PEOE_VSA13', 'fr_furan', 'fr_ketone', 'SlogP_VSA9', 'PMI3', 'fr_aniline', 'fr_isothiocyan', 'fr_guanido', 'fr_piperdine', 'fr_ether', 'fr_Ndealkylation1', 'fr_nitrile', 'HeavyAtomCount', 'fr_phos_ester', 'fr_azo', 'fr_diazo', 'MaxPartialCharge', 'fr_prisulfonamd', 'fr_methoxy', 'fr_nitroso', 'Eccentricity', 'fr_nitro_arom', 'fr_piperzine', 'fr_quatN', 'fr_phenol', 'Chi0', 'SMR_VSA2', 'fr_HOCCN', 'PEOE_VSA5', 'fr_isocyan', 'fr_Ar_NH', 'NumAliphaticCarbocycles', 'fr_amidine', 'fr_ester', 'fr_Al_OH_noTert', 'fr_term_acetylene', 'fr_ArN', 'BCUT2D_MRHI', 'fr_amide', 'NumAliphaticRings', 'fr_barbitur', 'fr_Ar_OH', 'fr_alkyl_halide', 'fr_azide', 'LabuteASA', 'fr_hdrzone', 'fr_nitro_arom_nonortho', 'fr_aldehyde', 'fr_bicyclic', 'fr_phenol_noOrthoHbond', 'fr_thiophene', 'ExactMolWt', 'fr_para_hydroxylation', 'fr_thiocyan', 'HeavyAtomMolWt', 'fr_COO', 'MaxEStateIndex', 'fr_allylic_oxid', 'fr_benzodiazepine', 'fr_lactone', 'NumValenceElectrons', 'NumSpiroAtoms', 'SlogP_VSA12', 'Asphericity', 'NumBridgeheadAtoms', 'fr_alkyl_carbamate', 'NumAmideBonds', 'MolMR', 'SMR_VSA8', 'fr_oxazole', 'SlogP_VSA6', 'NumSaturatedRings', 'fr_phos_acid', 'fr_halogen', 'fr_epoxide', 'fr_imide', 'MinAbsPartialCharge', 'SlogP_VSA11', 'fr_tetrazole', 'Chi1n', 'fr_COO2', 'fr_morpholine', 'fr_N_O', 'fr_sulfonamd', 'fr_sulfone', 'fr_aryl_methyl', 'NumRadicalElectrons', 'fr_hdrzine', 'fr_Ndealkylation2', 'EState_VSA11', 'VSA_EState1', 'NumSaturatedHeterocycles'}
removing columns: set()
plot_cluster(aa_rdkit, name_list = aa.Name.tolist(), hue = 'aa')

Bokeh interactive plot


source

plot_bokeh

 plot_bokeh (X:pandas.core.frame.DataFrame, idx, hue:None, s:int=3,
             **kwargs)

Make an interactive 2D plot with a search box and a hover window that shows each dot's information

Type Default Details
X DataFrame a dataframe of two columns from dimensionality reduction
idx pd.Series or list that indicates identities for searching box
hue None pd.Series or list that indicates category for each sample
s int 3 dot size
kwargs VAR_KEYWORD
# Reduce the rdkit feature table to 2 dimensions with PCA
# NOTE(review): relies on reduce_feature's defaults, which are not shown here — confirm
X = reduce_feature(aa_rdkit)

# Amino-acid info table; provides the Name and SMILES columns used below
info=Data.get_aa_info()

# Interactive scatter: Name drives both the search box (idx) and the coloring (hue);
# `smiles` is not a named parameter of plot_bokeh, so it is passed through **kwargs
plot_bokeh(X, 
           idx = info.Name,
           hue = info.Name,
           s=7, 
           smiles = info.SMILES)
Loading BokehJS ...

Bar graph


source

plot_count

 plot_count (cnt, tick_spacing:float=None, palette:str='tab20')

Make bar plot from df[‘x’].value_counts()

Type Default Details
cnt from df[‘x’].value_counts()
tick_spacing float None tick spacing for x axis
palette str tab20
# Count how often each fr_sulfide value (rounded to 3 decimals) occurs
cnt = aa_rdkit.fr_sulfide.round(3).value_counts()

# Draw the counts as a bar plot
plot_count(cnt)


source

plot_bar

 plot_bar (df, value, group, title=None, figsize=(12, 5), fontsize=14,
           dots=True, rotation=90, ascending=False, data=None, x=None,
           y=None, hue=None, order=None, hue_order=None, estimator='mean',
           errorbar=('ci', 95), n_boot=1000, seed=None, units=None,
           weights=None, orient=None, color=None, palette=None,
           saturation=0.75, fill=True, hue_norm=None, width=0.8,
           dodge='auto', gap=0, log_scale=None, native_scale=False,
           formatter=None, legend='auto', capsize=0, err_kws=None,
           ci=<deprecated>, errcolor=<deprecated>, errwidth=<deprecated>,
           ax=None)

Plot a bar graph from an unstacked dataframe; the value column and the group (category) column must be specified

Type Default Details
df
value colname of value
group colname of group
title NoneType None
figsize tuple (12, 5)
fontsize int 14
dots bool True whether or not to add dots to the graph
rotation int 90
ascending bool False
data NoneType None Dataset for plotting. If x and y are absent, this is
interpreted as wide-form. Otherwise it is expected to be long-form.
x NoneType None
y NoneType None
hue NoneType None
order NoneType None
hue_order NoneType None
estimator str mean Statistical function to estimate within each categorical bin.
errorbar tuple (‘ci’, 95) Name of errorbar method (either “ci”, “pi”, “se”, or “sd”), or a tuple
with a method name and a level parameter, or a function that maps from a
vector to a (min, max) interval, or None to hide errorbar. See the
:doc:errorbar tutorial </tutorial/error_bars> for more information.

.. versionadded:: v0.12.0
n_boot int 1000 Number of bootstrap samples used to compute confidence intervals.
seed NoneType None Seed or random number generator for reproducible bootstrapping.
units NoneType None Identifier of sampling units; used by the errorbar function to
perform a multilevel bootstrap and account for repeated measures
weights NoneType None Data values or column used to compute weighted statistics.
Note that the use of weights may limit other statistical options.

.. versionadded:: v0.13.1
orient NoneType None Orientation of the plot (vertical or horizontal). This is usually
inferred based on the type of the input variables, but it can be used
to resolve ambiguity when both x and y are numeric or when
plotting wide-form data.

.. versionchanged:: v0.13.0
Added ‘x’/‘y’ as options, equivalent to ‘v’/‘h’.
color NoneType None Single color for the elements in the plot.
palette NoneType None Colors to use for the different levels of the hue variable. Should
be something that can be interpreted by :func:color_palette, or a
dictionary mapping hue levels to matplotlib colors.
saturation float 0.75 Proportion of the original saturation to draw fill colors in. Large
patches often look better with desaturated colors, but set this to
1 if you want the colors to perfectly match the input values.
fill bool True If True, use a solid patch. Otherwise, draw as line art.

.. versionadded:: v0.13.0
hue_norm NoneType None Normalization in data units for colormap applied to the hue
variable when it is numeric. Not relevant if hue is categorical.

.. versionadded:: v0.12.0
width float 0.8 Width allotted to each element on the orient axis. When native_scale=True,
it is relative to the minimum distance between two values in the native scale.
dodge str auto When hue mapping is used, whether elements should be narrowed and shifted along
the orient axis to eliminate overlap. If "auto", set to True when the
orient variable is crossed with the categorical variable or False otherwise.

.. versionchanged:: 0.13.0

Added "auto" mode as a new default.
gap int 0 Shrink on the orient axis by this factor to add a gap between dodged elements.

.. versionadded:: 0.13.0
log_scale NoneType None Set axis scale(s) to log. A single value sets the data axis for any numeric
axes in the plot. A pair of values sets each axis independently.
Numeric values are interpreted as the desired base (default 10).
When None or False, seaborn defers to the existing Axes scale.

.. versionadded:: v0.13.0
native_scale bool False When True, numeric or datetime values on the categorical axis will maintain
their original scaling rather than being converted to fixed indices.

.. versionadded:: v0.13.0
formatter NoneType None Function for converting categorical data into strings. Affects both grouping
and tick labels.

.. versionadded:: v0.13.0
legend str auto How to draw the legend. If “brief”, numeric hue and size
variables will be represented with a sample of evenly spaced values.
If “full”, every group will get an entry in the legend. If “auto”,
choose between brief or full representation based on number of levels.
If False, no legend data is added and no legend is drawn.

.. versionadded:: v0.13.0
capsize int 0 Width of the “caps” on error bars, relative to bar spacing.
err_kws NoneType None Parameters of :class:matplotlib.lines.Line2D, for the error bar artists.

.. versionadded:: v0.13.0
ci Deprecated Level of the confidence interval to show, in [0, 100].

.. deprecated:: v0.12.0
Use errorbar=("ci", ...).
errcolor Deprecated Color used for the error bar lines.

.. deprecated:: 0.13.0
Use err_kws={'color': ...}.
errwidth Deprecated Thickness of error bar lines (and caps), in points.

.. deprecated:: 0.13.0
Use err_kws={'linewidth': ...}.
ax NoneType None Axes object to draw the plot onto, otherwise uses the current Axes.
Returns matplotlib Axes Returns the Axes object with the plot drawn onto it.
# Add a binary group column: 1 where pKa1 > 2, otherwise 0
info['cat'] = (info.pKa1>2).astype(int)
# Bar plot of MW per 'cat' group; `info` is passed as the unstacked dataframe
plot_bar(info,value='MW',group='cat',palette='tab20')


source

plot_group_bar

 plot_group_bar (df, value_cols, group, figsize=(12, 5), order=None,
                 title=None, fontsize=14, rotation=90, data=None, x=None,
                 y=None, hue=None, hue_order=None, estimator='mean',
                 errorbar=('ci', 95), n_boot=1000, seed=None, units=None,
                 weights=None, orient=None, color=None, palette=None,
                 saturation=0.75, fill=True, hue_norm=None, width=0.8,
                 dodge='auto', gap=0, log_scale=None, native_scale=False,
                 formatter=None, legend='auto', capsize=0, err_kws=None,
                 ci=<deprecated>, errcolor=<deprecated>,
                 errwidth=<deprecated>, ax=None)

Plot grouped bar graph from dataframe.

Type Default Details
df
value_cols list of column names for values, the order depends on the first item
group column name of group (e.g., ‘kinase’)
figsize tuple (12, 5)
order NoneType None
title NoneType None
fontsize int 14
rotation int 90
data NoneType None Dataset for plotting. If x and y are absent, this is
interpreted as wide-form. Otherwise it is expected to be long-form.
x NoneType None
y NoneType None
hue NoneType None
hue_order NoneType None
estimator str mean Statistical function to estimate within each categorical bin.
errorbar tuple (‘ci’, 95) Name of errorbar method (either “ci”, “pi”, “se”, or “sd”), or a tuple
with a method name and a level parameter, or a function that maps from a
vector to a (min, max) interval, or None to hide errorbar. See the
:doc:errorbar tutorial </tutorial/error_bars> for more information.

.. versionadded:: v0.12.0
n_boot int 1000 Number of bootstrap samples used to compute confidence intervals.
seed NoneType None Seed or random number generator for reproducible bootstrapping.
units NoneType None Identifier of sampling units; used by the errorbar function to
perform a multilevel bootstrap and account for repeated measures
weights NoneType None Data values or column used to compute weighted statistics.
Note that the use of weights may limit other statistical options.

.. versionadded:: v0.13.1
orient NoneType None Orientation of the plot (vertical or horizontal). This is usually
inferred based on the type of the input variables, but it can be used
to resolve ambiguity when both x and y are numeric or when
plotting wide-form data.

.. versionchanged:: v0.13.0
Added ‘x’/‘y’ as options, equivalent to ‘v’/‘h’.
color NoneType None Single color for the elements in the plot.
palette NoneType None Colors to use for the different levels of the hue variable. Should
be something that can be interpreted by :func:color_palette, or a
dictionary mapping hue levels to matplotlib colors.
saturation float 0.75 Proportion of the original saturation to draw fill colors in. Large
patches often look better with desaturated colors, but set this to
1 if you want the colors to perfectly match the input values.
fill bool True If True, use a solid patch. Otherwise, draw as line art.

.. versionadded:: v0.13.0
hue_norm NoneType None Normalization in data units for colormap applied to the hue
variable when it is numeric. Not relevant if hue is categorical.

.. versionadded:: v0.12.0
width float 0.8 Width allotted to each element on the orient axis. When native_scale=True,
it is relative to the minimum distance between two values in the native scale.
dodge str auto When hue mapping is used, whether elements should be narrowed and shifted along
the orient axis to eliminate overlap. If "auto", set to True when the
orient variable is crossed with the categorical variable or False otherwise.

.. versionchanged:: 0.13.0

Added "auto" mode as a new default.
gap int 0 Shrink on the orient axis by this factor to add a gap between dodged elements.

.. versionadded:: 0.13.0
log_scale NoneType None Set axis scale(s) to log. A single value sets the data axis for any numeric
axes in the plot. A pair of values sets each axis independently.
Numeric values are interpreted as the desired base (default 10).
When None or False, seaborn defers to the existing Axes scale.

.. versionadded:: v0.13.0
native_scale bool False When True, numeric or datetime values on the categorical axis will maintain
their original scaling rather than being converted to fixed indices.

.. versionadded:: v0.13.0
formatter NoneType None Function for converting categorical data into strings. Affects both grouping
and tick labels.

.. versionadded:: v0.13.0
legend str auto How to draw the legend. If “brief”, numeric hue and size
variables will be represented with a sample of evenly spaced values.
If “full”, every group will get an entry in the legend. If “auto”,
choose between brief or full representation based on number of levels.
If False, no legend data is added and no legend is drawn.

.. versionadded:: v0.13.0
capsize int 0 Width of the “caps” on error bars, relative to bar spacing.
err_kws NoneType None Parameters of :class:matplotlib.lines.Line2D, for the error bar artists.

.. versionadded:: v0.13.0
ci Deprecated Level of the confidence interval to show, in [0, 100].

.. deprecated:: v0.12.0
Use errorbar=("ci", ...).
errcolor Deprecated Color used for the error bar lines.

.. deprecated:: 0.13.0
Use err_kws={'color': ...}.
errwidth Deprecated Thickness of error bar lines (and caps), in points.

.. deprecated:: 0.13.0
Use err_kws={'linewidth': ...}.
ax NoneType None Axes object to draw the plot onto, otherwise uses the current Axes.
Returns matplotlib Axes Returns the Axes object with the plot drawn onto it.
plot_group_bar(info,['pKa1','pKb2'],'Name')

Box plot


source

plot_box

 plot_box (df, value, group, title=None, figsize=(6, 3), fontsize=14,
           dots=True, rotation=90, data=None, x=None, y=None, hue=None,
           order=None, hue_order=None, orient=None, color=None,
           palette=None, saturation=0.75, fill=True, dodge='auto',
           width=0.8, gap=0, whis=1.5, linecolor='auto', linewidth=None,
           fliersize=None, hue_norm=None, native_scale=False,
           log_scale=None, formatter=None, legend='auto', ax=None)

Plot box plot.

Type Default Details
df
value colname of value
group colname of group
title NoneType None
figsize tuple (6, 3)
fontsize int 14
dots bool True
rotation int 90
data NoneType None Dataset for plotting. If x and y are absent, this is
interpreted as wide-form. Otherwise it is expected to be long-form.
x NoneType None
y NoneType None
hue NoneType None
order NoneType None
hue_order NoneType None
orient NoneType None Orientation of the plot (vertical or horizontal). This is usually
inferred based on the type of the input variables, but it can be used
to resolve ambiguity when both x and y are numeric or when
plotting wide-form data.

.. versionchanged:: v0.13.0
Added ‘x’/‘y’ as options, equivalent to ‘v’/‘h’.
color NoneType None Single color for the elements in the plot.
palette NoneType None Colors to use for the different levels of the hue variable. Should
be something that can be interpreted by :func:color_palette, or a
dictionary mapping hue levels to matplotlib colors.
saturation float 0.75 Proportion of the original saturation to draw fill colors in. Large
patches often look better with desaturated colors, but set this to
1 if you want the colors to perfectly match the input values.
fill bool True If True, use a solid patch. Otherwise, draw as line art.

.. versionadded:: v0.13.0
dodge str auto When hue mapping is used, whether elements should be narrowed and shifted along
the orient axis to eliminate overlap. If "auto", set to True when the
orient variable is crossed with the categorical variable or False otherwise.

.. versionchanged:: 0.13.0

Added "auto" mode as a new default.
width float 0.8 Width allotted to each element on the orient axis. When native_scale=True,
it is relative to the minimum distance between two values in the native scale.
gap int 0 Shrink on the orient axis by this factor to add a gap between dodged elements.

.. versionadded:: 0.13.0
whis float 1.5 Parameter that controls whisker length. If scalar, whiskers are drawn
to the farthest datapoint within whis * IQR from the nearest hinge.
If a tuple, it is interpreted as percentiles that whiskers represent.
linecolor str auto Color to use for line elements, when fill is True.

.. versionadded:: v0.13.0
linewidth NoneType None Width of the lines that frame the plot elements.
fliersize NoneType None Size of the markers used to indicate outlier observations.
hue_norm NoneType None Normalization in data units for colormap applied to the hue
variable when it is numeric. Not relevant if hue is categorical.

.. versionadded:: v0.12.0
native_scale bool False When True, numeric or datetime values on the categorical axis will maintain
their original scaling rather than being converted to fixed indices.

.. versionadded:: v0.13.0
log_scale NoneType None Set axis scale(s) to log. A single value sets the data axis for any numeric
axes in the plot. A pair of values sets each axis independently.
Numeric values are interpreted as the desired base (default 10).
When None or False, seaborn defers to the existing Axes scale.

.. versionadded:: v0.13.0
formatter NoneType None Function for converting categorical data into strings. Affects both grouping
and tick labels.

.. versionadded:: v0.13.0
legend str auto How to draw the legend. If “brief”, numeric hue and size
variables will be represented with a sample of evenly spaced values.
If “full”, every group will get an entry in the legend. If “auto”,
choose between brief or full representation based on number of levels.
If False, no legend data is added and no legend is drawn.

.. versionadded:: v0.13.0
ax NoneType None Axes object to draw the plot onto, otherwise uses the current Axes.
Returns matplotlib Axes Returns the Axes object with the plot drawn onto it.
plot_box(info,value='MW',group='cat',palette='tab20')

Pearson correlation


source

plot_corr

 plot_corr (x, y, xlabel=None, ylabel=None, data=None, text_location=[0.8,
            0.1], x_estimator=None, x_bins=None, x_ci='ci', scatter=True,
            fit_reg=True, ci=95, n_boot=1000, units=None, seed=None,
            order=1, logistic=False, lowess=False, robust=False,
            logx=False, x_partial=None, y_partial=None, truncate=True,
            dropna=True, x_jitter=None, y_jitter=None, label=None,
            color=None, marker='o', scatter_kws=None, line_kws=None,
            ax=None)

Given a dataframe and the names of two columns, plot the two columns’ correlation

Type Default Details
x x axis values, or colname of x axis
y y axis values, or colname of y axis
xlabel NoneType None x axis label
ylabel NoneType None y axis label
data NoneType None dataframe that contains data
text_location list [0.8, 0.1]
x_estimator NoneType None Apply this function to each unique value of x and plot the
resulting estimate. This is useful when x is a discrete variable.
If x_ci is given, this estimate will be bootstrapped and a
confidence interval will be drawn.
x_bins NoneType None Bin the x variable into discrete bins and then estimate the central
tendency and a confidence interval. This binning only influences how
the scatterplot is drawn; the regression is still fit to the original
data. This parameter is interpreted either as the number of
evenly-sized (not necessarily spaced) bins or the positions of the bin
centers. When this parameter is used, it implies that the default of
x_estimator is numpy.mean.
x_ci str ci Size of the confidence interval used when plotting a central tendency
for discrete values of x. If "ci", defer to the value of the
ci parameter. If "sd", skip bootstrapping and show the
standard deviation of the observations in each bin.
scatter bool True If True, draw a scatterplot with the underlying observations (or
the x_estimator values).
fit_reg bool True If True, estimate and plot a regression model relating the x
and y variables.
ci int 95 Size of the confidence interval for the regression estimate. This will
be drawn using translucent bands around the regression line. The
confidence interval is estimated using a bootstrap; for large
datasets, it may be advisable to avoid that computation by setting
this parameter to None.
n_boot int 1000 Number of bootstrap resamples used to estimate the ci. The default
value attempts to balance time and stability; you may want to increase
this value for “final” versions of plots.
units NoneType None If the x and y observations are nested within sampling units,
those can be specified here. This will be taken into account when
computing the confidence intervals by performing a multilevel bootstrap
that resamples both units and observations (within unit). This does not
otherwise influence how the regression is estimated or drawn.
seed NoneType None Seed or random number generator for reproducible bootstrapping.
order int 1 If order is greater than 1, use numpy.polyfit to estimate a
polynomial regression.
logistic bool False If True, assume that y is a binary variable and use
statsmodels to estimate a logistic regression model. Note that this
is substantially more computationally intensive than linear regression,
so you may wish to decrease the number of bootstrap resamples
(n_boot) or set ci to None.
lowess bool False If True, use statsmodels to estimate a nonparametric lowess
model (locally weighted linear regression). Note that confidence
intervals cannot currently be drawn for this kind of model.
robust bool False If True, use statsmodels to estimate a robust regression. This
will de-weight outliers. Note that this is substantially more
computationally intensive than standard linear regression, so you may
wish to decrease the number of bootstrap resamples (n_boot) or set
ci to None.
logx bool False If True, estimate a linear regression of the form y ~ log(x), but
plot the scatterplot and regression model in the input space. Note that
x must be positive for this to work.
x_partial NoneType None
y_partial NoneType None
truncate bool True If True, the regression line is bounded by the data limits. If
False, it extends to the x axis limits.
dropna bool True
x_jitter NoneType None
y_jitter NoneType None
label NoneType None Label to apply to either the scatterplot or regression line (if
scatter is False) for use in a legend.
color NoneType None Color to apply to all plot elements; will be superseded by colors
passed in scatter_kws or line_kws.
marker str o Marker to use for the scatterplot glyphs.
scatter_kws NoneType None
line_kws NoneType None
ax NoneType None Axes object to draw the plot onto, otherwise uses the current Axes.
Returns matplotlib Axes The Axes object containing the plot.
norm = Data.get_pspa_st_norm().iloc[:,:-6].T

norm.head()
kinase AAK1 ACVR2A ACVR2B AKT1 AKT2 AKT3 ALK2 ALK4 ALPHAK3 AMPKA1 ... VRK1 VRK2 WNK1 WNK3 WNK4 YANK2 YANK3 YSK1 YSK4 ZAK
-5P 0.0720 0.0415 0.0533 0.0603 0.0602 0.0705 0.0536 0.0552 0.0571 0.0555 ... 0.0710 0.0684 0.0482 0.0413 0.0369 0.0580 0.0625 0.0590 0.0593 0.0604
-5G 0.0245 0.0481 0.0517 0.0594 0.0617 0.0624 0.0659 0.0574 0.0478 0.0504 ... 0.0786 0.0676 0.0510 0.0572 0.0523 0.0699 0.0776 0.0713 0.0728 0.0641
-5A 0.0284 0.0584 0.0566 0.0552 0.0643 0.0745 0.0662 0.0605 0.0253 0.0534 ... 0.0633 0.0636 0.0555 0.0503 0.0539 0.0637 0.0647 0.0731 0.0744 0.0659
-5C 0.0456 0.0489 0.0772 0.0605 0.0582 0.0628 0.0762 0.0483 0.0384 0.0588 ... 0.0641 0.0644 0.0576 0.0732 0.0544 0.0602 0.0598 0.0606 0.0734 0.0631
-5S 0.0425 0.0578 0.0533 0.0516 0.0534 0.0442 0.0567 0.0574 0.0571 0.0504 ... 0.0595 0.0573 0.0561 0.0569 0.0580 0.0580 0.0545 0.0542 0.0597 0.0597

5 rows × 303 columns

plot_corr(data=norm, x='AAK1', y='BIKE')

Matrix heatmap


source

get_similarity

 get_similarity (df, metric='euclidean')

Calculate distance matrix of a df; also return inverse df (similarity df)


source

plot_matrix

 plot_matrix (dist_matrix, inverse_color=False)

Plot distance/similarity matrix

rdkit_corr = aa_rdkit.T.corr()
rdkit_corr.head()
aa A C D E F G H I K L ... S T V W Y s t y Kac Kme3
aa
A 1.000000 0.362285 0.090512 -0.112693 -0.189588 0.725605 0.000479 0.189212 -0.099613 0.270430 ... 0.560214 0.377420 0.294869 -0.345882 -0.292322 -0.257989 -0.294085 -0.540545 -0.446649 -0.353592
C 0.362285 1.000000 -0.064188 -0.173111 -0.249098 0.409469 0.009036 0.008175 -0.245936 -0.027502 ... 0.298754 0.143817 0.101138 -0.221078 -0.274137 -0.039459 -0.154141 -0.339660 -0.297758 -0.210207
D 0.090512 -0.064188 1.000000 0.803788 -0.305487 0.065810 -0.211706 -0.272792 -0.229956 -0.263288 ... 0.302876 0.228545 -0.181244 -0.340342 -0.191931 0.386431 0.295387 -0.044046 -0.131867 -0.231809
E -0.112693 -0.173111 0.803788 1.000000 -0.242507 -0.068794 -0.239756 -0.210293 -0.106431 -0.265852 ... 0.042331 -0.043214 -0.171171 -0.251833 -0.049223 0.241647 0.166818 0.097376 0.125016 -0.177356
F -0.189588 -0.249098 -0.305487 -0.242507 1.000000 -0.155484 0.011415 0.093393 0.019161 0.109600 ... -0.352333 -0.328142 -0.001738 0.660432 0.453157 -0.359352 -0.300223 0.252583 -0.089912 0.023859

5 rows × 25 columns

plot_matrix(rdkit_corr)

AUCDF


source

get_AUCDF

 get_AUCDF (df, col, reverse=False, plot=True, xlabel='Rank of reported
            kinase')

Plot CDF curve and get relative area under the curve

get_AUCDF(sorted_df,'values')

0.8754977337946649

Confusion matrix


source

plot_confusion_matrix

 plot_confusion_matrix (target, pred, class_names:list=['0', '1'],
                        normalize=False, title='Confusion matrix',
                        cmap=<matplotlib.colors.LinearSegmentedColormap
                        object at 0x7fd5fc3af340>)

Plot the confusion matrix.

Type Default Details
target pd.Series
pred pd.Series
class_names list [‘0’, ‘1’]
normalize bool False
title str Confusion matrix
cmap LinearSegmentedColormap <matplotlib.colors.LinearSegmentedColormap object at 0x7fd5fc3af340>
target = info.MW<160
pred = info.pKa1>2.1
plot_confusion_matrix(target,pred,normalize=True)
Normalized confusion matrix

End

# #| export
# def get_logo2(full: pd.DataFrame, # a dataframe that contains the full matrix of a kinase, with index as amino acid, and columns as positions
#               title: str = 'logo', # title of the graph
#               ):
    
#     "Plot logo from a full frequency matrix of a kinase"
    
#     # get S,T,Y ratio
#     S_ratio,T_ratio,Y_ratio = full[0][['s','t','y']]/full[0][['s','t','y']].sum()
    
#     # drop position 0 
#     full = full.drop(columns=[0])
    
#     # identify the minimum value other than 0
#     min_val = full[full > 0].min().min()
    
#     # replace 0s with the identified minimum value
#     full = full.replace(0, min_val)
    
#     norm_p = full.T

#     # calculate ratio, use subtraction
#     ratio =norm_p.apply(lambda r: r-r.median(),axis=1)
    
#     # calculate ratio based on previous method, divide values by median, followed by log2
#     # ratio =norm_p.apply(lambda r: r/r.median(),axis=1)
#     # ratio = np.log2(ratio)
    
#     # get the max value for a position
#     m = ratio.apply(lambda row: row[row > 0].sum(), axis=1).max()
    
#     # get the relative height of S,T,Y relative to the max value
#     new_row = pd.DataFrame({'S': S_ratio*m, 'T':T_ratio*m,'Y':Y_ratio*m}, index=[0]) 
    
#     # prepare the matrix for logomaker
#     ratio2 = pd.concat([ratio, new_row], ignore_index=False).fillna(0)

#     # logo_func(ratio2,title)
#     plot_logo_raw(ratio2.T,title=title)

# # get kinase-substrate dataset
# df = Data.get_ks_dataset()

# # get data for a specific kinase
# df_k = df.query('kinase == "DYRK2"')

# # get the full freq matrix
# _,full = get_freq(ks_k)

# # plot logo
# get_logo2(full,'DYRK2')