Plot

Functions for plotting in 2D

Setup

Dimensionality reduction


source

reduce_feature


def reduce_feature(
    data, # df or numpy array
    method:str='pca', # dimensionality reduction method; accepts both upper and lower case
    complexity:int=20, # None for PCA; perplexity for TSNE, recommend: 30; n_neighbors for UMAP, recommend: 15
    n:int=2, # n_components
    seed:int=123, # seed for random_state
    kwargs:VAR_KEYWORD
):

Reduce the dimensionality of a dataframe (or numpy array) of values

# morgan fingerprints
df = pd.read_csv('files/morgan.csv')
df
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 ... 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047
0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
298 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
299 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

300 rows × 2048 columns

pca = reduce_feature(df,'pca',n=10)
pca
PCA1 PCA2 PCA3 PCA4 PCA5 PCA6 PCA7 PCA8 PCA9 PCA10
0 5.055364 -0.201475 0.017985 -1.484066 1.548818 0.998801 -1.959704 -1.446077 2.579568 0.791852
1 0.720893 0.104023 3.616964 0.774077 0.262882 -0.813578 0.194586 0.606086 0.337897 1.006187
... ... ... ... ... ... ... ... ... ... ...
298 -0.911653 -0.834387 0.054771 -0.141513 -0.385500 0.036934 0.139089 -0.157255 -0.316494 -0.042620
299 -0.506653 -0.217700 -0.309063 0.005900 -0.275369 -0.652045 -0.151574 0.838589 -0.281150 -0.323430

300 rows × 10 columns

2d plot


source

set_sns


def set_sns(
    
):
set_sns()

source

plot_2d


def plot_2d(
    X:DataFrame, # a dataframe whose first column is x and whose second column is y
    data:NoneType=None, x:NoneType=None, y:NoneType=None, hue:NoneType=None, size:NoneType=None, style:NoneType=None,
    palette:NoneType=None, hue_order:NoneType=None, hue_norm:NoneType=None, sizes:NoneType=None,
    size_order:NoneType=None, size_norm:NoneType=None, markers:bool=True, style_order:NoneType=None,
    legend:str='auto', ax:NoneType=None
):

Make a 2D plot from a dataframe whose first column is x and whose second column is y

plot_2d(pca.iloc[:,:2])


source

plot_corr


def plot_corr(
    x, # a column of df
    y, # a column of df
    xlabel:NoneType=None, # x axis label
    ylabel:NoneType=None, # y axis label
    order:int=3, # polynomial order of the fit; use order=1 for a straight line
):
plot_corr(pca.PCA1,pca.PCA2)

End