Alternative plots

R for Data Science by Wickham & Grolemund

Author

Sungkyun Cho

Published

March 24, 2024

Load packages
# numerical calculation & data frames
import numpy as np
import pandas as pd

# visualization
import matplotlib.pyplot as plt
import seaborn as sns
import seaborn.objects as so

# statistics
import statsmodels.api as sm

# pandas options
pd.options.mode.copy_on_write = True  # pandas 2.0
pd.set_option("display.float_format", "{:.2f}".format)  # undo: pd.reset_option('display.float_format')
pd.set_option("display.max_rows", 7)  # max number of rows to display

# NumPy options
np.set_printoptions(suppress=True, precision=2)  # suppress=True: no scientific notation

# For high resolution display: request the "retina" figure format
# from the matplotlib inline backend
import matplotlib_inline
matplotlib_inline.backend_inline.set_matplotlib_formats("retina")

Fitted lines

alternatives: plotly, seaborn

# Fetch the "mpg" dataset of the ggplot2 R package and keep its `.data` table
mpg = sm.datasets.get_rdataset(dataname="mpg", package="ggplot2").data

Plotly

Linear and Non-Linear Trendlines in Python

import plotly.express as px

# hwy vs. displ scatter coloured by drv, with LOWESS trendlines;
# update_layout then fixes the figure size
px.scatter(
    data_frame=mpg,
    x="displ",
    y="hwy",
    color="drv",
    trendline="lowess",
).update_layout(height=500, width=700)

다음과 같이 `trendline_options`의 `frac`으로 smoothing parameter를 지정할 수 있음

# Same scatter as before, now passing an explicit LOWESS `frac`
px.scatter(
    data_frame=mpg,
    x="displ",
    y="hwy",
    color="drv",
    trendline="lowess",
    trendline_options={"frac": 0.3},  # smoothing parameter
).update_layout(height=500, width=700)
penguins = sns.load_dataset("penguins")

# Bill depth vs. bill length, coloured by species and faceted by island,
# with LOWESS trendlines
px.scatter(
    data_frame=penguins,
    x="bill_length_mm",
    y="bill_depth_mm",
    color="species",
    facet_col="island",  # faceting
    opacity=0.5,  # alpha
    trendline="lowess",
    trendline_options={"frac": 0.5},
).update_layout(height=400, width=900)

Seaborn: lmplot()

# hwy vs. displ with a LOWESS fit, grouped by drv
sns.lmplot(
    data=mpg,
    x="displ",
    y="hwy",
    hue="drv",  # hue instead of color
    lowess=True,
    height=3,
    aspect=5 / 3,
    scatter_kws={"alpha": 0.5, "s": 20},  # s: point size
)
plt.show()  # may be omitted

penguins = sns.load_dataset("penguins")

# Bill depth vs. bill length with a LOWESS fit, grouped by species,
# one panel per sex
sns.lmplot(
    data=penguins,
    x="bill_length_mm",
    y="bill_depth_mm",
    hue="species",
    col="sex",  # faceting: col, row
    lowess=True,
    height=3,
    scatter_kws={"alpha": 0.5, "s": 5},
)
plt.show()

Box plot

tips = sns.load_dataset("tips")

# Box plot of tip by sex, coloured by day, one facet column per time
px.box(
    data_frame=tips,
    x="sex",
    y="tip",
    color="day",
    facet_col="time",
)