Load packages
# numerical calculation & data frames
import numpy as np
import pandas as pd
# visualization
import matplotlib.pyplot as plt
import seaborn as sns
import seaborn.objects as so
# statistics
import statsmodels.api as sm
# pandas options
pd.set_option('mode.copy_on_write' , True ) # turn on copy-on-write mode (pandas 2.0)
pd.options.display.float_format = ' {:.2f} ' .format # show floats with two decimals; undo with pd.reset_option('display.float_format')
pd.options.display.max_rows = 7 # max number of rows to display
# NumPy options
np.set_printoptions(precision = 2 , suppress= True ) # two-digit precision; suppress scientific notation
# Request the "retina" figure format so inline plots look sharp on high-resolution displays
import matplotlib_inline
matplotlib_inline.backend_inline.set_matplotlib_formats("retina" )
Customizing
Scales and layouts
# Load seaborn's example "penguins" dataset
penguins = sns.load_dataset("penguins" )
# Jittered dots of body mass per species, colored by island, one facet column per sex
(
so.Plot(penguins, x= "body_mass_g" , y= "species" , color= "island" )
.facet(col= "sex" )
.add(so.Dot(), so.Jitter(0.5 ))
.scale(color= "Set2" ) # color palette chosen by name, here "Set2"
.layout(size= (7 , 5 )) # plot size
)
# Load seaborn's example "diamonds" dataset
diamonds = sns.load_dataset("diamonds" )
# Carat drives both the x position and a continuous color scale; cut drives the marker
(
so.Plot(diamonds, x= "carat" , y= "price" , color= "carat" , marker= "cut" )
.add(so.Dots())
.scale(
# "crest" palette, normalization range fixed to (0, 3), square-root transform
color= so.Continuous("crest" , norm= (0 , 3 ), trans= "sqrt" ),
)
)
Choosing color palettes
Legends and ticks
# Bar chart of the number of rows per species
(
so.Plot(penguins, x= "species" )
.add(so.Bar(), so.Count())
.scale(x= so.Nominal(order= ["Adelie" , "Gentoo" , "Chinstrap" ])) # change the order of the categories on the x axis
)
# Customize tick positions and tick labels per scale
(
so.Plot(diamonds, x= "carat" , y= "price" , color= "carat" )
.add(so.Dots())
.scale(
x= so.Continuous().tick(every= 0.5 ), # one tick every 0.5 on the x axis
y= so.Continuous().label(like= "$ {x:,.0f} " ), # dollar labels; for percent labels use like="{x:.1%}" ("{x:.1f%}" is not a valid format spec)
color= so.Continuous().tick(at= [1 , 2 , 3 , 4 ]), # ticks for the color scale at exactly these values
)
)
Limits, labels, and titles
Plot has a number of methods for simple customization, including Plot.label(), Plot.limit(), and Plot.share():
penguins = sns.load_dataset("penguins" )
# Same faceted dot plot as before, now with custom axis sharing, limits and labels
(
so.Plot(penguins, x= "body_mass_g" , y= "species" , color= "island" )
.facet(col= "sex" )
.add(so.Dot(), so.Jitter(0.5 ))
.share(x= False ) # do not share the x axis between the facets
.limit(y= (2.5 , - 0.5 )) # explicit y limits, written from 2.5 down to -0.5
.label(
x= "Body mass (g)" ,
y= "" , # empty string as the y-axis label
color= str .capitalize, # a callable is passed here instead of a fixed string
title= " {} penguins" .format , # a callable is passed here too; its argument fills the {} slot
)
)
Themes
Plot.theme()은 Matplotlib rc parameters를 dictionary로 받음: plt.rcParams.keys()
Seaborn의 style: sns.axes_style()
Matplotlib의 style: plt.style.library[]
# Seaborn's style as a dict of rc parameters (example output follows as comments)
sns.axes_style()
# {'axes.facecolor': 'white',
# 'axes.edgecolor': 'black',
# 'axes.grid': False,
# 'axes.axisbelow': 'line',
# 'axes.labelcolor': 'black',
# 'figure.facecolor': 'white',
# 'grid.color': '#b0b0b0',
# 'grid.linestyle': '-',
# 'text.color': 'black',
# 'xtick.color': 'black',
# 'ytick.color': 'black',
# 'xtick.direction': 'out',
# 'ytick.direction': 'out',
# 'lines.solid_capstyle': <CapStyle.projecting: 'projecting'>,
# 'patch.edgecolor': 'black',
# 'patch.force_edgecolor': False,
# 'image.cmap': 'viridis',
# 'font.family': ['NanumGothic'],
# 'font.sans-serif': ['DejaVu Sans',
# 'Bitstream Vera Sans',
# 'Computer Modern Sans Serif',
# 'Lucida Grande',
# 'Verdana',
# 'Geneva',
# 'Lucid',
# 'Arial',
# 'Helvetica',
# 'Avant Garde',
# 'sans-serif'],
# 'xtick.bottom': True,
# 'xtick.top': False,
# 'ytick.left': True,
# 'ytick.right': False,
# 'axes.spines.left': True,
# 'axes.spines.bottom': True,
# 'axes.spines.right': True,
# 'axes.spines.top': True}
# Names of the available Matplotlib style sheets (example output follows as comments)
plt.style.available
# ['Solarize_Light2',
# '_classic_test_patch',
# '_mpl-gallery',
# '_mpl-gallery-nogrid',
# 'bmh',
# 'classic',
# 'dark_background',
# 'fast',
# 'fivethirtyeight',
# 'ggplot',
# 'grayscale',
# 'seaborn-v0_8',
# 'seaborn-v0_8-bright',
# 'seaborn-v0_8-colorblind',
# 'seaborn-v0_8-dark',
# 'seaborn-v0_8-dark-palette',
# 'seaborn-v0_8-darkgrid',
# 'seaborn-v0_8-deep',
# 'seaborn-v0_8-muted',
# 'seaborn-v0_8-notebook',
# 'seaborn-v0_8-paper',
# 'seaborn-v0_8-pastel',
# 'seaborn-v0_8-poster',
# 'seaborn-v0_8-talk',
# 'seaborn-v0_8-ticks',
# 'seaborn-v0_8-white',
# 'seaborn-v0_8-whitegrid',
# 'tableau-colorblind10']
# rc parameters stored under one named Matplotlib style sheet
plt.style.library["seaborn-v0_8-whitegrid" ]
# Base plot reused by the theme examples that follow
p = so.Plot(penguins, x= "body_mass_g" , y= "species" , color= "island" ).add(so.Dot(), so.Jitter(0.5 ))
# Theme given as a plain dict of rc parameters
p.theme(
{
"axes.facecolor" : "white" ,
"axes.edgecolor" : "0.8" ,
"axes.spines.top" : False ,
"axes.spines.right" : False ,
}
)
# Start from seaborn's "whitegrid" style and override a single entry
p.theme({** sns.axes_style("whitegrid" ), "grid.linestyle" : ":" })
Seaborn: controlling figure aesthetics
# Use a Matplotlib style sheet as the theme
p.theme({** plt.style.library["fivethirtyeight" ]})
플랏 단위가 아닌 전체 플랏에 대해 적용하려면 Plot.config.theme.update()를 사용: 참고 링크
# Build the theme dict once and install it in the Plot-wide theme configuration
theme_dict = {** sns.axes_style("whitegrid" ), "grid.linestyle" : ":" }
so.Plot.config.theme.update(theme_dict)
# reset the Plot-wide theme configuration
so.Plot.config.theme.reset()
한글 폰트 설정
플랏 별로 적용시: p.theme({"font.family": ["AppleGothic"]})
전체에 적용시: so.Plot.config.theme.update({"font.family": ["AppleGothic"]})
설치 폰트 확인
# Inspect the fonts registered with matplotlib
import matplotlib.font_manager as fm
# Font entries (file location / name)
fm.fontManager.ttflist
# Font names only, sorted
sorted ([f.name for f in fm.fontManager.ttflist])
# Font names that contain 'Nanum'
[f.name for f in fm.fontManager.ttflist if "Nanum" in f.name]
폰트 추가 설치
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
# How to add a font that is not in the font list
# NOTE(review): FontManager.addfont(path) looks like the documented way to register a font file — confirm before switching
font_entry = fm.FontEntry(
fname= "/Users/skcho/Library/Fonts/NanumGothic.ttf" , # path where the font file is stored
name= "NanumGothic" , # name given to the font
)
fm.fontManager.ttflist.insert(0 , font_entry) # add the font to Matplotlib's font list
plt.rcParams.update(
{"font.family" : "NanumGothic" , "axes.unicode_minus" : False }
) # set the font & keep the '-' character from rendering incorrectly
# Font names that contain 'Nanum'
[f.name for f in fm.fontManager.ttflist if "Nanum" in f.name]
tips = sns.load_dataset("tips" )
# Replace the English day labels with their Korean names
tips["day" ] = tips["day" ].map ({"Sun" : "일요일" , "Sat" : "토요일" , "Thur" : "목요일" , "Fri" : "금요일" })
# Plot with a Korean title and y-axis label
p = so.Plot(tips, x= "day" , y= "tip" ).add(so.Dots()).label(title= "요일별 받은 팁 금액" , y= "팁 금액" )
# Set the Korean font for this plot only
p.theme({"font.family" : ["AppleGothic" ]}) # Windows: "Malgun Gothic"
# or, to apply it to all plots,
so.Plot.config.theme.update({"font.family" : ["AppleGothic" ]})
Seaborn 플랏에서 흐릿하게 그려지는 경우: 고해상도 디스플레이에 최적화
# Switch the inline figure format to "retina"
import matplotlib_inline
matplotlib_inline.backend_inline.set_matplotlib_formats("retina" )
Custom plots
아래와 같이 sbcustom.py 모듈을 working directory에 저장하거나, sys module을 이용하여 sbcustom.py가 있는 폴더를 sys.path에 추가하거나
import sys
# Append the folder that holds sbcustom.py to sys.path (replace the placeholder string with the real path)
sys.path.append('sbcustom.py이 있는 폴더 경로' )
패키지 형태로 conda 환경에 install하거나 conda 환경 폴더에 직접 저장할 수도 있음
sbcustom.py
"""
Seaborn.objects statistical plotting custom functions.
boxplot, rangeplot
"""
import seaborn as sns
import seaborn.objects as so
def boxplot(df, x, y, color=None, alpha=0.1, marker="<"):
    """Return a boxplot-like ``so.Plot``: raw points, a 50% range and the median.

    Three dodged layers are stacked on one plot:

    1. the jittered observations, drawn in a fixed gray (``".6"``) with
       opacity ``alpha``;
    2. a range mark spanning the 50% percentile interval
       (``errorbar=("pi", 50)``);
    3. a dot of size 8 at the median, drawn with ``marker``.

    Parameters
    ----------
    df : data passed to ``so.Plot``.
    x, y : variables for the two axes.
    color : optional variable mapped to color (also what the layers dodge on).
    alpha : opacity of the raw-observation dots.
    marker : marker used for the median dot.

    Returns
    -------
    The ``so.Plot`` object, so further ``Plot`` methods can be chained.
    """
    # The three marks, named after what they show.
    observations = so.Dots(alpha=alpha, color=".6")
    middle_half = so.Range()
    median_dot = so.Dot(pointsize=8, marker=marker)

    figure = so.Plot(df, x=x, y=y, color=color)
    figure = figure.add(observations, so.Jitter(), so.Dodge())
    figure = figure.add(middle_half, so.Est(errorbar=("pi", 50)), so.Dodge())
    figure = figure.add(median_dot, so.Agg("median"), so.Dodge())
    figure = figure.scale(color="Dark2")
    return figure.theme(dict(sns.axes_style("whitegrid")))
def rangeplot(df, x, y, color=None, alpha=0.1, marker="<"):
    """Return a ``so.Plot`` showing a 50% range and the median per group.

    Two dodged layers are stacked on one plot: a range mark spanning the
    50% percentile interval (``errorbar=("pi", 50)``) and a dot of size 8
    at the median, drawn with ``marker``.

    Parameters
    ----------
    df : data passed to ``so.Plot``.
    x, y : variables for the two axes.
    color : optional variable mapped to color (also what the layers dodge on).
    alpha : accepted but not used by this function.
    marker : marker used for the median dot.

    Returns
    -------
    The ``so.Plot`` object, so further ``Plot`` methods can be chained.
    """
    spread = so.Range()
    center = so.Dot(pointsize=8, marker=marker)

    canvas = so.Plot(df, x=x, y=y, color=color)
    canvas = canvas.add(spread, so.Est(errorbar=("pi", 50)), so.Dodge())
    canvas = canvas.add(center, so.Agg("median"), so.Dodge())
    canvas = canvas.scale(color="Dark2")
    return canvas.theme(dict(sns.axes_style("whitegrid")))
다음과 같이 import해서 사용
# Import the module under an alias and call through it
import sbcustom as sbc
sbc.rangeplot(penguins, x= "species" , y= "body_mass_g" )
# or import the functions and call them directly
from sbcustom import boxplot, rangeplot
rangeplot(penguins, x= "species" , y= "body_mass_g" )
boxplot(penguins, x= "species" , y= "body_mass_g" , color= "island" )
seaborn.objects의 함수를 연결해서 쓸 수 있음!
# Further Plot methods are chained onto the value returned by boxplot()
(
boxplot(penguins, x= "species" , y= "body_mass_g" , color= "island" )
.facet("species" )
.layout(size= (7.5 , 3.5 ))
)
Snippets
Snippets: Configure User Snippets
"seaborn.obj" : {
"prefix" : "sbj" ,
"body" : [
"(" ,
" \t so.Plot($1, x='$2', y='$3')" ,
" \t .add($0)" ,
")" ,
],
"description" : "plot seaborn.objects"
}
Multiple plots in a Jupyter Notebook
display()
.show()
(pyplot으로 변환)
# Keep both plots in variables so they can be shown from the same cell
p1 = rangeplot(penguins, x= "species" , y= "body_mass_g" )
p2 = boxplot(penguins, x= "species" , y= "body_mass_g" , color= "island" )
display(p1, p2) # Jupyter output; plays the same role as print()