解决绘图中乱码问题
# Make sans-serif text use the SimHei font so Chinese labels in figures are
# rendered as readable glyphs.
plt.rcParams['font.sans-serif'] = ['Simhei']

# Turn off the Unicode minus sign for axis ticks (presumably because the font
# chosen above has no glyph for it -- confirm against the installed font).
plt.rcParams['axes.unicode_minus'] = False
Seaborn中的三个类
FacetGrid类
JointGrid类
PairGrid类
Seaborn中数据集分布的可视化
Matplotlib是Python主要的绘图库。但是不建议你直接使用它,原因与不推荐你直接使用NumPy是一样的:虽然Matplotlib很强大,但它本身很复杂,图表往往需要经过大量调整才能变得精致。Seaborn其实是在Matplotlib的基础上进行了更高级的API封装,从而使作图更加容易;在大多数情况下,使用Seaborn就能做出很有吸引力的统计图表。
导入所需的库
import numpy
as np
import pandas
as pd
from scipy
import stats
, integrate
import matplotlib
.pyplot
as plt
import seaborn
as sns
# Apply seaborn's default theme with color_codes enabled.
sns.set(color_codes=True)

# Seed NumPy's global RNG with the sum of the code points of the string
# "distributions", so the random samples drawn afterwards are reproducible.
np.random.seed(sum(ord(ch) for ch in "distributions"))
单变量分布图
# Reference call listing every keyword of sns.distplot with the value shown
# in this tutorial. `a` is the data argument and is not defined in this
# snippet, so it must exist before this call can run.
sns.distplot(
    a,
    bins=None,
    hist=True,
    kde=True,
    rug=False,
    fit=None,
    hist_kws=None,
    kde_kws=None,
    rug_kws=None,
    fit_kws=None,
    color=None,
    vertical=False,
    norm_hist=False,
    axlabel=None,
    label=None,
    ax=None,
)
直方图
# 100 draws from np.random.normal with its default location and scale.
x = np.random.normal(size=100)

# One call draws three layers: a 30-bin histogram, a KDE curve, and a rug.
sns.distplot(x, bins=30, kde=True, rug=True)
核密度估计
或许你对核密度估计(KDE,Kernel density estimation)不像对直方图那么熟悉,但它是绘制分布形状的有力工具。如同直方图一样,KDE图用一个轴上的高度来表示另一个轴上观测值的密度。绘制KDE比绘制直方图的计算量更大:首先,每一个观测值都被一条以该值为中心的正态(高斯)曲线所取代;然后将这些曲线相加,并归一化使曲线下的面积为1,即得到密度估计。
# Ten draws from a normal distribution with mean 0 and standard deviation 1.
x = np.random.normal(0, 1, size=10)

# Kernel width: 1.06 * sample std * n ** (-1/5).
bandwidth = 1.06 * x.std() * x.size ** (-1 / 5.)

# Grid of 200 evenly spaced points on [-4, 4] where each kernel is evaluated.
support = np.linspace(-4, 4, 200)

# One normal pdf per observation, centred on that observation.
kernels = [stats.norm(x_i, bandwidth).pdf(support) for x_i in x]

# Draw every individual kernel in red. (The source lost its indentation; the
# plot call is taken to belong to the per-observation loop.)
for kernel in kernels:
    plt.plot(support, kernel, color="r")

# Mark the raw observations along the axis.
sns.rugplot(x, color=".3", height=0.3, linewidth=1);
# Sum the per-observation kernels pointwise to get the (unnormalized) density
# on the support grid.
density = np.sum(kernels, axis=0)

# Normalize so the curve integrates to 1 over the support grid.
# integrate.trapezoid replaces integrate.trapz, which was removed in
# SciPy 1.14; both implement the same trapezoidal rule.
density /= integrate.trapezoid(density, support)

plt.plot(support, density);
seaborn中使用kdeplot()函数,可以得到相同的曲线。distplot()内部也使用这个函数,但当只需要密度估计时,它提供了一个更直接的接口,也更容易访问其他选项:
# Density estimate of x with the area under the curve filled in.
# `fill=True` replaces `shade=True`, which seaborn deprecated in 0.11.
sns.kdeplot(x, fill=True);
# Compare the default bandwidth with a narrow and a wide one on the same axes.
# `bw_method` replaces the `bw` keyword, which seaborn deprecated in 0.11
# (there the old `bw` value is forwarded to `bw_method` unchanged).
sns.kdeplot(x)
sns.kdeplot(x, bw_method=.2, label="bw: 0.2")
sns.kdeplot(x, bw_method=2, label="bw: 2")
plt.legend();
拟合参数分布
# 200 draws from a gamma distribution with shape parameter 6.
x = np.random.gamma(6, size=200)

# Histogram + KDE, plus a gamma distribution fitted to the sample via `fit`.
sns.distplot(x, kde=True, fit=stats.gamma);
二元分布图
# Parameters of a two-dimensional normal distribution.
mean = [0, 1]
cov = [(1, .5), (.5, 1)]

# 200 samples, one row per sample and one column per dimension.
data = np.random.multivariate_normal(mean, cov, 200)

# Name the two dimensions "x" and "y" for the plotting calls that follow.
df = pd.DataFrame(data, columns=["x", "y"])

# Preview the first five rows (displayed when run as a notebook cell).
df.head()
          x         y
0 -0.337495  3.982726
1  0.744509  1.148785
2 -0.178456  0.135073
3  0.326349  0.582835
4  0.329835  0.894002
散点图
# Scatter plot of the "x" and "y" columns of df, drawn with jointplot.
sns.jointplot(x="x", y="y", kind="scatter", data=df);
# Build the same kind of figure by hand with JointGrid: green, black-edged
# scatter points on the joint axes and blue distplots on the marginal axes.
# Each plot_* call's return value is what the next call is made on, exactly
# as in the step-by-step `g = g.plot_...` form.
g = (
    sns.JointGrid(x="x", y="y", data=df)
    .plot_joint(plt.scatter, color="g", edgecolor="k")
    .plot_marginals(sns.distplot, kde=True, color="b")
)
Hexbin图
# 1000 bivariate-normal samples; transposing splits them into the x and y
# coordinate arrays.
x, y = np.random.multivariate_normal(mean, cov, 1000).T

# Draw the hexbin plot with the "white" axes style applied only inside
# this block.
with sns.axes_style("white"):
    sns.jointplot(x=x, y=y, kind="hex", color="k")
核密度估计
# Joint kernel density estimate of the "x" and "y" columns of df.
sns.jointplot(x="x", y="y", data=df, kind="kde");
# Blue KDE joint plot, kept in `g` so further layers can be added.
g = sns.jointplot(x="x", y="y", data=df, kind="kde", color="b")

# Overlay the individual observations as white star markers.
g.plot_joint(plt.scatter, c="w", s=30, linewidth=1, marker="*")

# Use math-text axis labels.
g.set_axis_labels("$X$", "$Y$");
# A 6x6-inch figure with a single Axes shared by all three layers.
f, ax = plt.subplots(figsize=(6, 6))

# Bivariate density of (x, y). The variables are passed as keywords because
# recent seaborn releases no longer accept `y` positionally.
sns.kdeplot(x=df.x, y=df.y, ax=ax)

# Rug for the x values along the horizontal axis.
sns.rugplot(x=df.x, color="g", ax=ax)

# Rug for the y values along the vertical axis; `y=` replaces the
# deprecated `vertical=True` flag.
sns.rugplot(y=df.y, ax=ax);
可视化数据集中成对的关系
# Load seaborn's bundled "iris" example dataset.
iris = sns.load_dataset("iris")

# Draw the default pairplot for the dataset.
sns.pairplot(iris);
PairGrid类
# PairGrid over iris, colored by the "species" column.
g = sns.PairGrid(iris, hue="species")

# Histograms on the diagonal, scatter plots everywhere else.
g.map_diag(plt.hist)
g.map_offdiag(plt.scatter)

# Add the legend for the hue variable.
g.add_legend()
# Pairplot colored by species with the "Set2" palette and KDE curves on the
# diagonal. `height` replaces the `size` keyword, which seaborn renamed in 0.9.
sns.pairplot(iris, hue="species", palette="Set2", diag_kind="kde", height=2.5);
也可以写成
# The same figure assembled by hand with PairGrid. `height` replaces the
# `size` keyword, which seaborn renamed in 0.9.
g = sns.PairGrid(iris, hue="species", palette="Set2", height=2.5)

# KDE curves on the diagonal, scatter plots off the diagonal.
g.map_diag(sns.kdeplot)
g.map_offdiag(plt.scatter)

# Add the legend for the hue variable.
g.add_legend();
指定变量进行画图
# Restrict the grid to the two sepal measurements, colored by species.
g = sns.PairGrid(
    iris,
    vars=["sepal_length", "sepal_width"],
    hue="species",
)

# Apply the same scatter plot to every subplot of the grid.
g.map(plt.scatter);
上三角和下三角选择不同的类型画图
# PairGrid over iris, colored by the "species" column.
g = sns.PairGrid(iris, hue="species")

# Different plot types per region: scatter above the diagonal, KDE with the
# "Blues_d" colormap below it, and thick KDE curves without a legend on the
# diagonal.
g.map_upper(plt.scatter)
g.map_lower(sns.kdeplot, cmap="Blues_d")
g.map_diag(sns.kdeplot, lw=3, legend=False);