ch08-数据可视化

关于数据可视化

1
2
3
4
5
6
7
8
9
# Numerical and tabular libraries used by the examples below.
import numpy as np
import pandas as pd
# Keep the pandas row-display limit that was in effect before it is overridden.
PREVIOUS_MAX_ROWS = pd.options.display.max_rows
# Cap pandas' printed output at 20 rows.
pd.options.display.max_rows = 20
# Seed NumPy's global random generator so the random data is reproducible.
np.random.seed(12345)
import matplotlib.pyplot as plt
import matplotlib
# Default size (width, height in inches) for newly created figures.
plt.rc('figure', figsize=(10, 6))
# Print arrays with 4 digits of precision and suppress scientific notation
# for small values.
np.set_printoptions(precision=4, suppress=True)

最简单的例子

1
# Minimal line plot: the values 0..9 are used as y; with a single argument
# matplotlib uses the element index as x.
plt.plot(np.arange(10))
[<matplotlib.lines.Line2D at 0x120132198>]

png

Figure和Subplot

1
2
3
4
# Create an empty figure, then fill slots 1-3 of a 2x2 subplot grid
# (the fourth slot is left unused).
fig = plt.figure()
ax1, ax2, ax3 = (fig.add_subplot(2, 2, slot) for slot in (1, 2, 3))

png

1
# Cumulative sum of 50 standard-normal samples, drawn with the format string
# 'k--' (black dashed line). plt.plot targets pyplot's current axes.
plt.plot(np.random.randn(50).cumsum(),'k--')
[<matplotlib.lines.Line2D at 0x120b7ad68>]

png

1
2
# First subplot: semi-transparent black histogram (20 bins) of 100 normal draws.
# The return value is bound to `_` so it is not echoed.
hist_sample = np.random.randn(100)
_ = ax1.hist(hist_sample, bins=20, color='k', alpha=0.3)
# Second subplot: the line y = x over 0..29 with Gaussian noise (scale 3) added.
xs = np.arange(30)
ax2.scatter(xs, xs + 3 * np.random.randn(30))
<matplotlib.collections.PathCollection at 0x1203c5208>
1
# In a notebook, evaluating `fig` as the last expression displays the figure
# again, now including what was drawn on its subplots.
fig

png

1
2
# Create a figure together with a 2-row by 3-column array of axes in one call.
fig, axes = plt.subplots(nrows=2, ncols=3)
# Echo the array of axes objects.
axes
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x120a9bcc0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x120f54c18>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x120f7b2e8>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x120fa1438>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x120fc9b00>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x120ffc198>]],
      dtype=object)

png

调整subplot周围的间距

1
2
3
4
5
# 2x2 grid whose panels share both the x and the y axis.
fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)
# Fill every panel with a 50-bin histogram of 500 standard-normal samples.
for i in range(2):
    for j in range(2):
        axes[i, j].hist(np.random.randn(500), bins=50, color='k', alpha=0.5)
# Shrink the horizontal (wspace) and vertical (hspace) gaps between panels.
plt.subplots_adjust(wspace=0.1, hspace=0.1)

png

颜色、标记和类型

1
# Start a new, empty figure for the following examples.
plt.figure()
<Figure size 432x288 with 0 Axes>




<Figure size 432x288 with 0 Axes>
1
2
# Import randn directly; the later examples call it without the np.random prefix.
from numpy.random import randn
# Blue dashed line through the cumulative sum of 50 standard-normal samples,
# with the style given as explicit keyword arguments.
plt.plot(randn(50).cumsum(),linestyle='--',color='b')
[<matplotlib.lines.Line2D at 0x1213f3a90>]

png

1
# Black dashed line with a circle marker at each of the 30 data points.
plt.plot(randn(30).cumsum(), color='k', linestyle='dashed', marker='o')
[<matplotlib.lines.Line2D at 0x1213f8fd0>]

png

1
# The format string 'ko--' combines three settings in one argument:
# color='k', marker='o', linestyle='--'.
plt.plot(randn(30).cumsum(),'ko--')
[<matplotlib.lines.Line2D at 0x1214baf60>]

png

1
# Close every open figure before starting the next example.
plt.close('all')
1
2
3
4
# One random walk, drawn twice to compare interpolation styles.
data = np.cumsum(np.random.randn(30))
# Default drawing: points joined by straight (dashed) segments.
plt.plot(data, 'k--', label='Default')
# Same data drawn as a step function ('steps-post' draw style).
plt.plot(data, 'k-', label='steps-post', drawstyle='steps-post')
# Let matplotlib choose the legend position.
plt.legend(loc='best')
<matplotlib.legend.Legend at 0x1216ab9e8>

png

刻度、标签和图例

设置细节

1
2
3
# Single-axes figure showing a 1000-step random walk; the tick, title and
# label examples that follow modify this `ax`.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
walk = randn(1000).cumsum()
ax.plot(walk)
[<matplotlib.lines.Line2D at 0x1226302b0>]

png

1
2
3
# Place x ticks at five fixed positions along the walk.
tick_positions = [0, 250, 500, 750, 1000]
ticks = ax.set_xticks(tick_positions)
# Replace the numeric tick text with names, rotated 30 degrees in a small font.
labels = ax.set_xticklabels(
    ['one', 'two', 'three', 'four', 'five'],
    rotation=30,
    fontsize='small',
)
1
# Give the axes a title.
ax.set_title('My First Title of Matplotlib')
Text(0.5,1,'My First Title of Matplotlib')
1
# Label the x-axis.
ax.set_xlabel('Stage')
Text(0.5,3.2,'Stage')
1
# In a notebook, evaluating `fig` as the last expression displays the figure
# again with the custom ticks, title and x label applied.
fig

png

添加图例

1
2
3
4
5
# New single-axes figure with three random walks. Each line gets a `label`
# so that a legend can be built from the labels afterwards.
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.plot(randn(100).cumsum(), 'k', label='one')    # solid black line
ax.plot(randn(100).cumsum(), 'k--', label='two')  # dashed black line
ax.plot(randn(100).cumsum(), 'k.', label='three') # black point markers
[<matplotlib.lines.Line2D at 0x1223a1940>]

png

1
2
# Build the legend from the `label` values passed to ax.plot; 'best' lets
# matplotlib choose the location.
ax.legend(loc='best')
# Display the figure again, now with the legend.
fig

png

注解以及在Subplot上绘图

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from datetime import datetime

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

# Load the CSV using its first column as the index, parsed as dates, and
# select the 'SPX' column (presumably S&P 500 index values -- confirm
# against the data file).
data = pd.read_csv('examples/spx.csv', index_col=0, parse_dates=True)
spx = data['SPX']

# Draw the series as a solid black line on the axes created above.
spx.plot(ax=ax, style='k-')

# (date, caption) pairs to annotate on the chart.
crisis_data = [
    (datetime(2007, 10, 11), 'Peak of bull market'),
    (datetime(2008, 3, 12), 'Bear Stearns Fails'),
    (datetime(2008, 9, 15), 'Lehman Bankruptcy'),
]

for date, label in crisis_data:
    # asof() gives the most recent available value at or before `date`,
    # so dates missing from the index still resolve to a value.
    level = spx.asof(date)
    # The arrow tip sits 75 above the series value, the text 225 above it.
    ax.annotate(label, xy=(date, level + 75),
                xytext=(date, level + 225),
                arrowprops=dict(facecolor='black', headwidth=4, width=2,
                                headlength=4),
                horizontalalignment='left', verticalalignment='top')

# Zoom in on 2007-2010
ax.set_xlim(['1/1/2007', '1/1/2011'])
ax.set_ylim([600, 1800])

ax.set_title('Important dates in the 2008-2009 financial crisis')
Text(0.5,1,'Important dates in the 2008-2009 financial crisis')

png

1
2
3
4
5
6
7
8
9
10
11
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

# Build three shape objects (patches); none is drawn until it is added to
# an axes.
rect = plt.Rectangle(xy=(0.2, 0.75), width=0.4, height=0.15,
                     color='k', alpha=0.3)
circ = plt.Circle(xy=(0.7, 0.2), radius=0.15, color='b', alpha=0.3)
triangle_vertices = [[0.15, 0.15], [0.35, 0.4], [0.2, 0.6]]
pgon = plt.Polygon(triangle_vertices, color='g', alpha=0.5)

# Attach each patch to the axes so that it is rendered.
ax.add_patch(rect)
ax.add_patch(circ)
ax.add_patch(pgon)
<matplotlib.patches.Polygon at 0x123523eb8>

png

将图表保存为文件

1
# Write the figure to disk as a PNG file.
# NOTE(review): the absolute path is specific to the author's machine --
# change it before running elsewhere.
fig.savefig('/Users/zhangyangfenbi.com/Desktop/demo.png')

写在最后

matplotlib实际上还是一个比较底层的工具,图表都是由各个基础组件组装起来的。书中还介绍了pandas自带的绘图功能,不过因为之前已经有了Seaborn的内容,这里就不再写pandas绘图的部分了,后续再把Seaborn的坑填上。