ch04_Numpy基础

1
import this
1
2
3
4
5
6
import numpy as np

#创建ndarray
data0 = [6,7,8.1,2]
data =np.array(data0)
data
array([6. , 7. , 8.1, 2. ])
1
2
3
4
import numpy as np
data2=[[1,2,3],[2.1,6,3]]
arr = np.array(data2)
arr.dtype
dtype('float64')
1
np.zeros((3,2))
array([[0., 0.],
       [0., 0.],
       [0., 0.]])
1
np.ones((3,2))
array([[1., 1.],
       [1., 1.],
       [1., 1.]])
1
2
np.empty((3,2))
### np.empty()返回的是未经初始化的垃圾值
array([[1., 1.],
       [1., 1.],
       [1., 1.]])
1
np.arange(5)
array([0, 1, 2, 3, 4])
1
2
test = np.arange(5)
test.dtype
dtype('int64')
1
2
3
4
# ones_like(),传入一个数组,根据其形状和dtype创建一个全1数组
a=[[1,2,3],[4,5,6]]
b=np.ones_like(a)
b
array([[1, 1, 1],
       [1, 1, 1]])
1
2
#创建正方的NxN矩阵(对角线为1,其余为0)
np.eye(5)
array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])
1
np.array([1,2,3],dtype=np.int64)
array([1, 2, 3])
1
2
3
4
#进行显式类型转换
data = np.array([1,2,3],dtype=np.int64)
print(data.dtype)
print(data.astype(np.float64).dtype)
int64
float64
1
2
3
# Convert an array of numeric byte strings to floats with astype.
# np.bytes_ is used instead of np.string_, an alias removed in NumPy 2.0.
num_str = np.array(['1.2', '3.25', '4.1'], dtype=np.bytes_)
print(num_str)
num_str.astype(float)
[b'1.2' b'3.25' b'4.1']





array([1.2 , 3.25, 4.1 ])
1
2
3
4
#任何计算都会被应用到元素级
arr=np.array([[1,2,3],[4,5,6]])

arr*arr
array([[ 1,  4,  9],
       [16, 25, 36]])
1
2
3
#与标量进行运算
arr=np.array([[1,2,3],[4,5,6]])
arr * 2
array([[ 2,  4,  6],
       [ 8, 10, 12]])

基本的索引和切片

1
2
arr = np.arange(10)
arr
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
1
2
3
arr = np.arange(10)
arr[5:7] = 99
arr
array([ 0,  1,  2,  3,  4, 99, 99,  7,  8,  9])
1
2
arr0 = np.ones((5,2))
arr0[0][1]
1.0
1
2
3
#可以传入一个索引列表来选取单个元素
arr1=np.ones((5,2))
arr1[1,1]
1.0
1
2
3
#高维数据
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
arr3d
array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])
1
arr3d
array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])
1
2
old_value = arr3d[0].copy()
arr3d[0]=12
1
arr3d[0]
array([[12, 12, 12],
       [12, 12, 12]])
1
arr3d[0]=old_value
1
arr3d[0]
array([[1, 2, 3],
       [4, 5, 6]])
1
2
arr2d=np.array([[1,2,3],[4,5,6],[7,8,9]])
arr2d
array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])
1
arr2d[:2]
array([[1, 2, 3],
       [4, 5, 6]])
1
arr2d[:2,1:2]
array([[2],
       [5]])
1
arr2d[:,:2]
array([[1, 2],
       [4, 5],
       [7, 8]])

布尔型索引

1
2
3
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe'])
data = np.random.randn(6,5)
names
array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe'], dtype='<U4')
1
data
array([[ 1.04334945, -0.51882989,  0.39479822, -1.26167769, -0.60706667],
       [-0.10854399, -0.77654652, -0.90842022, -0.91657036, -1.57115294],
       [ 0.78047305, -0.55011782, -0.72659944, -0.78787495,  2.10762613],
       [-0.94467982,  1.4091048 ,  0.4530369 , -1.83722786, -0.14625949],
       [ 0.34030044, -1.12975372,  1.03528971,  0.8180118 ,  0.42579557],
       [-0.07116101,  0.83523538, -0.61881987, -0.5052446 ,  1.06253317]])
1
names == 'Bob'
array([ True, False, False,  True, False, False])
1
data[names=='Bob']
array([[ 1.04334945, -0.51882989,  0.39479822, -1.26167769, -0.60706667],
       [-0.94467982,  1.4091048 ,  0.4530369 , -1.83722786, -0.14625949]])
1
data[names=='Bob',:2]
array([[ 1.04334945, -0.51882989],
       [-0.94467982,  1.4091048 ]])
1
2
3
demo = np.array([1,2,3,-1,-5])
demo[demo<0] = 1
demo
array([1, 2, 3, 1, 1])

花式索引

1
arr=np.empty((8,4))
1
2
3
# Set every element of row i to the value i; the loop body must be
# indented to belong to the for statement.
for i in range(8):
    arr[i] = i
arr
array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])
1
2
#传入整数列表或者ndarray获取元素
arr[[4,2,0,1]]
array([[4., 4., 4., 4.],
       [2., 2., 2., 2.],
       [0., 0., 0., 0.],
       [1., 1., 1., 1.]])
1
2
arr1=np.arange(32).reshape((8,4))
arr1
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])
1
arr1[[4,2],[3,2]]
array([19, 10])

数组转置和轴对换

1
import numpy as np
1
arr = np.arange(15).reshape((3,5))
1
arr
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])
1
arr.T
array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])
1
2
arr= np.random.randn(6,3)
arr
array([[-1.14899925,  2.01403377, -0.579223  ],
       [ 1.29437371, -0.37256935, -0.1998847 ],
       [ 0.88795876,  0.38322303, -0.77289001],
       [ 0.84318194,  1.57318664, -0.14691985],
       [ 0.09926862, -0.84374676,  0.47847472],
       [ 0.30721121,  0.7380255 ,  1.09155033]])
1
np.dot(arr.T,arr)
array([[ 4.59926208, -0.98662632, -0.02053929],
       [-0.98662632,  8.0735063 , -1.21754486],
       [-0.02053929, -1.21754486,  2.41481777]])

通用函数:快速的元素级数组函数

1
2
arr = np.arange(10)
arr
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
1
np.sqrt(arr)
array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])
1
np.exp(arr)
array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03])
1
2
x = np.random.randn(8)
x
array([-1.52621084,  0.91491997,  1.8613378 ,  0.50723883,  0.26956039,
       -0.65576259,  0.81621241, -0.71835102])
1
2
y = np.random.randn(8)
y
array([-0.61305033, -0.99195929, -0.89955148, -0.63491395,  1.54908888,
       -1.82440893,  0.08511608, -0.60391516])
1
np.maximum(x,y)
array([-0.61305033,  0.91491997,  1.8613378 ,  0.50723883,  1.54908888,
       -0.65576259,  0.81621241, -0.60391516])

利用数组进行数据处理

用数组表达式代替循环的做法,我们称之为矢量化

1
points = np.arange(-5,5,0.01)#1000个间隔点
1
xs,ys=np.meshgrid(points,points)
1
xs
array([[-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       ...,
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99]])
1
ys
array([[-5.  , -5.  , -5.  , ..., -5.  , -5.  , -5.  ],
       [-4.99, -4.99, -4.99, ..., -4.99, -4.99, -4.99],
       [-4.98, -4.98, -4.98, ..., -4.98, -4.98, -4.98],
       ...,
       [ 4.97,  4.97,  4.97, ...,  4.97,  4.97,  4.97],
       [ 4.98,  4.98,  4.98, ...,  4.98,  4.98,  4.98],
       [ 4.99,  4.99,  4.99, ...,  4.99,  4.99,  4.99]])
1
2
z=np.sqrt(xs**2+ys**2)
z
array([[7.07106781, 7.06400028, 7.05693985, ..., 7.04988652, 7.05693985,
        7.06400028],
       [7.06400028, 7.05692568, 7.04985815, ..., 7.04279774, 7.04985815,
        7.05692568],
       [7.05693985, 7.04985815, 7.04278354, ..., 7.03571603, 7.04278354,
        7.04985815],
       ...,
       [7.04988652, 7.04279774, 7.03571603, ..., 7.0286414 , 7.03571603,
        7.04279774],
       [7.05693985, 7.04985815, 7.04278354, ..., 7.03571603, 7.04278354,
        7.04985815],
       [7.06400028, 7.05692568, 7.04985815, ..., 7.04279774, 7.04985815,
        7.05692568]])
1
2
3
import matplotlib.pyplot as plt
# Display z as an image using the OrRd colormap.
plt.imshow(z, cmap=plt.cm.OrRd)
# Raw string: keeps the LaTeX backslash in \sqrt from being parsed as an
# (invalid) string escape sequence; the resulting title text is unchanged.
plt.title(r'Image plot of $\sqrt{x^2+y^2}$ for a grid of values')
Text(0.5,1,'Image plot of $\\sqrt{x^2+y^2}$ for a grid of values')

png

将条件表达式表述为数组运算

1
np.where?
1
2
xarr = np.random.randn(5)
xarr
array([-0.05191135,  0.46807508,  1.5955647 , -1.21585517,  0.68848672])
1
2
yarr=np.random.randn(5)
yarr
array([-1.60333056,  2.16303939, -0.37219312, -1.85605698,  0.41180341])
1
np.where(xarr>=0,xarr,yarr)#构建布尔型索引,实现想要的东西
array([-1.60333056,  0.46807508,  1.5955647 , -1.85605698,  0.68848672])
1
2
3
##替换所有正值为1,负值为-1
arr=np.random.randn(4,4)
arr
array([[ 0.32359596, -1.15124188,  0.12417984, -1.34511765],
       [-0.41019678,  1.0543996 ,  2.6307449 ,  0.74725061],
       [ 1.03418855, -0.58064793, -0.61019497, -1.13773196],
       [-0.64005234,  0.73911588,  1.15966556, -0.26103626]])
1
np.where(arr>0,1,-1)
array([[ 1, -1,  1, -1],
       [-1,  1,  1,  1],
       [ 1, -1, -1, -1],
       [-1,  1,  1, -1]])

数学和统计方法

1
2
arr=np.random.randn(5,4)
arr
array([[ 0.3794937 , -0.91051976,  0.54977469,  0.98390242],
       [ 1.24989257, -0.14989659, -0.70528342,  0.66344849],
       [ 0.15440786,  0.75716823, -1.54809025,  0.05263153],
       [ 0.63369665, -1.47415409, -1.35897948, -0.24638285],
       [ 0.36552553,  1.44667304,  1.80073603,  0.70854674]])
1
arr.mean()
0.1676295519907499
1
np.mean(arr)
0.1676295519907499
1
arr.sum()
3.352591039814998
1
arr.sum(axis=1)  # 接受轴参数
array([ 1.00265105,  1.05816104, -0.58388263, -2.44581977,  4.32148134])
1
arr[0]
array([ 0.3794937 , -0.91051976,  0.54977469,  0.98390242])
1
arr.sum(0)
array([ 2.7830163 , -0.33072917, -1.26184243,  2.16214634])
1
np.sum?
1
2
arr=np.arange(9).reshape((3,3))
arr
array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

用于布尔型数组的方法

1
2
3
#求和等计算方法中,布尔值会被强制转化为0和1,因此sum()方法可以用来对True值计数
arr=np.random.randn(10)
arr
array([ 0.62662142, -0.23714492,  2.52986602,  0.66838534,  0.47275484,
        1.81467714, -0.39454002, -2.59347451,  0.90815739, -0.0813537 ])
1
arr > 0
array([ True, False,  True,  True,  True,  True, False, False,  True,
       False])
1
(arr>0).sum()
6
1
(arr<0).any() #若有一个True,则为True
True
1
(arr>0).all() #全部为True,则为True
False

排序

1
2
arr=np.random.randn(10)
arr
array([ 0.23588362, -0.45045835,  1.22450303, -0.2419639 , -0.23873288,
       -1.09141889, -0.87760038, -0.53059957,  0.15428331, -1.43959318])
1
2
arr.sort()
arr
array([-1.43959318, -1.09141889, -0.87760038, -0.53059957, -0.45045835,
       -0.2419639 , -0.23873288,  0.15428331,  0.23588362,  1.22450303])
1
2
3
#可以在任意轴上排序
arr=np.random.randn(5,5)
arr
array([[ 0.34871828,  1.03879317,  0.21363644,  0.05765405,  1.01230602],
       [ 0.20640237, -0.2323433 ,  0.2214327 ,  1.16611884,  0.5123435 ],
       [ 0.4660787 , -0.16572832,  0.03096976,  1.07155177, -1.90712269],
       [-0.45824044, -0.25984925, -1.37214123,  1.14006713, -0.70677386],
       [-2.51549148,  0.1314714 ,  1.68439925, -0.92174553,  1.03215197]])
1
arr.sort?
1
2
arr.sort(axis=1)
arr
array([[ 0.05765405,  0.21363644,  0.34871828,  1.01230602,  1.03879317],
       [-0.2323433 ,  0.20640237,  0.2214327 ,  0.5123435 ,  1.16611884],
       [-1.90712269, -0.16572832,  0.03096976,  0.4660787 ,  1.07155177],
       [-1.37214123, -0.70677386, -0.45824044, -0.25984925,  1.14006713],
       [-2.51549148, -0.92174553,  0.1314714 ,  1.03215197,  1.68439925]])
1
2
arr.sort(1)
arr
array([[ 0.05765405,  0.21363644,  0.34871828,  1.01230602,  1.03879317],
       [-0.2323433 ,  0.20640237,  0.2214327 ,  0.5123435 ,  1.16611884],
       [-1.90712269, -0.16572832,  0.03096976,  0.4660787 ,  1.07155177],
       [-1.37214123, -0.70677386, -0.45824044, -0.25984925,  1.14006713],
       [-2.51549148, -0.92174553,  0.1314714 ,  1.03215197,  1.68439925]])

唯一化

1
arr = np.array([3,3,2,1,1,54,223,3,2,3])
1
np.unique(arr) #找出数组中的唯一值,并返回排序的结果
array([  1,   2,   3,  54, 223])

线性代数

1
2
3
4
5
x = np.array([[1., 2., 3.], [4., 5., 6.]])
y = np.array([[6., 23.], [-1, 7], [8, 9]])
x
y
np.dot(x,y)
array([[ 28.,  64.],
       [ 67., 181.]])
1
np.dot(x,np.ones(3))
array([ 6., 15.])

1
2
arr = np.random.normal(size=(4,4))
arr
array([[ 1.11433974, -0.2520489 , -0.2349691 , -0.94610534],
       [ 2.28170964,  0.78521532, -2.05844323, -0.40333454],
       [-0.1225117 , -0.9144343 ,  0.25932307,  0.283972  ],
       [-0.63086567, -1.17039446, -0.20103388, -0.21096491]])

随机漫步

1
2
3
nstep = 100
draws = np.random.randint(0,2,size=nstep)
draws
array([1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1,
       0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1,
       0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1,
       1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1])
1
2
steps = np.where(draws>0,1,-1)
steps
array([ 1,  1,  1,  1, -1, -1, -1, -1,  1,  1, -1, -1, -1,  1,  1, -1, -1,
       -1, -1,  1,  1,  1, -1,  1, -1, -1, -1, -1,  1,  1, -1,  1, -1,  1,
        1,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1, -1, -1, -1,
        1, -1, -1, -1,  1, -1, -1,  1, -1,  1, -1, -1, -1, -1,  1, -1,  1,
        1,  1,  1,  1,  1, -1,  1, -1, -1, -1,  1,  1, -1, -1, -1,  1,  1,
       -1,  1,  1,  1,  1, -1,  1, -1, -1,  1,  1, -1,  1, -1,  1])
1
walk=steps.cumsum()
1
walk.min()
-19
1
walk.max()
4
1
(np.abs(walk)>=5).argmax()
40