1 | import this |
1 | import numpy as np |
array([6. , 7. , 8.1, 2. ])
1 | import numpy as np |
dtype('float64')
1 | np.zeros((3,2)) |
array([[0., 0.],
[0., 0.],
[0., 0.]])
1 | np.ones((3,2)) |
array([[1., 1.],
[1., 1.],
[1., 1.]])
1 | np.empty((3,2))  # 注意:np.empty只分配内存而不初始化,返回的内容是任意值(这里恰好全为1),不保证是0或1 |
array([[1., 1.],
[1., 1.],
[1., 1.]])
1 | np.arange(5) |
array([0, 1, 2, 3, 4])
1 | test = np.arange(5) |
dtype('int64')
1 | # ones_like(),传入一个数组,根据其形状和dtype创建一个全1数组 |
array([[1, 1, 1],
[1, 1, 1]])
1 | #创建正方的NxN矩阵(对角线为1,其余为0) |
array([[1., 0., 0., 0., 0.],
[0., 1., 0., 0., 0.],
[0., 0., 1., 0., 0.],
[0., 0., 0., 1., 0.],
[0., 0., 0., 0., 1.]])
1 | np.array([1,2,3],dtype=np.int64) |
array([1, 2, 3])
1 | #进行显式类型转换 |
int64
float64
1 | num_str = np.array(['1.2','3.25','4.1'],dtype=np.string_) |
[b'1.2' b'3.25' b'4.1']
array([1.2 , 3.25, 4.1 ])
1 | #大小相等的数组之间的任何算术运算都会被应用到元素级 |
array([[ 1, 4, 9],
[16, 25, 36]])
1 | #与标量进行运算 |
array([[ 2, 4, 6],
[ 8, 10, 12]])
基本的索引和切片
1 | arr = np.arange(10) |
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
1 | arr = np.arange(10) |
array([ 0, 1, 2, 3, 4, 99, 99, 7, 8, 9])
1 | arr0 = np.ones((5,2)) |
dtype('float64')
1 | #可以传入一个以逗号隔开的索引列表来选取单个元素,例如arr0[0,1] |
1.0
1 | #高维数据 |
array([[[ 1, 2, 3],
[ 4, 5, 6]],
[[ 7, 8, 9],
[10, 11, 12]]])
1 | arr3d |
array([[[ 1, 2, 3],
[ 4, 5, 6]],
[[ 7, 8, 9],
[10, 11, 12]]])
1 | old_value = arr3d[0].copy() |
1 | arr3d[0] |
array([[12, 12, 12],
[12, 12, 12]])
1 | arr3d[0]=old_value |
1 | arr3d[0] |
array([[1, 2, 3],
[4, 5, 6]])
1 | arr2d=np.array([[1,2,3],[4,5,6],[7,8,9]]) |
array([[1, 2, 3],
[4, 5, 6],
[7, 8, 9]])
1 | arr2d[:2] |
array([[1, 2, 3],
[4, 5, 6]])
1 | arr2d[:2,1:2] |
array([[2],
[5]])
1 | arr2d[:,:2] |
array([[1, 2],
[4, 5],
[7, 8]])
布尔型索引
1 | names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe']) |
array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe'], dtype='<U4')
1 | data |
array([[ 1.04334945, -0.51882989, 0.39479822, -1.26167769, -0.60706667],
[-0.10854399, -0.77654652, -0.90842022, -0.91657036, -1.57115294],
[ 0.78047305, -0.55011782, -0.72659944, -0.78787495, 2.10762613],
[-0.94467982, 1.4091048 , 0.4530369 , -1.83722786, -0.14625949],
[ 0.34030044, -1.12975372, 1.03528971, 0.8180118 , 0.42579557],
[-0.07116101, 0.83523538, -0.61881987, -0.5052446 , 1.06253317]])
1 | names == 'Bob' |
array([ True, False, False, True, False, False])
1 | data[names=='Bob'] |
array([[ 1.04334945, -0.51882989, 0.39479822, -1.26167769, -0.60706667],
[-0.94467982, 1.4091048 , 0.4530369 , -1.83722786, -0.14625949]])
1 | data[names=='Bob',:2] |
array([[ 1.04334945, -0.51882989],
[-0.94467982, 1.4091048 ]])
1 | demo = np.array([1,2,3,-1,-5]) |
array([1, 2, 3, 1, 1])
花式索引
1 | arr=np.empty((8,4)) |
1 | for i in range(8): |
array([[0., 0., 0., 0.],
[1., 1., 1., 1.],
[2., 2., 2., 2.],
[3., 3., 3., 3.],
[4., 4., 4., 4.],
[5., 5., 5., 5.],
[6., 6., 6., 6.],
[7., 7., 7., 7.]])
1 | #传入整数列表或者ndarray获取元素 |
array([[4., 4., 4., 4.],
[2., 2., 2., 2.],
[0., 0., 0., 0.],
[1., 1., 1., 1.]])
1 | arr1=np.arange(32).reshape((8,4)) |
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11],
[12, 13, 14, 15],
[16, 17, 18, 19],
[20, 21, 22, 23],
[24, 25, 26, 27],
[28, 29, 30, 31]])
1 | arr1[[4,2],[3,2]] |
array([19, 10])
数组转置和轴对换
1 | import numpy as np |
1 | arr = np.arange(15).reshape((3,5)) |
1 | arr |
array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14]])
1 | arr.T |
array([[ 0, 5, 10],
[ 1, 6, 11],
[ 2, 7, 12],
[ 3, 8, 13],
[ 4, 9, 14]])
1 | arr= np.random.randn(6,3) |
array([[-1.14899925, 2.01403377, -0.579223 ],
[ 1.29437371, -0.37256935, -0.1998847 ],
[ 0.88795876, 0.38322303, -0.77289001],
[ 0.84318194, 1.57318664, -0.14691985],
[ 0.09926862, -0.84374676, 0.47847472],
[ 0.30721121, 0.7380255 , 1.09155033]])
1 | np.dot(arr.T,arr) |
array([[ 4.59926208, -0.98662632, -0.02053929],
[-0.98662632, 8.0735063 , -1.21754486],
[-0.02053929, -1.21754486, 2.41481777]])
通用函数:快速的元素级数组函数
1 | arr = np.arange(10) |
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
1 | np.sqrt(arr) |
array([0. , 1. , 1.41421356, 1.73205081, 2. ,
2.23606798, 2.44948974, 2.64575131, 2.82842712, 3. ])
1 | np.exp(arr) |
array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
2.98095799e+03, 8.10308393e+03])
1 | x = np.random.randn(8) |
array([-1.52621084, 0.91491997, 1.8613378 , 0.50723883, 0.26956039,
-0.65576259, 0.81621241, -0.71835102])
1 | y = np.random.randn(8) |
array([-0.61305033, -0.99195929, -0.89955148, -0.63491395, 1.54908888,
-1.82440893, 0.08511608, -0.60391516])
1 | np.maximum(x,y) |
array([-0.61305033, 0.91491997, 1.8613378 , 0.50723883, 1.54908888,
-0.65576259, 0.81621241, -0.60391516])
利用数组进行数据处理
用数组表达式代替循环的做法,我们称之为矢量化
1 | points = np.arange(-5,5,0.01)#1000个间隔点 |
1 | xs,ys=np.meshgrid(points,points) |
1 | xs |
array([[-5. , -4.99, -4.98, ..., 4.97, 4.98, 4.99],
[-5. , -4.99, -4.98, ..., 4.97, 4.98, 4.99],
[-5. , -4.99, -4.98, ..., 4.97, 4.98, 4.99],
...,
[-5. , -4.99, -4.98, ..., 4.97, 4.98, 4.99],
[-5. , -4.99, -4.98, ..., 4.97, 4.98, 4.99],
[-5. , -4.99, -4.98, ..., 4.97, 4.98, 4.99]])
1 | ys |
array([[-5. , -5. , -5. , ..., -5. , -5. , -5. ],
[-4.99, -4.99, -4.99, ..., -4.99, -4.99, -4.99],
[-4.98, -4.98, -4.98, ..., -4.98, -4.98, -4.98],
...,
[ 4.97, 4.97, 4.97, ..., 4.97, 4.97, 4.97],
[ 4.98, 4.98, 4.98, ..., 4.98, 4.98, 4.98],
[ 4.99, 4.99, 4.99, ..., 4.99, 4.99, 4.99]])
1 | z=np.sqrt(xs**2+ys**2) |
array([[7.07106781, 7.06400028, 7.05693985, ..., 7.04988652, 7.05693985,
7.06400028],
[7.06400028, 7.05692568, 7.04985815, ..., 7.04279774, 7.04985815,
7.05692568],
[7.05693985, 7.04985815, 7.04278354, ..., 7.03571603, 7.04278354,
7.04985815],
...,
[7.04988652, 7.04279774, 7.03571603, ..., 7.0286414 , 7.03571603,
7.04279774],
[7.05693985, 7.04985815, 7.04278354, ..., 7.03571603, 7.04278354,
7.04985815],
[7.06400028, 7.05692568, 7.04985815, ..., 7.04279774, 7.04985815,
7.05692568]])
1 | import matplotlib.pyplot as plt |
Text(0.5,1,'Image plot of $\\sqrt{x^2+y^2}$ for a grid of values')
将条件表达式表述为数组运算
1 | np.where? |
1 | xarr = np.random.randn(5) |
array([-0.05191135, 0.46807508, 1.5955647 , -1.21585517, 0.68848672])
1 | yarr=np.random.randn(5) |
array([-1.60333056, 2.16303939, -0.37219312, -1.85605698, 0.41180341])
1 | np.where(xarr>=0,xarr,yarr)#条件为True的位置取xarr的元素,否则取yarr的元素 |
array([-1.60333056, 0.46807508, 1.5955647 , -1.85605698, 0.68848672])
1 | ##替换所有正值为1,负值为-1 |
array([[ 0.32359596, -1.15124188, 0.12417984, -1.34511765],
[-0.41019678, 1.0543996 , 2.6307449 , 0.74725061],
[ 1.03418855, -0.58064793, -0.61019497, -1.13773196],
[-0.64005234, 0.73911588, 1.15966556, -0.26103626]])
1 | np.where(arr>0,1,-1) |
array([[ 1, -1, 1, -1],
[-1, 1, 1, 1],
[ 1, -1, -1, -1],
[-1, 1, 1, -1]])
数学和统计方法
1 | arr=np.random.randn(5,4) |
array([[ 0.3794937 , -0.91051976, 0.54977469, 0.98390242],
[ 1.24989257, -0.14989659, -0.70528342, 0.66344849],
[ 0.15440786, 0.75716823, -1.54809025, 0.05263153],
[ 0.63369665, -1.47415409, -1.35897948, -0.24638285],
[ 0.36552553, 1.44667304, 1.80073603, 0.70854674]])
1 | arr.mean() |
0.1676295519907499
1 | np.mean(arr) |
0.1676295519907499
1 | arr.sum() |
3.352591039814998
1 | arr.sum(axis=1) # 接受轴参数 |
array([ 1.00265105, 1.05816104, -0.58388263, -2.44581977, 4.32148134])
1 | arr[0] |
array([ 0.3794937 , -0.91051976, 0.54977469, 0.98390242])
1 | arr.sum(0) |
array([ 2.7830163 , -0.33072917, -1.26184243, 2.16214634])
1 | np.sum? |
1 | arr=np.arange(9).reshape((3,3)) |
array([[0, 1, 2],
[3, 4, 5],
[6, 7, 8]])
用于布尔型数组的方法
1 | #在求和等计算方法中,布尔值会被强制转换为1(True)和0(False),因此sum()可以用来统计True值的个数 |
array([ 0.62662142, -0.23714492, 2.52986602, 0.66838534, 0.47275484,
1.81467714, -0.39454002, -2.59347451, 0.90815739, -0.0813537 ])
1 | arr > 0 |
array([ True, False, True, True, True, True, False, False, True,
False])
1 | (arr>0).sum() |
6
1 | (arr<0).any() #若有一个True,则为True |
True
1 | (arr>0).all() #全部为True,则为True |
False
排序
1 | arr=np.random.randn(10) |
array([ 0.23588362, -0.45045835, 1.22450303, -0.2419639 , -0.23873288,
-1.09141889, -0.87760038, -0.53059957, 0.15428331, -1.43959318])
1 | arr.sort() |
array([-1.43959318, -1.09141889, -0.87760038, -0.53059957, -0.45045835,
-0.2419639 , -0.23873288, 0.15428331, 0.23588362, 1.22450303])
1 | #可以在任意轴上排序 |
array([[ 0.34871828, 1.03879317, 0.21363644, 0.05765405, 1.01230602],
[ 0.20640237, -0.2323433 , 0.2214327 , 1.16611884, 0.5123435 ],
[ 0.4660787 , -0.16572832, 0.03096976, 1.07155177, -1.90712269],
[-0.45824044, -0.25984925, -1.37214123, 1.14006713, -0.70677386],
[-2.51549148, 0.1314714 , 1.68439925, -0.92174553, 1.03215197]])
1 | arr.sort? |
1 | arr.sort(axis=1) |
array([[ 0.05765405, 0.21363644, 0.34871828, 1.01230602, 1.03879317],
[-0.2323433 , 0.20640237, 0.2214327 , 0.5123435 , 1.16611884],
[-1.90712269, -0.16572832, 0.03096976, 0.4660787 , 1.07155177],
[-1.37214123, -0.70677386, -0.45824044, -0.25984925, 1.14006713],
[-2.51549148, -0.92174553, 0.1314714 , 1.03215197, 1.68439925]])
1 | arr.sort(1) |
array([[ 0.05765405, 0.21363644, 0.34871828, 1.01230602, 1.03879317],
[-0.2323433 , 0.20640237, 0.2214327 , 0.5123435 , 1.16611884],
[-1.90712269, -0.16572832, 0.03096976, 0.4660787 , 1.07155177],
[-1.37214123, -0.70677386, -0.45824044, -0.25984925, 1.14006713],
[-2.51549148, -0.92174553, 0.1314714 , 1.03215197, 1.68439925]])
唯一化
1 | arr = np.array([3,3,2,1,1,54,223,3,2,3]) |
1 | np.unique(arr) #找出数组中的唯一值,并返回排序的结果 |
array([ 1, 2, 3, 54, 223])
线性代数
1 | x = np.array([[1., 2., 3.], [4., 5., 6.]]) |
array([[ 28., 64.],
[ 67., 181.]])
1 | np.dot(x,np.ones(3)) |
array([ 6., 15.])
1 | arr = np.random.normal(size=(4,4)) |
array([[ 1.11433974, -0.2520489 , -0.2349691 , -0.94610534],
[ 2.28170964, 0.78521532, -2.05844323, -0.40333454],
[-0.1225117 , -0.9144343 , 0.25932307, 0.283972 ],
[-0.63086567, -1.17039446, -0.20103388, -0.21096491]])
随机漫步
1 | nstep = 100 |
array([1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1,
0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1,
0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1,
1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1])
1 | steps = np.where(draws>0,1,-1) |
array([ 1, 1, 1, 1, -1, -1, -1, -1, 1, 1, -1, -1, -1, 1, 1, -1, -1,
-1, -1, 1, 1, 1, -1, 1, -1, -1, -1, -1, 1, 1, -1, 1, -1, 1,
1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1,
1, -1, -1, -1, 1, -1, -1, 1, -1, 1, -1, -1, -1, -1, 1, -1, 1,
1, 1, 1, 1, 1, -1, 1, -1, -1, -1, 1, 1, -1, -1, -1, 1, 1,
-1, 1, 1, 1, 1, -1, 1, -1, -1, 1, 1, -1, 1, -1, 1])
1 | walk=steps.cumsum() |
1 | walk.min() |
-19
1 | walk.max() |
4
1 | (np.abs(walk)>=5).argmax() |
40