Numpy snippets

Jérémie Decock (www.jdhp.org)

This notebook is inspired by the following page: https://docs.scipy.org/doc/numpy-dev/user/numpy-for-matlab-users.html

Open in Colab

Open in Binder

Import directives

In [2]:
import numpy as np
import math

Create arrays

In [3]:
np.array([1, 2, 3])
Out[3]:
array([1, 2, 3])
In [4]:
np.array([[1, 2, 3],[4, 5, 6]])
Out[4]:
array([[1, 2, 3],
       [4, 5, 6]])

Special matrices

In [5]:
np.zeros(3)
Out[5]:
array([0., 0., 0.])
In [6]:
np.zeros((3, 4))
Out[6]:
array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])
In [7]:
np.ones(3)
Out[7]:
array([1., 1., 1.])
In [8]:
np.ones((3, 4))
Out[8]:
array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])
In [9]:
np.eye(3)
Out[9]:
array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

Arange

In [10]:
np.arange(10)
Out[10]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [11]:
np.arange(10, 20)
Out[11]:
array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])
In [12]:
np.arange(10, 20, 2)
Out[12]:
array([10, 12, 14, 16, 18])

Linspace

In [13]:
np.linspace(0., 2., 5)
Out[13]:
array([0. , 0.5, 1. , 1.5, 2. ])

Meshgrid

In [14]:
xx, yy = np.meshgrid([1, 2, 3], [4, 5, 6])

print(xx)
print()
print(yy)
[[1 2 3]
 [1 2 3]
 [1 2 3]]

[[4 4 4]
 [5 5 5]
 [6 6 6]]

Random

Uniform distribution in [0, 1]

In [15]:
np.random.rand(3)
Out[15]:
array([0.19139462, 0.69656613, 0.85817232])
In [16]:
np.random.rand(3, 4)
Out[16]:
array([[0.39147016, 0.08167819, 0.25074289, 0.3615743 ],
       [0.06971122, 0.48578575, 0.8736717 , 0.7659085 ],
       [0.4107602 , 0.02057075, 0.64395404, 0.25144921]])

Poisson distribution

In [17]:
np.random.poisson(10, size=[3, 4])
Out[17]:
array([[14,  7,  8, 10],
       [11,  7, 16, 11],
       [14,  8,  6, 10]])

Multivariate normal distribution

In [18]:
mu = np.array([0., 0.])
cov = np.array([[1., 0.],
                [1., 1.]])
num_points = 10

np.random.multivariate_normal(mu, cov, num_points)
/home/jeremie/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:6: RuntimeWarning: covariance is not symmetric positive-semidefinite.
  
Out[18]:
array([[ 1.19685399, -0.89637926],
       [ 0.70343392,  1.22329756],
       [-1.47776175, -0.15111255],
       [ 0.42583398, -0.65425883],
       [ 0.7507132 , -0.51463919],
       [ 0.94480182, -0.03359277],
       [ 0.72254175,  0.14974572],
       [ 1.09560291,  0.08233315],
       [-0.75881534, -0.51072946],
       [-0.82831518, -0.77018703]])
In [19]:
np.get_printoptions()
Out[19]:
{'edgeitems': 3,
 'threshold': 1000,
 'floatmode': 'maxprec',
 'precision': 8,
 'suppress': False,
 'linewidth': 75,
 'nanstr': 'nan',
 'infstr': 'inf',
 'sign': '-',
 'formatter': None,
 'legacy': False}
In [20]:
default_threshold = np.get_printoptions()["threshold"]
default_threshold
Out[20]:
1000

Arrays with more than default_threshold elements are truncated.

In [21]:
max_size = math.ceil(math.sqrt(default_threshold))
max_size
Out[21]:
32
In [22]:
a = np.random.randint(1, size=[max_size + 1, max_size + 1])
a
Out[22]:
array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

Print the full array (set threshold to infinity):

In [23]:
np.set_printoptions(threshold=np.inf)
a
Out[23]:
array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

Go back to the default threshold:

In [24]:
np.set_printoptions(threshold=default_threshold)
a
Out[24]:
array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

Dimension and shape

In [25]:
a = np.array([[1, 2, 3],[4, 5, 6]])

Number of dimensions:

In [26]:
a.ndim
Out[26]:
2

Number of elements:

In [27]:
a.size
Out[27]:
6

Number of elements per dimension:

In [28]:
a.shape
Out[28]:
(2, 3)

Convert

In [29]:
l = [[1, 2, 3],[4, 5, 6]]
a = np.array([[1, 2, 3],[4, 5, 6]])

Python list to Numpy array

In [30]:
np.array(l)
Out[30]:
array([[1, 2, 3],
       [4, 5, 6]])

Numpy array to Python list

In [31]:
a.tolist()
Out[31]:
[[1, 2, 3], [4, 5, 6]]

Copy

np.copy()

In [32]:
a = np.array([[1, 2, 3],[4, 5, 6]])
a
Out[32]:
array([[1, 2, 3],
       [4, 5, 6]])
In [33]:
b = a.copy()
b
Out[33]:
array([[1, 2, 3],
       [4, 5, 6]])
In [34]:
a[0,0] = 10
print(a)
print(b)
[[10  2  3]
 [ 4  5  6]]
[[1 2 3]
 [4 5 6]]

np.astype()

In [35]:
a = np.array([[1, 2, 3],[4, 5, 6]])
a
Out[35]:
array([[1, 2, 3],
       [4, 5, 6]])
In [36]:
b = a.astype('float64', copy=True)
b
Out[36]:
array([[1., 2., 3.],
       [4., 5., 6.]])
In [37]:
a[0,0] = 10
print(a)
print(b)
[[10  2  3]
 [ 4  5  6]]
[[1. 2. 3.]
 [4. 5. 6.]]

Access elements

In [38]:
a = np.arange(6)
In [39]:
a
Out[39]:
array([0, 1, 2, 3, 4, 5])
In [40]:
a[0]
Out[40]:
0
In [41]:
a[-1]
Out[41]:
5

Slices

In [42]:
a[1:4]
Out[42]:
array([1, 2, 3])
In [43]:
a = np.array([[1, 2, 3, 4, 5, 6],
              [10, 20, 30, 40, 50, 60],
              [100, 200, 300, 400, 500, 600]])
a
Out[43]:
array([[  1,   2,   3,   4,   5,   6],
       [ 10,  20,  30,  40,  50,  60],
       [100, 200, 300, 400, 500, 600]])
In [44]:
a[0,1]
Out[44]:
2
In [45]:
a[1, :]
Out[45]:
array([10, 20, 30, 40, 50, 60])
In [46]:
a[1, ::2]
Out[46]:
array([10, 30, 50])
In [47]:
a[:, 1]
Out[47]:
array([  2,  20, 200])
In [48]:
a[0:2, 2:4]
Out[48]:
array([[ 3,  4],
       [30, 40]])
In [49]:
a[1:, 1:]
Out[49]:
array([[ 20,  30,  40,  50,  60],
       [200, 300, 400, 500, 600]])
In [50]:
a[:-1, :-1]
Out[50]:
array([[ 1,  2,  3,  4,  5],
       [10, 20, 30, 40, 50]])

Ellipsis

"The ellipsis is used to slice high-dimensional data structures.

It's designed to mean at this point, insert as many full slices (:) to extend the multi-dimensional slice to all dimensions."

https://stackoverflow.com/questions/118370/how-do-you-use-the-ellipsis-slicing-syntax-in-python

In [51]:
a = np.arange(2**3).reshape(2, 2, 2)
a
Out[51]:
array([[[0, 1],
        [2, 3]],

       [[4, 5],
        [6, 7]]])

To select all first elements in the last (3rd) dimension

In [52]:
a[..., 0]
Out[52]:
array([[0, 2],
       [4, 6]])

is equivalent to

In [53]:
a[:, :, 0]
Out[53]:
array([[0, 2],
       [4, 6]])

To select all first elements in the first (1st) dimension

In [54]:
a[0, ...]
Out[54]:
array([[0, 1],
       [2, 3]])

is equivalent to

In [55]:
a[0, :, :]
Out[55]:
array([[0, 1],
       [2, 3]])

Filter

In [56]:
a = np.array([[1, 2, 3, 4, 5, 6],
              [10, 20, 30, 40, 50, 60],
              [100, 200, 300, 400, 500, 600]])
a
Out[56]:
array([[  1,   2,   3,   4,   5,   6],
       [ 10,  20,  30,  40,  50,  60],
       [100, 200, 300, 400, 500, 600]])

Boolean matrix whose i,jth element is (a_ij > 5)

In [57]:
(a>5)
Out[57]:
array([[False, False, False, False, False,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True]])

Find the indices where (a > 5)

In [58]:
np.nonzero(a>5)
Out[58]:
(array([0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2]),
 array([5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5]))

Set or copy a with elements greater than 5 zeroed out

In [59]:
a * (a<=5)
Out[59]:
array([[1, 2, 3, 4, 5, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0]])
In [60]:
a[a>5] = 0
a
Out[60]:
array([[1, 2, 3, 4, 5, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0]])

Select indices satisfying multiple conditions

Short version

In [61]:
a = np.array([[-1, 7, 3], [-11, -5, 20]])
a
Out[61]:
array([[ -1,   7,   3],
       [-11,  -5,  20]])
In [62]:
a[(a > -10) & (a < 10)] = 0
a
Out[62]:
array([[  0,   0,   0],
       [-11,   0,  20]])
In [63]:
a[(a < -10) | (a > 10)] = 1
a
Out[63]:
array([[0, 0, 0],
       [1, 0, 1]])

Detailed version

In [64]:
a = np.array([[-1, 7, 3], [-11, -5, 20]])
a
Out[64]:
array([[ -1,   7,   3],
       [-11,  -5,  20]])
In [65]:
m1 = (a > -10)
m2 = (a < 10)

print(m1)
print(m2)
print(m1 & m2)

a[m1 & m2] = 0
a
[[ True  True  True]
 [False  True  True]]
[[ True  True  True]
 [ True  True False]]
[[ True  True  True]
 [False  True False]]
Out[65]:
array([[  0,   0,   0],
       [-11,   0,  20]])

Concatenate

Append 1D arrays

In [66]:
a = np.array([])
a = np.append(a, 3)
a
Out[66]:
array([3.])

Performance test

It's probably not a good idea to use np.append to often as it makes a copy of the array each time it is called...

In [67]:
%%timeit

a = np.array([])
for i in range(10000):
    a = np.append(a, i)
152 ms ± 13 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)

Lists use a different data structure that makes them more efficient for repeated additions...

In [68]:
%%timeit

l = []
for i in range(10000):
    l.append(i)

a = np.array(l)
2.61 ms ± 135 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

In this case, the better option is probably the following:

In [69]:
%%timeit

a = np.array([i for i in range(10000)])
1.94 ms ± 77.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

Concatenate 1D arrays

In [70]:
a = np.zeros(3)
b = np.ones(3)
print("a:", a)
print("b:", b)
a: [0. 0. 0.]
b: [1. 1. 1.]
In [71]:
np.concatenate([a, b])
Out[71]:
array([0., 0., 0., 1., 1., 1.])
In [72]:
np.hstack([a, b])
Out[72]:
array([0., 0., 0., 1., 1., 1.])

Concatenate 2D arrays

In [73]:
a = np.zeros([2, 3])
b = np.ones([2, 3])
In [74]:
a
Out[74]:
array([[0., 0., 0.],
       [0., 0., 0.]])
In [75]:
b
Out[75]:
array([[1., 1., 1.],
       [1., 1., 1.]])

On the first dimension

Using vstack:

In [76]:
np.vstack([a, b])
Out[76]:
array([[0., 0., 0.],
       [0., 0., 0.],
       [1., 1., 1.],
       [1., 1., 1.]])

Using concatenate:

In [78]:
np.concatenate([a, b], axis=0)
Out[78]:
array([[0., 0., 0.],
       [0., 0., 0.],
       [1., 1., 1.],
       [1., 1., 1.]])

On the second dimension

Using hstack:

In [80]:
np.hstack([a, b])
Out[80]:
array([[0., 0., 0., 1., 1., 1.],
       [0., 0., 0., 1., 1., 1.]])

Using concatenate:

In [82]:
np.concatenate([a, b], axis=1)
Out[82]:
array([[0., 0., 0., 1., 1., 1.],
       [0., 0., 0., 1., 1., 1.]])

Join a sequence of arrays along a new axis

The axis parameter specifies the index of the new axis in the dimensions of the result.

In [84]:
a = np.zeros([2, 3])
b = np.ones([2, 3])
In [85]:
a
Out[85]:
array([[0., 0., 0.],
       [0., 0., 0.]])
In [86]:
b
Out[86]:
array([[1., 1., 1.],
       [1., 1., 1.]])

Along axis 0

In [87]:
np.stack([a, b], axis=0)
Out[87]:
array([[[0., 0., 0.],
        [0., 0., 0.]],

       [[1., 1., 1.],
        [1., 1., 1.]]])
In [88]:
np.stack([a, b], axis=0).shape
Out[88]:
(2, 2, 3)

Along axis 1

In [89]:
np.stack([a, b], axis=1)
Out[89]:
array([[[0., 0., 0.],
        [1., 1., 1.]],

       [[0., 0., 0.],
        [1., 1., 1.]]])
In [90]:
np.stack([a, b], axis=1).shape
Out[90]:
(2, 2, 3)

Along axis 2

In [91]:
np.stack([a, b], axis=2)
Out[91]:
array([[[0., 1.],
        [0., 1.],
        [0., 1.]],

       [[0., 1.],
        [0., 1.],
        [0., 1.]]])
In [92]:
np.stack([a, b], axis=2).shape
Out[92]:
(2, 3, 2)

Tile

In [93]:
a = np.array([[1, 2, 3], [4, 5, 6]])
np.tile(a, (2, 3))
Out[93]:
array([[1, 2, 3, 1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6, 4, 5, 6],
       [1, 2, 3, 1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6, 4, 5, 6]])

Reshape or transpose

In [94]:
a = np.array([[1, 2, 3], [4, 5, 6]])

Transpose

In [95]:
a.T
Out[95]:
array([[1, 4],
       [2, 5],
       [3, 6]])

Flatten

In [96]:
a.flatten()
Out[96]:
array([1, 2, 3, 4, 5, 6])

Reshape

In [97]:
a = np.arange(6)
a
Out[97]:
array([0, 1, 2, 3, 4, 5])

Row vector to column vector

In [98]:
a.reshape([-1, 1])
Out[98]:
array([[0],
       [1],
       [2],
       [3],
       [4],
       [5]])

Vector to matrix

In [99]:
a.reshape([2, 3])
Out[99]:
array([[0, 1, 2],
       [3, 4, 5]])
In [100]:
a.reshape([3, 2])
Out[100]:
array([[0, 1],
       [2, 3],
       [4, 5]])

Sort

Return the indices that would sort an array

In [101]:
a = np.array([8, 5, 1])
a
Out[101]:
array([8, 5, 1])
In [102]:
a.argsort()
Out[102]:
array([2, 1, 0])

Sort an array by the $n^{\text{th}}$ column

In [103]:
a = np.array([[4, 4, 2],
              [8, 5, 1],
              [7, 0, 0],
              [3, 1, 1],
              [3, 0, 5]])
a
Out[103]:
array([[4, 4, 2],
       [8, 5, 1],
       [7, 0, 0],
       [3, 1, 1],
       [3, 0, 5]])
In [104]:
n = 0   # the column sorted by
In [105]:
a[a[:,n].argsort()]
Out[105]:
array([[3, 1, 1],
       [3, 0, 5],
       [4, 4, 2],
       [7, 0, 0],
       [8, 5, 1]])
In [106]:
n = 1   # the column sorted by
In [107]:
a[a[:,n].argsort()]
Out[107]:
array([[7, 0, 0],
       [3, 0, 5],
       [3, 1, 1],
       [4, 4, 2],
       [8, 5, 1]])
In [108]:
n = 2   # the column sorted by
In [109]:
a[a[:,n].argsort()]
Out[109]:
array([[7, 0, 0],
       [8, 5, 1],
       [3, 1, 1],
       [4, 4, 2],
       [3, 0, 5]])

Aggregation / reduction

In [110]:
a = np.array([[1, 2, 3], [4, 5, 6]])

Change the axis value in the following functions to aggregate along a given axis.

In [111]:
np.sum(a, axis=None)
Out[111]:
21
In [112]:
np.cumsum(a, axis=None)
Out[112]:
array([ 1,  3,  6, 10, 15, 21])
In [113]:
np.diff(a.ravel())
Out[113]:
array([1, 1, 1, 1, 1])
In [114]:
np.mean(a, axis=None)
Out[114]:
3.5
In [115]:
np.var(a, axis=None)
Out[115]:
2.9166666666666665
In [116]:
np.std(a, axis=None)
Out[116]:
1.707825127659933
In [117]:
np.median(a, axis=None)
Out[117]:
3.5
In [118]:
np.min(a, axis=None)
Out[118]:
1
In [119]:
np.max(a, axis=None)
Out[119]:
6
In [120]:
np.prod(a, axis=None)
Out[120]:
720
In [121]:
np.cumprod(a, axis=None)
Out[121]:
array([  1,   2,   6,  24, 120, 720])

Compute the histogram of a set of data (with a specific binning)

In [122]:
a = np.array([1, 1, 3, 2, 2, 2])
a
Out[122]:
array([1, 1, 3, 2, 2, 2])

All but the last (righthand-most) bin is half-open. In other words, if bins is:

[1, 2, 3, 4]

then the first bin is [1, 2) (including 1, but excluding 2) and the second [2, 3). The last bin, however, is [3, 4], which includes 4.

In [123]:
bins = np.array([1, 2, 3, 4])
bins
Out[123]:
array([1, 2, 3, 4])
In [124]:
hist, bins_ = np.histogram(a, bins=bins)
hist
Out[124]:
array([2, 3, 1])

Linear algebra

Dot product of two arrays

In [125]:
a = np.array([1, 2, 3])
b = np.array([10, 20, 30])
In [126]:
np.dot(a, b)
Out[126]:
140
In [127]:
a.dot(b)
Out[127]:
140

Compute the (multiplicative) inverse of a matrix

In [128]:
a = np.random.normal(size=(3, 3))
a
Out[128]:
array([[-0.10843773,  1.0421794 ,  0.44795244],
       [-0.26130745, -0.60551493,  1.32335202],
       [-0.31268474,  0.29052963,  3.64179301]])
In [129]:
np.linalg.inv(a)
Out[129]:
array([[-3.58419511, -5.0729198 ,  2.28426151],
       [ 0.74439426, -0.35271247,  0.03660547],
       [-0.36712457, -0.40742326,  0.4677967 ]])

Compute the eigenvalues and right eigenvectors of a square array

In [130]:
a = np.random.normal(size=(3, 3))
a
Out[130]:
array([[-0.52645921,  0.01268815, -0.29545473],
       [ 0.26802005, -0.69021174,  0.44119483],
       [-1.76598217,  1.4243306 ,  1.95451704]])
In [131]:
np.linalg.eig(a)
Out[131]:
(array([ 2.33104649, -0.49833989, -1.09486052]),
 array([[-0.10131405, -0.60745289, -0.29841767],
        [ 0.13494829, -0.79400162,  0.78677039],
        [ 0.98565938,  0.02371513, -0.54031402]]))

Singular Value Decomposition

In [132]:
a = np.random.normal(size=(3, 3))
a
Out[132]:
array([[ 1.52266258,  0.03945301,  0.52087021],
       [-0.16733558, -0.33904757, -0.05026985],
       [ 1.30577543,  0.57382822,  0.03343758]])
In [133]:
U, s, V = np.linalg.svd(a)
print(U, s, V)
[[-0.74640472  0.62906876  0.21714625]
 [ 0.11505271  0.44335941 -0.8889293 ]
 [-0.65547148 -0.63851776 -0.40330163]] [2.10189009 0.57019954 0.17094458] [[-0.95707835 -0.21151643 -0.19814597]
 [ 0.08752874 -0.86268183  0.49811522]
 [-0.27629648  0.45939183  0.84416788]]

Solve a linear matrix equation, or system of linear scalar equations

In [134]:
a = np.array([[3, 1], [1, 2]])
b = np.array([9, 8])
np.linalg.solve(a, b)
Out[134]:
array([2., 3.])

Diagonals

Extract the diagonal:

In [135]:
a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
np.diag(a)
Out[135]:
array([1, 5, 9])

Make a diagonal matrix:

In [136]:
d = np.array([1, 2, 3])
np.diag(d)
Out[136]:
array([[1, 0, 0],
       [0, 2, 0],
       [0, 0, 3]])

Trace

In [137]:
a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
np.trace(a)
Out[137]:
15

Upper and lower triangles of an array

In [138]:
a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
In [139]:
np.triu(a)
Out[139]:
array([[1, 2, 3],
       [0, 5, 6],
       [0, 0, 9]])
In [140]:
np.tril(a)
Out[140]:
array([[1, 0, 0],
       [4, 5, 0],
       [7, 8, 9]])

Data types

Get type

In [141]:
a = np.arange(-2., 2., 0.5)
a.dtype
Out[141]:
dtype('float64')

Size in memory (in bytes)

In [142]:
a = np.arange(-2., 2., 0.5)

Per item:

In [143]:
a.itemsize
Out[143]:
8

Full array:

In [144]:
a.nbytes
Out[144]:
64

Init

In [145]:
a = np.zeros(3)
a.dtype
Out[145]:
dtype('float64')
In [146]:
a = np.zeros(3, dtype=np.bool)
a.dtype
Out[146]:
dtype('bool')
In [147]:
a = np.zeros(3, dtype=np.int)
a.dtype
Out[147]:
dtype('int64')
In [148]:
a = np.zeros(3, dtype=np.int8)
a.dtype
Out[148]:
dtype('int8')
In [149]:
a = np.zeros(3, dtype=np.uint8)
a.dtype
Out[149]:
dtype('uint8')

Conversions

In [150]:
a = np.arange(-2., 2., 0.5)
a
Out[150]:
array([-2. , -1.5, -1. , -0.5,  0. ,  0.5,  1. ,  1.5])
In [151]:
a.astype(np.bool)
Out[151]:
array([ True,  True,  True,  True, False,  True,  True,  True])
In [152]:
a.astype(np.int)
Out[152]:
array([-2, -1, -1,  0,  0,  0,  1,  1])
In [153]:
a.astype(np.int8)
Out[153]:
array([-2, -1, -1,  0,  0,  0,  1,  1], dtype=int8)
In [154]:
a.astype(np.uint8)
Out[154]:
array([254, 255, 255,   0,   0,   0,   1,   1], dtype=uint8)

Masked arrays

Without masked array

In [155]:
a = np.array([[np.nan, 2, 3], [1, np.nan, 6]])
a
Out[155]:
array([[nan,  2.,  3.],
       [ 1., nan,  6.]])
In [156]:
a.min()
/home/jeremie/anaconda3/lib/python3.6/site-packages/numpy/core/_methods.py:32: RuntimeWarning: invalid value encountered in reduce
  return umr_minimum(a, axis, None, out, keepdims, initial)
Out[156]:
nan
In [157]:
np.nanmin(a)
Out[157]:
1.0
In [158]:
a.max()
/home/jeremie/anaconda3/lib/python3.6/site-packages/numpy/core/_methods.py:28: RuntimeWarning: invalid value encountered in reduce
  return umr_maximum(a, axis, None, out, keepdims, initial)
Out[158]:
nan
In [159]:
np.nanmax(a)
Out[159]:
6.0
In [160]:
a.mean()
Out[160]:
nan
In [161]:
np.nanmean(a)
Out[161]:
3.0
In [162]:
a.shape
Out[162]:
(2, 3)

With masked array

In [163]:
ma = np.ma.masked_where(np.isnan(a), a)
ma
Out[163]:
masked_array(
  data=[[--, 2.0, 3.0],
        [1.0, --, 6.0]],
  mask=[[ True, False, False],
        [False,  True, False]],
  fill_value=1e+20)
In [164]:
ma.min()
Out[164]:
1.0
In [165]:
ma.max()
Out[165]:
6.0
In [166]:
ma.mean()
Out[166]:
3.0
In [167]:
ma.shape
Out[167]:
(2, 3)