Numpy snippets

Jérémie Decock (www.jdhp.org)

This notebook is inspired by the following page: https://numpy.org/doc/stable/user/numpy-for-matlab-users.html

Open in Colab

Open in Binder

Import directives

In [2]:
import numpy as np
import math

Create arrays

In [3]:
np.array([1, 2, 3])
Out[3]:
array([1, 2, 3])
In [4]:
np.array([[1, 2, 3],[4, 5, 6]])
Out[4]:
array([[1, 2, 3],
       [4, 5, 6]])

Special matrices

In [5]:
np.zeros(3)
Out[5]:
array([0., 0., 0.])
In [6]:
np.zeros((3, 4))
Out[6]:
array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])
In [7]:
np.ones(3)
Out[7]:
array([1., 1., 1.])
In [8]:
np.ones((3, 4))
Out[8]:
array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])
In [9]:
np.eye(3)
Out[9]:
array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

Arange

In [10]:
np.arange(10)
Out[10]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [11]:
np.arange(10, 20)
Out[11]:
array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])
In [12]:
np.arange(10, 20, 2)
Out[12]:
array([10, 12, 14, 16, 18])

Linspace

In [13]:
np.linspace(0., 2., 5)
Out[13]:
array([0. , 0.5, 1. , 1.5, 2. ])

Meshgrid

In [14]:
xx, yy = np.meshgrid([1, 2, 3], [4, 5, 6])

print(xx)
print()
print(yy)
[[1 2 3]
 [1 2 3]
 [1 2 3]]

[[4 4 4]
 [5 5 5]
 [6 6 6]]

Random

Uniform distribution in [0, 1)

In [15]:
np.random.rand(3)
Out[15]:
array([0.12534372, 0.36853066, 0.55009451])
In [16]:
np.random.rand(3, 4)
Out[16]:
array([[0.02583954, 0.57889341, 0.429822  , 0.22703184],
       [0.53470499, 0.94524598, 0.76756525, 0.03075947],
       [0.09197049, 0.37062914, 0.51931686, 0.61009014]])

Poisson distribution

In [17]:
np.random.poisson(10, size=[3, 4])
Out[17]:
array([[15, 11,  7,  8],
       [ 9, 11,  9,  9],
       [13,  8, 12, 10]])

Multivariate normal distribution

In [18]:
mu = np.array([0., 0.])
cov = np.array([[1., 0.3],
                [0.3, 1.]])
num_points = 10

np.random.multivariate_normal(mu, cov, num_points)
Out[18]:
array([[-0.52165696,  1.16901622],
       [ 0.30404194,  1.88894873],
       [ 0.400003  ,  1.3124642 ],
       [ 0.01765615, -0.1387088 ],
       [-1.57706326, -1.51384862],
       [ 0.18403875,  1.96069178],
       [ 0.87658993, -0.27136232],
       [-0.11815757,  1.22887826],
       [ 0.77221924, -0.9303671 ],
       [ 1.96374853,  1.51505435]])
In [19]:
np.get_printoptions()
Out[19]:
{'edgeitems': 3,
 'threshold': 1000,
 'floatmode': 'maxprec',
 'precision': 8,
 'suppress': False,
 'linewidth': 75,
 'nanstr': 'nan',
 'infstr': 'inf',
 'sign': '-',
 'formatter': None,
 'legacy': False}
In [20]:
default_threshold = np.get_printoptions()["threshold"]
default_threshold
Out[20]:
1000

Arrays with more than default_threshold elements are truncated.

In [21]:
# Side length of the smallest square array holding at least
# default_threshold elements (ceil(sqrt(1000)) == 32, and 32 * 32 >= 1000).
max_size = math.ceil(math.sqrt(default_threshold))
max_size
Out[21]:
32
In [22]:
# randint(1, ...) draws integers from the half-open range [0, 1), so every
# element is 0; only the element count matters here: (max_size + 1)**2
# exceeds the print threshold, which triggers the truncated display.
a = np.random.randint(1, size=[max_size + 1, max_size + 1])
a
Out[22]:
array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

Print the full array (set threshold to infinity):

In [23]:
np.set_printoptions(threshold=np.inf)
a
Out[23]:
array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

Go back to the default threshold:

In [24]:
np.set_printoptions(threshold=default_threshold)
a
Out[24]:
array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

Dimension and shape

In [25]:
a = np.array([[1, 2, 3],[4, 5, 6]])

Number of dimensions:

In [26]:
a.ndim
Out[26]:
2

Number of elements:

In [27]:
a.size
Out[27]:
6

Number of elements per dimension:

In [28]:
a.shape
Out[28]:
(2, 3)

Convert

In [29]:
l = [[1, 2, 3],[4, 5, 6]]
a = np.array([[1, 2, 3],[4, 5, 6]])

Python list to Numpy array

In [30]:
np.array(l)
Out[30]:
array([[1, 2, 3],
       [4, 5, 6]])

Numpy array to Python list

In [31]:
a.tolist()
Out[31]:
[[1, 2, 3], [4, 5, 6]]

Copy

ndarray.copy()

In [32]:
a = np.array([[1, 2, 3],[4, 5, 6]])
a
Out[32]:
array([[1, 2, 3],
       [4, 5, 6]])
In [33]:
b = a.copy()
b
Out[33]:
array([[1, 2, 3],
       [4, 5, 6]])
In [34]:
a[0,0] = 10
print(a)
print(b)
[[10  2  3]
 [ 4  5  6]]
[[1 2 3]
 [4 5 6]]

ndarray.astype()

In [35]:
a = np.array([[1, 2, 3],[4, 5, 6]])
a
Out[35]:
array([[1, 2, 3],
       [4, 5, 6]])
In [36]:
b = a.astype('float64', copy=True)
b
Out[36]:
array([[1., 2., 3.],
       [4., 5., 6.]])
In [37]:
a[0,0] = 10
print(a)
print(b)
[[10  2  3]
 [ 4  5  6]]
[[1. 2. 3.]
 [4. 5. 6.]]

Access elements

In [38]:
a = np.arange(6)
In [39]:
a
Out[39]:
array([0, 1, 2, 3, 4, 5])
In [40]:
a[0]
Out[40]:
0
In [41]:
a[-1]
Out[41]:
5

Slices

In [42]:
a[1:4]
Out[42]:
array([1, 2, 3])
In [43]:
a = np.array([[1, 2, 3, 4, 5, 6],
              [10, 20, 30, 40, 50, 60],
              [100, 200, 300, 400, 500, 600]])
a
Out[43]:
array([[  1,   2,   3,   4,   5,   6],
       [ 10,  20,  30,  40,  50,  60],
       [100, 200, 300, 400, 500, 600]])
In [44]:
a[0,1]
Out[44]:
2
In [45]:
a[1, :]
Out[45]:
array([10, 20, 30, 40, 50, 60])
In [46]:
a[1, ::2]
Out[46]:
array([10, 30, 50])
In [47]:
a[:, 1]
Out[47]:
array([  2,  20, 200])
In [48]:
a[0:2, 2:4]
Out[48]:
array([[ 3,  4],
       [30, 40]])
In [49]:
a[1:, 1:]
Out[49]:
array([[ 20,  30,  40,  50,  60],
       [200, 300, 400, 500, 600]])
In [50]:
a[:-1, :-1]
Out[50]:
array([[ 1,  2,  3,  4,  5],
       [10, 20, 30, 40, 50]])

Ellipsis

"The ellipsis is used to slice high-dimensional data structures.

It's designed to mean at this point, insert as many full slices (:) to extend the multi-dimensional slice to all dimensions."

https://stackoverflow.com/questions/118370/how-do-you-use-the-ellipsis-slicing-syntax-in-python

In [51]:
a = np.arange(2**3).reshape(2, 2, 2)
a
Out[51]:
array([[[0, 1],
        [2, 3]],

       [[4, 5],
        [6, 7]]])

To select all first elements in the last (3rd) dimension

In [52]:
a[..., 0]
Out[52]:
array([[0, 2],
       [4, 6]])

is equivalent to

In [53]:
a[:, :, 0]
Out[53]:
array([[0, 2],
       [4, 6]])

To select all first elements in the first (1st) dimension

In [54]:
a[0, ...]
Out[54]:
array([[0, 1],
       [2, 3]])

is equivalent to

In [55]:
a[0, :, :]
Out[55]:
array([[0, 1],
       [2, 3]])

Filter

In [56]:
a = np.array([[1, 2, 3, 4, 5, 6],
              [10, 20, 30, 40, 50, 60],
              [100, 200, 300, 400, 500, 600]])
a
Out[56]:
array([[  1,   2,   3,   4,   5,   6],
       [ 10,  20,  30,  40,  50,  60],
       [100, 200, 300, 400, 500, 600]])

Boolean matrix whose i,jth element is (a_ij > 5)

In [57]:
(a>5)
Out[57]:
array([[False, False, False, False, False,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True]])

Find the indices where (a > 5)

In [58]:
np.nonzero(a>5)
Out[58]:
(array([0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2]),
 array([5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5]))

Make a copy of a with the elements greater than 5 zeroed out (first cell), or zero them out in place (second cell)

In [59]:
a * (a<=5)
Out[59]:
array([[1, 2, 3, 4, 5, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0]])
In [60]:
a[a>5] = 0
a
Out[60]:
array([[1, 2, 3, 4, 5, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0]])

Select indices satisfying multiple conditions

Short version

In [61]:
a = np.array([[-1, 7, 3], [-11, -5, 20]])
a
Out[61]:
array([[ -1,   7,   3],
       [-11,  -5,  20]])
In [62]:
a[(a > -10) & (a < 10)] = 0
a
Out[62]:
array([[  0,   0,   0],
       [-11,   0,  20]])
In [63]:
a[(a < -10) | (a > 10)] = 1
a
Out[63]:
array([[0, 0, 0],
       [1, 0, 1]])

Detailed version

In [64]:
a = np.array([[-1, 7, 3], [-11, -5, 20]])
a
Out[64]:
array([[ -1,   7,   3],
       [-11,  -5,  20]])
In [65]:
m1 = (a > -10)
m2 = (a < 10)

print(m1)
print(m2)
print(m1 & m2)

a[m1 & m2] = 0
a
[[ True  True  True]
 [False  True  True]]
[[ True  True  True]
 [ True  True False]]
[[ True  True  True]
 [False  True False]]
Out[65]:
array([[  0,   0,   0],
       [-11,   0,  20]])

Concatenate

Append 1D arrays

In [66]:
a = np.array([])
a = np.append(a, 3)
a
Out[66]:
array([3.])

Performance test

It's probably not a good idea to use np.append too often, as it makes a copy of the array each time it is called...

In [67]:
%%timeit

a = np.array([])
for i in range(10000):
    a = np.append(a, i)
36.9 ms ± 183 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)

Lists use a different data structure that makes them more efficient for repeated additions...

In [68]:
%%timeit

l = []
for i in range(10000):
    l.append(i)

a = np.array(l)
804 µs ± 2.24 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

In this case, the better option is probably the following:

In [69]:
%%timeit

a = np.array([i for i in range(10000)])
590 µs ± 3.91 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

Concatenate 1D arrays

In [70]:
a = np.zeros(3)
b = np.ones(3)
print("a:", a)
print("b:", b)
a: [0. 0. 0.]
b: [1. 1. 1.]
In [71]:
np.concatenate([a, b])
Out[71]:
array([0., 0., 0., 1., 1., 1.])
In [72]:
np.hstack([a, b])
Out[72]:
array([0., 0., 0., 1., 1., 1.])

Concatenate 2D arrays

In [73]:
a = np.zeros([2, 3])
b = np.ones([2, 3])
In [74]:
a
Out[74]:
array([[0., 0., 0.],
       [0., 0., 0.]])
In [75]:
b
Out[75]:
array([[1., 1., 1.],
       [1., 1., 1.]])

On the first dimension

Using vstack:

In [76]:
np.vstack([a, b])
Out[76]:
array([[0., 0., 0.],
       [0., 0., 0.],
       [1., 1., 1.],
       [1., 1., 1.]])

Using concatenate:

In [78]:
np.concatenate([a, b], axis=0)
Out[78]:
array([[0., 0., 0.],
       [0., 0., 0.],
       [1., 1., 1.],
       [1., 1., 1.]])

On the second dimension

Using hstack:

In [80]:
np.hstack([a, b])
Out[80]:
array([[0., 0., 0., 1., 1., 1.],
       [0., 0., 0., 1., 1., 1.]])

Using concatenate:

In [82]:
np.concatenate([a, b], axis=1)
Out[82]:
array([[0., 0., 0., 1., 1., 1.],
       [0., 0., 0., 1., 1., 1.]])

Join a sequence of arrays along a new axis

The axis parameter specifies the index of the new axis in the dimensions of the result.

In [84]:
a = np.zeros([2, 3])
b = np.ones([2, 3])
In [85]:
a
Out[85]:
array([[0., 0., 0.],
       [0., 0., 0.]])
In [86]:
b
Out[86]:
array([[1., 1., 1.],
       [1., 1., 1.]])

Along axis 0

In [87]:
np.stack([a, b], axis=0)
Out[87]:
array([[[0., 0., 0.],
        [0., 0., 0.]],

       [[1., 1., 1.],
        [1., 1., 1.]]])
In [88]:
np.stack([a, b], axis=0).shape
Out[88]:
(2, 2, 3)

Along axis 1

In [89]:
np.stack([a, b], axis=1)
Out[89]:
array([[[0., 0., 0.],
        [1., 1., 1.]],

       [[0., 0., 0.],
        [1., 1., 1.]]])
In [90]:
np.stack([a, b], axis=1).shape
Out[90]:
(2, 2, 3)

Along axis 2

In [91]:
np.stack([a, b], axis=2)
Out[91]:
array([[[0., 1.],
        [0., 1.],
        [0., 1.]],

       [[0., 1.],
        [0., 1.],
        [0., 1.]]])
In [92]:
np.stack([a, b], axis=2).shape
Out[92]:
(2, 3, 2)

Tile

In [93]:
a = np.array([[1, 2, 3], [4, 5, 6]])
np.tile(a, (2, 3))
Out[93]:
array([[1, 2, 3, 1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6, 4, 5, 6],
       [1, 2, 3, 1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6, 4, 5, 6]])

Reshape or transpose

In [94]:
a = np.array([[1, 2, 3], [4, 5, 6]])

Transpose

In [95]:
a.T
Out[95]:
array([[1, 4],
       [2, 5],
       [3, 6]])

Flatten

In [96]:
a.flatten()
Out[96]:
array([1, 2, 3, 4, 5, 6])

Reshape

In [97]:
a = np.arange(6)
a
Out[97]:
array([0, 1, 2, 3, 4, 5])

Row vector to column vector

In [98]:
a.reshape([-1, 1])
Out[98]:
array([[0],
       [1],
       [2],
       [3],
       [4],
       [5]])

Vector to matrix

In [99]:
a.reshape([2, 3])
Out[99]:
array([[0, 1, 2],
       [3, 4, 5]])
In [100]:
a.reshape([3, 2])
Out[100]:
array([[0, 1],
       [2, 3],
       [4, 5]])

Repeat

In [101]:
a = np.arange(3)
a
Out[101]:
array([0, 1, 2])
In [102]:
np.repeat(a, 5)
Out[102]:
array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2])
In [103]:
a = np.arange(3).reshape([-1, 1])
a
Out[103]:
array([[0],
       [1],
       [2]])
In [104]:
np.repeat(a, 5, axis=0)
Out[104]:
array([[0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [2],
       [2],
       [2],
       [2],
       [2]])
In [105]:
a = np.array([[1, 3, 5],[2, 4, 6]])
a
Out[105]:
array([[1, 3, 5],
       [2, 4, 6]])
In [106]:
np.repeat(a, 5, axis=0)
Out[106]:
array([[1, 3, 5],
       [1, 3, 5],
       [1, 3, 5],
       [1, 3, 5],
       [1, 3, 5],
       [2, 4, 6],
       [2, 4, 6],
       [2, 4, 6],
       [2, 4, 6],
       [2, 4, 6]])

Sort

Return the indices that would sort an array

In [107]:
a = np.array([8, 5, 1])
a
Out[107]:
array([8, 5, 1])
In [108]:
a.argsort()
Out[108]:
array([2, 1, 0])

Sort an array by the $n^{\text{th}}$ column

In [109]:
a = np.array([[4, 4, 2],
              [8, 5, 1],
              [7, 0, 0],
              [3, 1, 1],
              [3, 0, 5]])
a
Out[109]:
array([[4, 4, 2],
       [8, 5, 1],
       [7, 0, 0],
       [3, 1, 1],
       [3, 0, 5]])
In [110]:
n = 0   # the column sorted by
In [111]:
a[a[:,n].argsort()]
Out[111]:
array([[3, 1, 1],
       [3, 0, 5],
       [4, 4, 2],
       [7, 0, 0],
       [8, 5, 1]])
In [112]:
n = 1   # the column sorted by
In [113]:
a[a[:,n].argsort()]
Out[113]:
array([[7, 0, 0],
       [3, 0, 5],
       [3, 1, 1],
       [4, 4, 2],
       [8, 5, 1]])
In [114]:
n = 2   # the column sorted by
In [115]:
a[a[:,n].argsort()]
Out[115]:
array([[7, 0, 0],
       [8, 5, 1],
       [3, 1, 1],
       [4, 4, 2],
       [3, 0, 5]])

Aggregation / reduction

In [116]:
a = np.array([[1, 2, 3], [4, 5, 6]])

Change the axis value in the following functions to aggregate along a given axis.

In [117]:
np.sum(a, axis=None)
Out[117]:
21
In [118]:
np.cumsum(a, axis=None)
Out[118]:
array([ 1,  3,  6, 10, 15, 21])
In [119]:
np.diff(a.ravel())
Out[119]:
array([1, 1, 1, 1, 1])
In [120]:
np.mean(a, axis=None)
Out[120]:
3.5
In [121]:
np.var(a, axis=None)
Out[121]:
2.9166666666666665
In [122]:
np.std(a, axis=None)
Out[122]:
1.707825127659933
In [123]:
np.median(a, axis=None)
Out[123]:
3.5
In [124]:
np.min(a, axis=None)
Out[124]:
1
In [125]:
np.max(a, axis=None)
Out[125]:
6
In [126]:
np.prod(a, axis=None)
Out[126]:
720
In [127]:
np.cumprod(a, axis=None)
Out[127]:
array([  1,   2,   6,  24, 120, 720])

Compute the histogram of a set of data (with a specific binning)

In [128]:
a = np.array([1, 1, 3, 2, 2, 2])
a
Out[128]:
array([1, 1, 3, 2, 2, 2])

All but the last (righthand-most) bin is half-open. In other words, if bins is:

[1, 2, 3, 4]

then the first bin is [1, 2) (including 1, but excluding 2) and the second [2, 3). The last bin, however, is [3, 4], which includes 4.

In [129]:
bins = np.array([1, 2, 3, 4])
bins
Out[129]:
array([1, 2, 3, 4])
In [130]:
hist, bins_ = np.histogram(a, bins=bins)
hist
Out[130]:
array([2, 3, 1])

Linear algebra

Dot product of two arrays

In [131]:
a = np.array([1, 2, 3])
b = np.array([10, 20, 30])
In [132]:
np.dot(a, b)
Out[132]:
140
In [133]:
a.dot(b)
Out[133]:
140

Compute the (multiplicative) inverse of a matrix

In [134]:
a = np.random.normal(size=(3, 3))
a
Out[134]:
array([[ 0.93323055, -2.19963381, -1.2827732 ],
       [ 2.75576877,  1.61770069,  0.13888038],
       [ 0.28722086,  0.23404142, -0.94940979]])
In [135]:
np.linalg.inv(a)
Out[135]:
array([[ 0.20806866,  0.31688281, -0.23477333],
       [-0.35239316,  0.06866514,  0.4861723 ],
       [-0.02392322,  0.11279201, -1.00446336]])

Compute the eigenvalues and right eigenvectors of a square array

In [136]:
a = np.random.normal(size=(3, 3))
a
Out[136]:
array([[ 0.35943398,  1.55300188,  3.27907319],
       [ 0.30531731,  1.94072396,  1.14134848],
       [ 1.91514569,  0.49593907, -0.83453798]])
In [137]:
np.linalg.eig(a)
Out[137]:
(array([-2.76327398,  3.26657585,  0.96231809]),
 array([[ 0.68690122, -0.74470656,  0.58385389],
        [ 0.12895225, -0.52557326, -0.68694518],
        [-0.71521887, -0.41132091,  0.43269037]]))

Singular Value Decomposition

In [138]:
a = np.random.normal(size=(3, 3))
a
Out[138]:
array([[ 0.14487805, -0.3504535 ,  0.35321843],
       [ 0.61227909, -1.24081329,  0.25787963],
       [-1.7316473 ,  1.14311158,  0.67870339]])
In [139]:
# NOTE: the third value returned by np.linalg.svd is the already-transposed
# (conjugate-transposed for complex input) matrix of right singular vectors,
# so the factorization reads a == U @ np.diag(s) @ V, with no extra .T on V.
U, s, V = np.linalg.svd(a)
print(U, s, V)
[[-0.11218899 -0.44770533  0.88711531]
 [-0.49412049 -0.74941776 -0.44070168]
 [ 0.86212445 -0.48778372 -0.13714396]] [2.47786756 0.92130813 0.16396405] [[-0.73114838  0.66102514  0.1687241 ]
 [ 0.34836757  0.57439653 -0.74074872]
 [ 0.58656806  0.48281922  0.65024881]]

Solve a linear matrix equation, or system of linear scalar equations

In [140]:
# Solve the system a @ x = b, i.e.
#   3 * x0 + 1 * x1 = 9
#   1 * x0 + 2 * x1 = 8
a = np.array([[3, 1], [1, 2]])
b = np.array([9, 8])
np.linalg.solve(a, b)
Out[140]:
array([2., 3.])

Diagonals

Extract the diagonal:

In [141]:
a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
np.diag(a)
Out[141]:
array([1, 5, 9])

Make a diagonal matrix:

In [142]:
d = np.array([1, 2, 3])
np.diag(d)
Out[142]:
array([[1, 0, 0],
       [0, 2, 0],
       [0, 0, 3]])

Trace

In [143]:
a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
np.trace(a)
Out[143]:
15

Upper and lower triangles of an array

In [144]:
a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
In [145]:
np.triu(a)
Out[145]:
array([[1, 2, 3],
       [0, 5, 6],
       [0, 0, 9]])
In [146]:
np.tril(a)
Out[146]:
array([[1, 0, 0],
       [4, 5, 0],
       [7, 8, 9]])

Data types

Get type

In [147]:
a = np.arange(-2., 2., 0.5)
a.dtype
Out[147]:
dtype('float64')

Size in memory (in bytes)

In [148]:
a = np.arange(-2., 2., 0.5)

Per item:

In [149]:
a.itemsize
Out[149]:
8

Full array:

In [150]:
a.nbytes
Out[150]:
64

Init

In [151]:
a = np.zeros(3)
a.dtype
Out[151]:
dtype('float64')
In [152]:
# Use the builtin `bool` as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24 (AttributeError), so the builtin is the only
# spelling that works on every NumPy version. The resulting dtype is unchanged.
a = np.zeros(3, dtype=bool)
a.dtype
Out[152]:
dtype('bool')
In [153]:
# Use the builtin `int` as dtype: the `np.int` alias was deprecated in
# NumPy 1.20 and removed in 1.24 (AttributeError). `int` maps to NumPy's
# default integer type, exactly what the old alias resolved to.
a = np.zeros(3, dtype=int)
a.dtype
Out[153]:
dtype('int64')
In [154]:
a = np.zeros(3, dtype=np.int8)
a.dtype
Out[154]:
dtype('int8')
In [155]:
a = np.zeros(3, dtype=np.uint8)
a.dtype
Out[155]:
dtype('uint8')

Conversions

In [156]:
a = np.arange(-2., 2., 0.5)
a
Out[156]:
array([-2. , -1.5, -1. , -0.5,  0. ,  0.5,  1. ,  1.5])
In [157]:
# Builtin `bool` instead of the `np.bool` alias (removed in NumPy 1.24);
# every non-zero value converts to True, 0.0 converts to False.
a.astype(bool)
Out[157]:
array([ True,  True,  True,  True, False,  True,  True,  True])
In [158]:
# Builtin `int` instead of the `np.int` alias (removed in NumPy 1.24);
# the conversion truncates toward zero (e.g. -1.5 -> -1, 0.5 -> 0).
a.astype(int)
Out[158]:
array([-2, -1, -1,  0,  0,  0,  1,  1])
In [159]:
a.astype(np.int8)
Out[159]:
array([-2, -1, -1,  0,  0,  0,  1,  1], dtype=int8)
In [160]:
# NOTE(review): the negative inputs fall outside the uint8 range; the wrapped
# values in the recorded output (254, 255) are what this platform produced and
# may differ on other platforms / NumPy versions -- confirm before relying on them.
a.astype(np.uint8)
Out[160]:
array([254, 255, 255,   0,   0,   0,   1,   1], dtype=uint8)

Masked arrays

Without masked array

In [161]:
a = np.array([[np.nan, 2, 3], [1, np.nan, 6]])
a
Out[161]:
array([[nan,  2.,  3.],
       [ 1., nan,  6.]])
In [162]:
a.min()
/home/jeremie/anaconda3/envs/ailib-dev/lib/python3.7/site-packages/numpy/core/_methods.py:32: RuntimeWarning: invalid value encountered in reduce
  return umr_minimum(a, axis, None, out, keepdims, initial)
Out[162]:
nan
In [163]:
np.nanmin(a)
Out[163]:
1.0
In [164]:
a.max()
/home/jeremie/anaconda3/envs/ailib-dev/lib/python3.7/site-packages/numpy/core/_methods.py:28: RuntimeWarning: invalid value encountered in reduce
  return umr_maximum(a, axis, None, out, keepdims, initial)
Out[164]:
nan
In [165]:
np.nanmax(a)
Out[165]:
6.0
In [166]:
a.mean()
Out[166]:
nan
In [167]:
np.nanmean(a)
Out[167]:
3.0
In [168]:
a.shape
Out[168]:
(2, 3)

With masked array

In [169]:
ma = np.ma.masked_where(np.isnan(a), a)
ma
Out[169]:
masked_array(
  data=[[--, 2.0, 3.0],
        [1.0, --, 6.0]],
  mask=[[ True, False, False],
        [False,  True, False]],
  fill_value=1e+20)
In [170]:
ma.min()
Out[170]:
1.0
In [171]:
ma.max()
Out[171]:
6.0
In [172]:
ma.mean()
Out[172]:
3.0
In [173]:
ma.shape
Out[173]:
(2, 3)