Jérémie Decock (www.jdhp.org)
Official documentation: http://pandas.pydata.org/pandas-docs/stable/
%matplotlib inline
#%matplotlib notebook
from IPython.display import display
import matplotlib
matplotlib.rcParams['figure.figsize'] = (9, 9)
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime
import pandas as pd
import numpy as np
pd.__version__
data_list = [1, 3, np.nan, 7]
series = pd.Series(data_list)
series
data_array = np.array(data_list)
series = pd.Series(data_array)
series
indices = pd.Series([1, 3, 5, 7])
series = pd.Series([10, 30, 50, 70], index=indices)
series
indices = pd.Series(['A', 'B', 'C', 'D'])
series = pd.Series([10, 30, 50, 70], index=indices)
series
data_dict = {'A': 10, 'B': 30, 'C': 50, 'D': 70}
series = pd.Series(data_dict)
series
series.index
series.values
series.shape
series.dtypes
series.describe()
type(series.describe())
series.memory_usage()
dates = pd.date_range('20130101', periods=6)
dates
dates = pd.date_range(start='2013-01-01', end='2013-01-08')
dates
dates = pd.date_range('2013-01-01', periods=4, freq='M')
dates
num_days = 7
data = np.random.random(num_days)
index = pd.date_range('2017-01-01', periods=num_days)
series = pd.Series(data, index)
series
data_list = [[1, 2, 3], [4, 5, 6]]
df = pd.DataFrame(data_list)
df
data_array = np.array([[1, 2, 3], [4, 5, 6]])
df = pd.DataFrame(data_array)
df
Using lists:
data = [[1, 2, 3], [4, 5, 6]]
index = [10, 20]
columns = ['A', 'B', 'C']
df = pd.DataFrame(data, index, columns)
df
Using numpy arrays:
data = np.array([[1, 2, 3], [4, 5, 6]])
index = np.array([10, 20])
columns = np.array(['A', 'B', 'C'])
df = pd.DataFrame(data, index=index, columns=columns)
df
Using Series:
data = np.array([[1, 2, 3], [4, 5, 6]])
index = pd.Series([10, 20])
columns = pd.Series(['A', 'B', 'C'])
df = pd.DataFrame(data, index=index, columns=columns)
df
Dictionary keys define the column labels.
data_dict = {'A': 'foo',
'B': [10, 20, 30],
'C': 3}
df = pd.DataFrame(data_dict)
df
To define the index as well:
data_dict = {'A': 'foo',
'B': [10, 20, 30],
'C': 3}
df = pd.DataFrame(data_dict, index=[10, 20, 30])
df
df.index
df.columns
df.values
df.shape
df.dtypes
df.info()
df.describe()
type(df.describe())
df.memory_usage()
A DataFrame's columns can have different types. But what about rows?
What happens when a DataFrame with mixed-type columns is transposed?
data_dict = {'A': 'foo',
'B': [10, 20, 30],
'C': 3}
df = pd.DataFrame(data_dict)
df
df.dtypes
df2 = df.T
df2
df2.dtypes
After transposition each column mixes strings and integers, so every column is upcast to the object dtype.
Panels are deprecated.
Pandas now focuses on 1D (Series) and 2D (DataFrame) data structures.
The recommended alternative for working with 3-dimensional data is the xarray Python library.
Another workaround: one can simply use a MultiIndex DataFrame to easily work with higher dimensional data.
See http://pandas.pydata.org/pandas-docs/stable/dsintro.html#deprecate-panel.
Panel4D and PanelND are deprecated.
Pandas now focuses on 1D (Series) and 2D (DataFrame) data structures.
The recommended alternative for working with n-dimensional data is the xarray Python library.
Another workaround: one can simply use a MultiIndex DataFrame to easily work with higher dimensional data.
See http://pandas.pydata.org/pandas-docs/stable/dsintro.html#panel4d-and-panelnd-deprecated.
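As an illustration of the MultiIndex workaround, here is a minimal sketch (the item/date labels are hypothetical) emulating a 3D Panel with a two-level index:
# A two-level row index ("item", "date") emulates the third dimension a Panel used to provide
index = pd.MultiIndex.from_product([['item1', 'item2'],
                                    pd.date_range('2018-01-01', periods=3)],
                                   names=['item', 'date'])
df3d = pd.DataFrame(np.random.random((6, 2)), index=index, columns=['A', 'B'])
df3d
df3d.loc['item1']   # select one "panel slice" as a regular 2D DataFrame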
Reader functions are accessible from the top-level pd object.
Writer functions are accessible from data objects (i.e. Series, DataFrame or Panel objects).
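For instance, a minimal sketch of this pairing (df_tmp and tmp.csv are hypothetical; the CSV functions are demonstrated in detail below):
df_tmp = pd.DataFrame({'A': [1, 2]})
df_tmp.to_csv("tmp.csv")          # writer: a method of the data object
df_tmp = pd.read_csv("tmp.csv")   # reader: a function of the top-level pd object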
data_array = np.array([[1, 2, 3], [4, 5, 6]])
df = pd.DataFrame(data_array, index=[10, 20], columns=[100, 200, 300])
df
See python_pandas_hdf5_en.ipynb ...
Simplest version:
df.to_csv(path_or_buf="python_pandas_io_test.csv")
!cat python_pandas_io_test.csv
Setting more options:
# FYI, many other options are available
df.to_csv(path_or_buf="python_pandas_io_test.csv",
sep=',',
columns=None,
header=True,
index=True,
index_label=None,
compression=None, # allowed values are 'gzip', 'bz2' or 'xz'
date_format=None)
!cat python_pandas_io_test.csv
Simplest version:
df = pd.read_csv("python_pandas_io_test.csv")
df
Setting more options:
df = pd.read_csv("python_pandas_io_test.csv",
sep=',',
delimiter=None,
header='infer',
names=None,
index_col=0,
usecols=None,
squeeze=False,
prefix=None,
mangle_dupe_cols=True,
dtype=None,
engine=None,
converters=None,
true_values=None,
false_values=None,
skipinitialspace=False,
skiprows=None,
nrows=None,
na_values=None,
keep_default_na=True,
na_filter=True,
verbose=False,
skip_blank_lines=True,
parse_dates=False,
infer_datetime_format=False,
keep_date_col=False,
date_parser=None,
dayfirst=False,
iterator=False,
chunksize=None,
compression='infer',
thousands=None,
decimal=b'.',
lineterminator=None,
quotechar='"',
quoting=0,
escapechar=None,
comment=None,
encoding=None,
dialect=None,
#tupleize_cols=False,
error_bad_lines=True,
warn_bad_lines=True,
skipfooter=0,
#skip_footer=0,
doublequote=True,
delim_whitespace=False,
#as_recarray=False,
#compact_ints=False,
#use_unsigned=False,
low_memory=True,
#buffer_lines=None,
memory_map=False,
float_precision=None)
df
!rm python_pandas_io_test.csv
import io
df.to_json(path_or_buf="python_pandas_io_test.json")
!cat python_pandas_io_test.json
orient="split"
df.to_json(path_or_buf="python_pandas_io_test_split.json",
           orient="split")
!cat python_pandas_io_test_split.json
orient="records"
df.to_json(path_or_buf="python_pandas_io_test_records.json",
           orient="records")
!cat python_pandas_io_test_records.json
orient="index" (the default option for Series)
df.to_json(path_or_buf="python_pandas_io_test_index.json",
           orient="index")
!cat python_pandas_io_test_index.json
orient="columns" (the default option for DataFrame; for DataFrame only)
df.to_json(path_or_buf="python_pandas_io_test_columns.json",
           orient="columns")
!cat python_pandas_io_test_columns.json
orient="values" (for DataFrame only)
df.to_json(path_or_buf="python_pandas_io_test_values.json",
           orient="values")
!cat python_pandas_io_test_values.json
# FYI, many other options are available
df.to_json(path_or_buf="python_pandas_io_test.json",
orient='columns', # For DataFrame: 'split','records','index','columns' or 'values'
date_format=None, # None, 'epoch' or 'iso'
double_precision=10,
force_ascii=True,
date_unit='ms')
!cat python_pandas_io_test.json
orient="split"
Dict-like data: {index -> [index], columns -> [columns], data -> [values]}
!cat python_pandas_io_test_split.json
df = pd.read_json("python_pandas_io_test_split.json",
orient="split")
df
orient="records"
List-like: [{column -> value}, ... , {column -> value}]
!cat python_pandas_io_test_records.json
df = pd.read_json("python_pandas_io_test_records.json",
orient="records")
df
orient="index"
Dict-like: {index -> {column -> value}}
!cat python_pandas_io_test_index.json
df = pd.read_json("python_pandas_io_test_index.json",
orient="index")
df
orient="columns"
Dict-like: {column -> {index -> value}}
!cat python_pandas_io_test_columns.json
df = pd.read_json("python_pandas_io_test_columns.json",
orient="columns")
df
orient="values" (for DataFrame only)
Just the values array
!cat python_pandas_io_test_values.json
df = pd.read_json("python_pandas_io_test_values.json",
orient="values")
df
df = pd.read_json("python_pandas_io_test.json",
orient=None,
typ='frame',
dtype=True,
convert_axes=True,
convert_dates=True,
keep_default_dates=True,
numpy=False,
precise_float=False,
date_unit=None,
encoding=None,
lines=False)
df
!rm python_pandas_io_test*.json
!echo "- {A: 1, B: 2}" > python_pandas_io_test.yaml
!echo "- {A: 3}" >> python_pandas_io_test.yaml
!echo "- {B: 4}" >> python_pandas_io_test.yaml
!cat python_pandas_io_test.yaml
try:
    import yaml

    with open('python_pandas_io_test.yaml', 'r') as f:
        df = pd.io.json.json_normalize(yaml.safe_load(f))   # safe_load avoids executing arbitrary YAML tags
    print(df)
except ImportError:
    print("PyYAML is not installed")
!rm python_pandas_io_test.yaml
Many other file formats can be used to import or export data with pandas.
See the following link for more information: http://pandas.pydata.org/pandas-docs/stable/io.html
data_array = np.array([np.arange(1, 10, 1), np.arange(10, 100, 10), np.arange(100, 1000, 100)]).T
df = pd.DataFrame(data_array,
index=np.arange(1, 10, 1),
columns=['A', 'B', 'C'])
df
The following instructions return a Series.
df.B
df["B"]
df.loc[:,"B"]
df.iloc[:,1]
The following instructions return a DataFrame:
df[['A','B']]
df.loc[:,['A','B']]
df.iloc[:,0:2]
data_array = np.array([np.arange(1, 10, 1), np.arange(10, 100, 10), np.arange(100, 1000, 100)]).T
df = pd.DataFrame(data_array,
index=["i" + str(i+1) for i in range(9)],
columns=['A', 'B', 'C'])
df
The following instructions return a Series.
df.loc["i3"]
df.loc["i3",:]
df.iloc[2] # Select over index
df.iloc[2,:] # Select over index
df.loc[["i3", "i4"],:]
df.iloc[2:4,:] # Select over index
df.B < 50.
type(df.B < 50.)
df[[True, True, True, True, False, False, False, False, False]]
series_mask = pd.Series({'i1': True,
'i2': True,
'i3': True,
'i4': True,
'i5': False,
'i6': False,
'i7': False,
'i8': False,
'i9': False})
df[series_mask]
df[df.B < 50.]
df[df['B'] < 50.]
df[(df.A >= 2) & (df.B < 50)]
This can be written:
df.loc[(df.A >= 2) & (df.B < 50)]
It could also be written df[df.A >= 2][df.B < 50], but this is a bad practice (known as "chained indexing").
"When setting values in a pandas object, care must be taken to avoid what is called chained indexing."
See: http://pandas.pydata.org/pandas-docs/stable/indexing.html#returning-a-view-versus-a-copy
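A minimal sketch (df_tmp is a hypothetical example) of why chained indexing is unsafe when assigning: the first indexing operation may return a copy, so the write can be silently lost.
df_tmp = pd.DataFrame({'A': [1, 2, 3], 'B': [10, 20, 30]})
df_tmp[df_tmp.A >= 2]['B'] = 0       # chained indexing: assigns into a temporary copy
print(df_tmp)                        # 'B' is unchanged
df_tmp.loc[df_tmp.A >= 2, 'B'] = 0   # a single .loc call modifies df_tmp itself
print(df_tmp)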
data_array = np.array([np.arange(1, 10, 1), np.arange(10, 100, 10), np.arange(100, 1000, 100)]).T
df = pd.DataFrame(data_array,
index=np.arange(1, 10, 1),
columns=['A', 'B', 'C'])
df
df[(df.A >= 2) & (df.B < 50)]
df[(df.B < 20) | (df.B > 50)]
df.loc[(df.B < 20) | (df.B > 50), 'C']
df[(df['A'] >= 2) & (df['B'] < 50)]
df.loc[(df.A >= 2) & (df.B < 50), ['A','B']]
data_array = np.array([np.arange(1, 10, 1), np.arange(10, 100, 10), np.arange(100, 1000, 100)]).T
df = pd.DataFrame(data_array,
index=np.arange(1, 10, 1),
columns=['A', 'B', 'C'])
df
df.B *= 2.
df
df.B = pow(df.B, 2)
df
data_array = np.array([np.arange(1, 10, 1), np.arange(10, 100, 10), np.arange(100, 1000, 100)]).T
df = pd.DataFrame(data_array,
index=np.arange(1, 10, 1),
columns=['A', 'B', 'C'])
df
df[df.B < 50.] *= -1.
df
# df['B'][df['B'] < 50.] = 0 # OK but chain indexing is bad...
# df.A[df.B < 50.] = 0 # OK but chain indexing is bad...
df.loc[df.B < 50., 'A'] = 0
df
WARNING: df[df.B < 50.].A = 0
does NOT work even if df.A[df.B < 50.]
and df[df.B < 50.].A
seems to produce the same result...
"When setting values in a pandas object, care must be taken to avoid what is called chained indexing".
See:
df.loc[(df.B < 50.) & (df.B > 20), 'C'] = 0
df
df.loc[(df.B < 20) | (df.B > 50), 'C'] = -1
df
df[df.B < 50.] = pow(df[df.B < 50.], 2)
df
data_array = np.array([np.arange(1, 10, 1), np.arange(10, 100, 10), np.arange(100, 1000, 100)]).T
df = pd.DataFrame(data_array,
index=np.arange(1, 10, 1),
columns=['A', 'B', 'C'])
df
Draw 30 samples (with replacement):
df.sample(n=30, replace=True)
Sample 90% of the rows (with replacement):
df.sample(frac=0.9, replace=True)
Draw 3 samples:
df.sample(n=3)
Sample 90% of the rows:
df.sample(frac=0.9)
df.sample(n=30, replace=True, weights=np.arange(len(df)))
data_array = np.array([np.arange(1, 10, 1), np.arange(10, 100, 10), np.arange(100, 1000, 100)]).T
df = pd.DataFrame(data_array,
index=np.arange(1, 10, 1),
columns=['A', 'B', 'C'])
df
df = df.sample(frac=1)
df
To reset the index too:
df = df.sample(frac=1).reset_index(drop=True)
df
NROWS = 7
col1 = np.arange(1., NROWS, 1)
col2 = np.arange(10., NROWS*10, 10)
col3 = np.arange(100., NROWS*100, 100)
np.random.shuffle(col1)
np.random.shuffle(col2)
np.random.shuffle(col3)
data = np.array([col1,
col2,
col3]).T
index = np.arange(1, NROWS, 1)
columns = np.array(['A', 'B', 'C'])
np.random.shuffle(index)
np.random.shuffle(data)
np.random.shuffle(columns)
df = pd.DataFrame(data,
index=index,
columns=columns)
df
df.sort_index()
df.sort_index(axis=0) # axis=0 -> sort by row index
df.sort_index(ascending=False)
df.sort_index(axis=1) # axis=1 -> sort by column label
df.sort_index(axis=1, ascending=False)
df.sort_values(by='B')
df.sort_values(by='B', ascending=False)
df.sort_values(by='B', inplace=True)
df
a = np.array([[3, np.nan, 5, np.nan, 7],
[2, 4, np.nan, 3, 1],
[3, 4, 5, 6, 1]]).T
df = pd.DataFrame(a,
columns=['A', 'B', 'C'])
df
df.isnull()
df.dropna()
df.dropna(how='any') # but 'any' is the default value...
df.dropna(subset=['B'])
df.dropna(subset=['B', 'C'])
df.dropna(axis=1)
df.dropna(axis=1, how='any') # but 'any' is the default value...
df.dropna(axis=1, subset=[2])
df.dropna(axis=1, subset=[1, 2])
df.fillna(value=999)
df.A.isnull().sum()
data_array = np.array([np.arange(1, 10, 1), np.arange(10, 100, 10), np.arange(100, 1000, 100)]).T
df = pd.DataFrame(data_array,
index=np.arange(1, 10, 1),
columns=['A', 'B', 'C'])
df
df.T
a1 = np.array([np.arange(1, 5, 1), np.arange(10, 50, 10), np.arange(100, 500, 100)]).T
df1 = pd.DataFrame(a1,
columns=['ID', 'B', 'C'])
a2 = np.array([np.arange(1, 5, 1), np.arange(1000, 5000, 1000), np.arange(10000, 50000, 10000)]).T
df2 = pd.DataFrame(a2,
columns=['ID', 'B', 'C'])
display(df1)
display(df2)
df = pd.merge(df1, df2, on="ID", suffixes=('_1', '_2')) #.dropna(how='any')
display(df)
a1 = np.array([np.arange(1, 5, 1), np.arange(10, 50, 10), np.arange(100, 500, 100)]).T
df1 = pd.DataFrame(a1,
columns=['ID', 'B', 'C'])
a2 = np.array([np.arange(1, 5, 1), np.arange(1000, 5000, 1000), np.arange(10000, 50000, 10000)]).T
df2 = pd.DataFrame(a2,
columns=['ID', 'B', 'C'])
df1.iloc[0,2] = np.nan
df1.iloc[1,1] = np.nan
df1.iloc[2,2] = np.nan
df1.iloc[3,1] = np.nan
df2.iloc[0,1] = np.nan
df2.iloc[1,2] = np.nan
df2.iloc[2,1] = np.nan
df2.iloc[3,2] = np.nan
df = pd.merge(df1, df2, on="ID", suffixes=('_1', '_2')) #.dropna(how='any')
display(df1)
display(df2)
display(df)
a1 = np.array([np.arange(1, 5, 1), np.arange(10, 50, 10), np.arange(100, 500, 100)]).T
df1 = pd.DataFrame(a1,
columns=['ID', 'B', 'C'])
a2 = np.array([np.arange(1, 3, 1), np.arange(1000, 3000, 1000), np.arange(10000, 30000, 10000)]).T
df2 = pd.DataFrame(a2,
columns=['ID', 'B', 'C'])
display(df1)
display(df2)
print("Left: use only keys from left frame (SQL: left outer join)")
df = pd.merge(df1, df2, on="ID", how="left", suffixes=('_1', '_2')) #.dropna(how='any')
display(df)
print("Right: use only keys from right frame (SQL: right outer join)")
df = pd.merge(df1, df2, on="ID", how="right", suffixes=('_1', '_2')) #.dropna(how='any')
display(df)
print("Inner: use intersection of keys from both frames (SQL: inner join) [DEFAULT]")
df = pd.merge(df1, df2, on="ID", how="inner", suffixes=('_1', '_2')) #.dropna(how='any')
display(df)
print("Outer: use union of keys from both frames (SQL: full outer join)")
df = pd.merge(df1, df2, on="ID", how="outer", suffixes=('_1', '_2')) #.dropna(how='any')
display(df)
a = np.array([[3, 5, 5, 5, 7, 7, 7, 7],
[2, 4, 4, 3, 1, 3, 3, 2],
[3, 4, 5, 6, 1, 8, 9, 8]]).T
df = pd.DataFrame(a,
columns=['A', 'B', 'C'])
df
df.groupby(["A"]).count()
df.groupby(["A"]).sum().B
df.groupby(["A"]).mean().B
df.groupby(["A","B"]).count()
Rolling with an aggregation window of size 2.
s = pd.Series([1., 0., 5., 2., 1.])
print("DATA:")
print(s)
mean_s = s.rolling(2).mean()
print()
print("ROLLING MEAN:")
print(mean_s)
sum_s = s.rolling(2).sum()
print()
print("ROLLING SUM:")
print(sum_s)
min_s = s.rolling(2).min()
print()
print("ROLLING MIN:")
print(min_s)
max_s = s.rolling(2).max()
print()
print("ROLLING MAX:")
print(max_s)
ax = s.plot(figsize=(18, 3), color="blue", label="data")
mean_s.plot(color="red", label="mean", ax=ax)
sum_s.plot(color="green", label="sum", style="--", alpha=0.5, ax=ax)
min_s.plot(color="black", label="min", style=":", alpha=0.25, ax=ax)
max_s.plot(color="black", label="max", style=":", alpha=0.25, ax=ax)
ax.legend();
index = np.arange(0, 20, 0.05)
s = pd.Series(np.sin(index))
s = s + np.random.normal(scale=0.4, size=s.shape)
ax = s.plot(figsize=(18, 3))
s.shape
Rolling with an aggregation window of size 20.
s_mean = s.rolling(20).mean()
s_median = s.rolling(20).median()
s_min = s.rolling(20).min()
s_max = s.rolling(20).max()
ax = s_mean.plot(figsize=(18, 8), color="red", label="mean", alpha=0.75)
s_median.plot(ax=ax, color="blue", label="median", alpha=0.75)
s_min.plot(ax=ax, color="blue", alpha=0.5, style=":", label="min")
s_max.plot(ax=ax, color="blue", alpha=0.5, style=":", label="max")
plt.fill_between(s_min.index, s_min.values, s_max.values, facecolor='blue', alpha=0.1)
ax.legend()
ax.set_xlabel('Time');
s_mean.shape
df = pd.DataFrame([["i1", "A", 1],
["i1", "B", 2],
["i2", "A", 3],
["i2", "B", 4]], columns=["foo", "bar", "baz"])
df
df.pivot(index="foo", columns="bar", values="baz")
a = np.array([[3, 5, 5, 5, 7, 7, 7, 7],
[2, 4, 4, 3, 1, 3, 3, 2],
[3, 4, 5, 6, 1, 8, 9, 8]]).T
df = pd.DataFrame(a,
columns=['A', 'B', 'C'])
df
df.A.value_counts()
df.A.value_counts().plot.bar()
df = pd.DataFrame(np.random.normal(size=100000))
df.quantile(0.50)
df.quantile([0.25, 0.75])
df.quantile([0.01, 0.001])
There are 3 main time-related types in Pandas (each with an equivalent type for Series and DataFrame indices):
- pandas.Timestamp (pandas.DatetimeIndex for indices): the pandas equivalent of Python's datetime.datetime
- pandas.Period (pandas.PeriodIndex for indices): represents a period of time
- pandas.Timedelta (pandas.TimedeltaIndex for indices): represents a duration (the difference between two dates or times), i.e. the pandas equivalent of Python's datetime.timedelta
A Timestamp is a point in time:
pd.Timestamp(year=2018, month=1, day=1, hour=12, minute=30)
A Period is a range in time (with an "anchored" start time and an "anchored" end time):
p = pd.Period(freq='D', year=2018, month=1, day=1, hour=12, minute=30)
print(p)
print("Start time:", p.start_time)
print("End time:", p.end_time)
A Timedelta is a "floating" duration (i.e. not "anchored" in time):
print(pd.Timedelta(days=5, seconds=30))
ts1 = pd.Timestamp(year=2018, month=1, day=1, hour=12, minute=30)
ts2 = pd.Timestamp(year=2018, month=1, day=2, hour=12, minute=30)
print(ts2 - ts1)
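The index counterparts listed above can also be built directly; as a minimal sketch, pd.timedelta_range (the timedelta analogue of pd.date_range) builds a TimedeltaIndex:
pd.timedelta_range(start='1 day', periods=4, freq='6H')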
pd.date_range('2018-01-01', '2018-03-01', freq='D')
pd.date_range('2018-01-01', periods=10, freq='h')
pd.date_range('1/1/2012', periods=10, freq='S')
pd.date_range('3/6/2012 00:00', periods=5, freq='D')
pd.date_range('1/1/2012', periods=5, freq='M')
pd.period_range('2018-01-01', '2018-03-01', freq='D')
pd.date_range('2018-01-01', '2018-03-01', freq='D').to_period()
dti = pd.date_range('2012-01-01 00:00', periods=40, freq='D')
ts = pd.Series(np.random.randint(0, 200, len(dti)), index=dti)
ts.plot();
ts.plot(x_compat=True);
dti = pd.date_range('2018-01-01 00:00', '2018-01-03 00:00', freq='H')
ts = pd.Series(np.random.randint(0, 100, len(dti)), index=dti)
ax = ts.plot(x_compat=True, figsize=(16, 4)) # x_compat=True is required: matplotlib doesn't understand the pandas datetime format, so pandas converts the dates for it
# set daily major ticks and hourly minor ticks
ax.xaxis.set_major_locator(mdates.DayLocator(interval=1))
ax.xaxis.set_minor_locator(mdates.HourLocator(interval=1))
# set formatter
ax.xaxis.set_major_formatter(mdates.DateFormatter('%d-%m-%Y'))
# set font and rotation for date tick labels
plt.gcf().autofmt_xdate()
dti = pd.date_range('2012-1-1 00:00', periods=40, freq='D')
ts = pd.Series(np.random.randint(0, 200, len(dti)), index=dti)
ts
ts["2012-01-09"]
ts[datetime.datetime(2012, 1, 9)]
ts[ts.index < "2012-01-09"]
ts[ts.index > "2012-01-20"]
ts["2012-01-09":"2012-01-20"]
ts[datetime.datetime(2012, 1, 9):datetime.datetime(2012, 1, 20)]
ts[ts.index.day <= 3]
ts[ts.index.month == 2]
ts["2012-02"]
ts[ts.index.dayofweek == 1]
Rolling window size: 1 day
dti = pd.DatetimeIndex(['2018-1-1 00:00', '2018-1-1 06:45', '2018-1-1 12:00',
'2018-1-2 00:00', '2018-1-2 06:00', '2018-1-2 12:00'])
ts = pd.Series([2., 1., 3., 2., 2., 0.], index=dti)
print("DATA:")
print(ts)
ax = ts.plot(figsize=(18, 3), style="*-", color="blue")
ax.vlines(pd.DatetimeIndex(['2018-1-1 00:00', '2018-1-2 00:00']), ymin=0, ymax=8, color="red", linestyle=":", alpha=0.3);
ts_rw = ts.rolling('D').sum() # Rolling window size: 1 day
print()
print("MEAN:")
print(ts_rw)
ts_rw.plot(color="red", label="sum", style="*-", alpha=0.75, ax=ax)
ax.legend()
ax.set_xlabel('Time')
ax.grid(True);
ts.rolling('6h').min()
ts.rolling('3h').mean()
dti = pd.date_range('1/1/2018 00:00', periods=6*480, freq='10min')
ts = pd.Series(np.sin(dti.hour * 2. * np.pi / 24.), index=dti)
ts = ts + np.random.normal(scale=0.4, size=ts.shape)
ax = ts.plot(figsize=(18, 3))
ax.vlines(pd.date_range('1/1/2018 00:00', periods=480//24, freq='D'), ymin=-2, ymax=2, color="red", linestyle=":", alpha=0.3);  # periods must be an integer