Pandas:
- Datetime
- Aggregate
- GroupBy
- loc & iloc
In [15]:
import pandas as pd
import numpy as np
import datetime as dt
In [16]:
# Location of the sample stock-price CSV (hosted on GitHub).
STOCKS_CSV_URL = (
    'https://raw.githubusercontent.com/abulbasar/data/master/stocks.small.csv'
)

# Read the CSV into a DataFrame, then preview its first five rows.
df = pd.read_csv(STOCKS_CSV_URL)
df.head()
Out[16]:
date | open | high | low | close | volume | adjclose | symbol | |
---|---|---|---|---|---|---|---|---|
0 | 2000-07-18 | 144.81250 | 144.828125 | 141.437500 | 143.0000 | 50683600.0 | 50.155473 | INTC |
1 | 2000-07-20 | 32.93751 | 34.250010 | 32.812500 | 33.7500 | 3288300.0 | 8.789734 | BEN |
2 | 2000-07-24 | 64.25000 | 67.312477 | 64.187523 | 64.7500 | 948800.0 | 7.689567 | APH |
3 | 2000-07-26 | 21.87500 | 22.125000 | 20.937500 | 20.9375 | 1464300.0 | 15.618320 | SHW |
4 | 2000-07-26 | 42.00000 | 42.312481 | 41.625000 | 41.8750 | 1397600.0 | 9.402721 | STJ |
In [17]:
# Parse the `date` column using an explicit year-month-day format so the
# column holds datetime values instead of the raw text read from the CSV.
DATE_FORMAT = "%Y-%m-%d"
parsed_dates = pd.to_datetime(df['date'], format=DATE_FORMAT)
df['date'] = parsed_dates

# Display the frame (pandas shows a truncated head/tail view plus its shape).
df
Out[17]:
date | open | high | low | close | volume | adjclose | symbol | |
---|---|---|---|---|---|---|---|---|
0 | 2000-07-18 | 144.812500 | 144.828125 | 141.437500 | 143.000000 | 50683600.0 | 50.155473 | INTC |
1 | 2000-07-20 | 32.937510 | 34.250010 | 32.812500 | 33.750000 | 3288300.0 | 8.789734 | BEN |
2 | 2000-07-24 | 64.250000 | 67.312477 | 64.187523 | 64.750000 | 948800.0 | 7.689567 | APH |
3 | 2000-07-26 | 21.875000 | 22.125000 | 20.937500 | 20.937500 | 1464300.0 | 15.618320 | SHW |
4 | 2000-07-26 | 42.000000 | 42.312481 | 41.625000 | 41.875000 | 1397600.0 | 9.402721 | STJ |
... | ... | ... | ... | ... | ... | ... | ... | ... |
1841 | 2016-07-29 | 114.089996 | 116.550003 | 114.089996 | 115.209999 | 2520700.0 | 115.209999 | AET |
1842 | 2016-08-01 | 128.809998 | 130.020004 | 127.830002 | 128.220001 | 1576600.0 | 128.220001 | UHS |
1843 | 2016-08-02 | 24.340000 | 24.430000 | 23.990000 | 24.170000 | 7115500.0 | 24.170000 | BSX |
1844 | 2016-08-02 | 63.779999 | 63.840000 | 62.490002 | 62.840000 | 2740500.0 | 62.840000 | ADI |
1845 | 2016-08-04 | 28.010000 | 28.190001 | 27.900000 | 28.049999 | 3899900.0 | 28.049999 | CSX |
1846 rows × 8 columns
In [18]:
# Print the frame's summary (index range, per-column non-null counts and
# dtypes, memory usage) to check that `date` is now datetime64[ns].
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1846 entries, 0 to 1845 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 date 1846 non-null datetime64[ns] 1 open 1846 non-null float64 2 high 1846 non-null float64 3 low 1846 non-null float64 4 close 1846 non-null float64 5 volume 1846 non-null float64 6 adjclose 1846 non-null float64 7 symbol 1846 non-null object dtypes: datetime64[ns](1), float64(6), object(1) memory usage: 115.5+ KB
In [19]:
# NOTE(review): this repeats the previous cell's df.info() call and prints
# the same summary; it looks redundant and could probably be dropped.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1846 entries, 0 to 1845 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 date 1846 non-null datetime64[ns] 1 open 1846 non-null float64 2 high 1846 non-null float64 3 low 1846 non-null float64 4 close 1846 non-null float64 5 volume 1846 non-null float64 6 adjclose 1846 non-null float64 7 symbol 1846 non-null object dtypes: datetime64[ns](1), float64(6), object(1) memory usage: 115.5+ KB
In [ ]:
# Build a 35-day duration with the standard-library datetime module
# (imported as `dt`); as the last expression, its repr is the cell output.
dt.timedelta(days=35)