Pandas introduction pt1¶
In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
In [2]:
idade= np.random.randint(low=12, high=20, size=14)
idade
Out[2]:
array([12, 18, 13, 14, 16, 18, 19, 14, 19, 18, 16, 17, 19, 12])
In [3]:
dados = {'Nome': ['Joelande','Jociara', 'Basilio', 'Mahysa', 'Carlos', 'João Paulo', 'Nilton', 'Ana Paula', 'Hermilino Danilo', 'Paulo Tadeu', 'Evaldo', 'Fernando' , 'Lorena', 'Jailza'],
'Idade': idade
}
In [9]:
df = pd.DataFrame(dados)
df.head(10)
Out[9]:
Nome | Idade | |
---|---|---|
0 | Joelande | 12 |
1 | Jociara | 18 |
2 | Basilio | 13 |
3 | Mahysa | 14 |
4 | Carlos | 16 |
5 | João Paulo | 18 |
6 | Nilton | 19 |
7 | Ana Paula | 14 |
8 | Hermilino Danilo | 19 |
9 | Paulo Tadeu | 18 |
In [16]:
df['Nome'].values
Out[16]:
array(['Joelande', 'Jociara', 'Basilio', 'Mahysa', 'Carlos', 'João Paulo', 'Nilton', 'Ana Paula', 'Hermilino Danilo', 'Paulo Tadeu', 'Evaldo', 'Fernando', 'Lorena', 'Jailza'], dtype=object)
In [19]:
df = pd.read_csv('https://raw.githubusercontent.com/255ribeiro/curso_python_gis/master/docs/dados_py/heroes_information.csv')
print(df.shape)
df.head()
(734, 11)
Out[19]:
Unnamed: 0 | name | Gender | Eye color | Race | Hair color | Height | Publisher | Skin color | Alignment | Weight | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | A-Bomb | Male | yellow | Human | No Hair | 203.0 | Marvel Comics | - | good | 441.0 |
1 | 1 | Abe Sapien | Male | blue | Icthyo Sapien | No Hair | 191.0 | Dark Horse Comics | blue | good | 65.0 |
2 | 2 | Abin Sur | Male | blue | Ungaran | No Hair | 185.0 | DC Comics | red | good | 90.0 |
3 | 3 | Abomination | Male | green | Human / Radiation | No Hair | 203.0 | Marvel Comics | - | bad | 441.0 |
4 | 4 | Abraxas | Male | blue | Cosmic Entity | Black | -99.0 | Marvel Comics | - | bad | -99.0 |
In [21]:
fltr = df['Gender'] == 'Female'
#print(fltr)
df_female = df[fltr]
df_female.head()
Out[21]:
Unnamed: 0 | name | Gender | Eye color | Race | Hair color | Height | Publisher | Skin color | Alignment | Weight | |
---|---|---|---|---|---|---|---|---|---|---|---|
8 | 8 | Agent 13 | Female | blue | - | Blond | 173.0 | Marvel Comics | - | good | 61.0 |
24 | 24 | Angel Dust | Female | yellow | Mutant | Black | 165.0 | Marvel Comics | - | good | 57.0 |
25 | 25 | Angel Salvadore | Female | brown | - | Black | 163.0 | Marvel Comics | - | good | 54.0 |
26 | 26 | Angela | Female | - | - | - | -99.0 | Image Comics | - | bad | -99.0 |
38 | 38 | Arachne | Female | blue | Human | Blond | 175.0 | Marvel Comics | - | good | 63.0 |
In [22]:
fltr = (df['Gender'] == 'Female') & (df['Race'] == 'Mutant')
df_female_mut = df[fltr]
df_female_mut.head()
Out[22]:
Unnamed: 0 | name | Gender | Eye color | Race | Hair color | Height | Publisher | Skin color | Alignment | Weight | |
---|---|---|---|---|---|---|---|---|---|---|---|
24 | 24 | Angel Dust | Female | yellow | Mutant | Black | 165.0 | Marvel Comics | - | good | 57.0 |
55 | 55 | Aurora | Female | blue | Mutant | Black | 180.0 | Marvel Comics | - | good | 63.0 |
114 | 114 | Blink | Female | green | Mutant | Magenta | 165.0 | Marvel Comics | pink | good | 56.0 |
129 | 129 | Boom-Boom | Female | blue | Mutant | Blond | 165.0 | Marvel Comics | - | good | 55.0 |
167 | 167 | Cerebra | Female | - | Mutant | - | -99.0 | Marvel Comics | - | good | -99.0 |
In [27]:
df_female_mut['Weight'].median()
Out[27]:
55.5
In [32]:
df_gb = df_female_mut.groupby('Alignment')
df_gb['Weight'].mean()
Out[32]:
Alignment bad 56.500000 good 37.647059 neutral 67.000000 Name: Weight, dtype: float64
In [35]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from scipy.optimize import curve_fit
In [37]:
dic1 = {'c1': [1,2,3], 'c2': [4,5,6]}
df1 = pd.DataFrame(dic1)
df1.head()
Out[37]:
c1 | c2 | |
---|---|---|
0 | 1 | 4 |
1 | 2 | 5 |
2 | 3 | 6 |
In [38]:
df1.c2
Out[38]:
0 4 1 5 2 6 Name: c2, dtype: int64
In [39]:
df1['c2']
Out[39]:
0 4 1 5 2 6 Name: c2, dtype: int64
In [43]:
df2 = pd.DataFrame({'c1': [7,8,9], 'c3': [10,11,12]})
Concat dataframes¶
In [44]:
df3 = pd.concat([df1, df2]).reset_index()
df3.head(7)
Out[44]:
index | c1 | c2 | c3 | |
---|---|---|---|---|
0 | 0 | 1 | 4.0 | NaN |
1 | 1 | 2 | 5.0 | NaN |
2 | 2 | 3 | 6.0 | NaN |
3 | 0 | 7 | NaN | 10.0 |
4 | 1 | 8 | NaN | 11.0 |
5 | 2 | 9 | NaN | 12.0 |