Intro
Pandas is a Python library used for working with data sets. It has functions for analyzing, cleaning, exploring, and manipulating data.
In [1]:
Copied!
import pandas as pd
import pandas as pd
DataFrame from a list of lists
In [16]:
Copied!
# Each inner list is one row; the column labels are passed separately.
data = [
    ["tom", 10],
    ["nick", 15],
    ["juli", 20],
]
df = pd.DataFrame(data, columns=["Name", "Age"])
df
Out[16]:
 | Name | Age |
---|---|---|
0 | tom | 10 |
1 | nick | 15 |
2 | juli | 20 |
In [17]:
Copied!
# Dimensions of the frame as a (rows, columns) tuple.
df.shape
df.shape
Out[17]:
(3, 2)
In [25]:
Copied!
# Row labels; no index was supplied, so pandas generated a default RangeIndex.
df.index
df.index
Out[25]:
RangeIndex(start=0, stop=3, step=1)
In [26]:
Copied!
# Column labels of the frame, returned as an Index object.
df.columns
df.columns
Out[26]:
Index(['Name', 'Age'], dtype='object')
In [27]:
Copied!
# Print a summary: index range, per-column non-null count and dtype, memory usage.
df.info()
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Name    3 non-null      object
 1   Age     3 non-null      int64
dtypes: int64(1), object(1)
memory usage: 176.0+ bytes
In [28]:
Copied!
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; the object-typed Name column is left out by default.
df.describe()
df.describe()
Out[28]:
 | Age |
---|---|
count | 3.0 |
mean | 15.0 |
std | 5.0 |
min | 10.0 |
25% | 12.5 |
50% | 15.0 |
75% | 17.5 |
max | 20.0 |
DataFrame from a dictionary of lists
In [31]:
Copied!
# Each key becomes a column label; its list holds that column's values.
data = {
    "Name": ["tom", "nick", "juli"],
    "Age": [10, 15, 20],
}
df = pd.DataFrame(data)
df
Out[31]:
 | Name | Age |
---|---|---|
0 | tom | 10 |
1 | nick | 15 |
2 | juli | 20 |
DataFrame from a list of dictionaries
In [32]:
Copied!
# Each dictionary is one row; its keys supply the column labels.
data = [
    {"Name": "tom", "Age": 10},
    {"Name": "nick", "Age": 15},
    {"Name": "Juli", "Age": 20},
]
df = pd.DataFrame(data)
df
Out[32]:
 | Name | Age |
---|---|---|
0 | tom | 10 |
1 | nick | 15 |
2 | Juli | 20 |
DataFrame using the zip() function
In [34]:
Copied!
name = ["tom", "nick", "juli"]
age = [10, 15, 20]
# zip pairs the i-th name with the i-th age, giving one (name, age) tuple per row.
df = pd.DataFrame(
    list(zip(name, age)),
    columns=["Name", "Age"],
)
df
Out[34]:
 | Name | Age |
---|---|---|
0 | tom | 10 |
1 | nick | 15 |
2 | juli | 20 |