Manipulation
In [1]:
Copied!
import pandas as pd
import pandas as pd
df manipulation¶
In [35]:
Copied!
df['Name']
df['Name']
Out[35]:
0 tom 1 nick 2 juli Name: Name, dtype: object
In [36]:
Copied!
type(df['Name'])
type(df['Name'])
Out[36]:
pandas.core.series.Series
In [37]:
Copied!
df[['Name']]
df[['Name']]
Out[37]:
Name | |
---|---|
0 | tom |
1 | nick |
2 | juli |
In [38]:
Copied!
type(df[['Name']])
type(df[['Name']])
Out[38]:
pandas.core.frame.DataFrame
In [39]:
Copied!
df['Age'].max()
df['Age'].max()
Out[39]:
20
In [40]:
Copied!
df[df['Age']>10]
df[df['Age']>10]
Out[40]:
Name | Age | |
---|---|---|
1 | nick | 15 |
2 | juli | 20 |
In [42]:
Copied!
df[df['Age']>10][['Name']]
df[df['Age']>10][['Name']]
Out[42]:
Name | |
---|---|
1 | nick |
2 | juli |
In [46]:
Copied!
df.loc[df.Name.str.contains('nic')]
df.loc[df.Name.str.contains('nic')]
Out[46]:
Name | Age | |
---|---|---|
1 | nick | 15 |
In [52]:
Copied!
df1 = df[~df['Name'].str.contains('nick')]
df1
df1 = df[~df['Name'].str.contains('nick')]
df1
Out[52]:
Name | Age | |
---|---|---|
0 | tom | 10 |
2 | juli | 20 |
In [54]:
Copied!
df1 = df[~df['Name'].isin(['nick', 'tom'])]
df1
df1 = df[~df['Name'].isin(['nick', 'tom'])]
df1
Out[54]:
Name | Age | |
---|---|---|
2 | juli | 20 |
In [55]:
Copied!
df[~df['Name'].str.contains("#@'£$%abc?", regex=False)]
df[~df['Name'].str.contains("#@'£$%abc?", regex=False)]
Out[55]:
Name | Age | |
---|---|---|
0 | tom | 10 |
1 | nick | 15 |
2 | juli | 20 |
In [57]:
Copied!
df['tmp'] = df['Name'].str.replace('o','oo')
df
df['tmp'] = df['Name'].str.replace('o','oo')
df
Out[57]:
Name | Age | tmp | |
---|---|---|---|
0 | tom | 10 | toom |
1 | nick | 15 | nick |
2 | juli | 20 | juli |
In [58]:
Copied!
df1 = df.loc[df.Name.str.len() > len('hello')].reset_index(drop=True)
df1
df1 = df.loc[df.Name.str.len() > len('hello')].reset_index(drop=True)
df1
Out[58]:
Name | Age | tmp |
---|
In [4]:
Copied!
df1 = df[df['Name'].notna()]
df1
df1 = df[df['Name'].notna()]
df1
Out[4]:
Name | Age | |
---|---|---|
0 | tom | 10 |
1 | nick | 15 |
2 | juli | 20 |
In [7]:
Copied!
import numpy as np
import numpy as np
In [ ]:
Copied!
df['a'] = df['a'].astype(int)
df['a'] = df['a'].astype(int)
In [10]:
Copied!
df['a'] = pd.to_numeric(df['a'])
df['a'] = pd.to_numeric(df['a'])
lambda¶
In [14]:
Copied!
df = df.apply(lambda x:np.square(x) if x.name == 'd' else x, axis=1)
df = df.apply(lambda x:np.square(x) if x.name == 'd' else x, axis=1)
In [ ]:
Copied!
df.apply(lambda x: func(x['col1'], x['col2']), axis=1)
df.apply(lambda x: func(x['col1'], x['col2']), axis=1)
In [ ]:
Copied!
df['c'] = df.apply(lambda x:max(len(x['a']), len(x['b'])), axis=1)
df['c'] = df.apply(lambda x:max(len(x['a']), len(x['b'])), axis=1)
In [ ]:
Copied!
df['a'] = df.apply(lambda x: x['b'][x['b'].find('=')+1:len(x['b'])-1], axis=1)
df['a'] = df.apply(lambda x: x['b'][x['b'].find('=')+1:len(x['b'])-1], axis=1)
In [ ]:
Copied!
df['a'] = df.apply(lambda x:time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(x['a']/1000)), axis=1)
df['a'] = df.apply(lambda x:time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(x['a']/1000)), axis=1)
update column based on condition¶
In [ ]:
Copied!
df['user2'] = np.where((df['user']=='Unknown'), df['path'].apply(lambda x: x.split('/')[4]), df['user'])
df['user2'] = np.where((df['user']=='Unknown'), df['path'].apply(lambda x: x.split('/')[4]), df['user'])
In [ ]:
Copied!
df.loc[df['a'].str.contains('/'), 'new_col'] = df['a'] # create new column new_col from col a, filled only on rows where a contains '/'
df.loc[df['a'].str.contains('/'), 'new_col'] = df['a'] # create new column new_col from col a, filled only on rows where a contains '/'
In [ ]:
Copied!
# For rows where a == 'Unknown', set new_col to the 5th '/'-separated piece of b.
# The same mask is used on both sides so only the matching rows are split.
unknown_rows = df['a'] == 'Unknown'
df.loc[unknown_rows, 'new_col'] = df.loc[unknown_rows, 'b'].apply(lambda x: x.split('/')[4])
# For rows where a == 'Unknown', set new_col to the 5th '/'-separated piece of b.
# The same mask is used on both sides so only the matching rows are split.
unknown_rows = df['a'] == 'Unknown'
df.loc[unknown_rows, 'new_col'] = df.loc[unknown_rows, 'b'].apply(lambda x: x.split('/')[4])
In [ ]:
Copied!
df['a'] = df['b'].map(str) + df['c'].map(str)
df['a'] = df['b'].map(str) + df['c'].map(str)
In [ ]:
Copied!
df['c'] = np.where(
df['a']==df['b'],0, np.where(
df['a']>df['b'],1,-1))
df['c'] = np.where(
df['a']==df['b'],0, np.where(
df['a']>df['b'],1,-1))
In [ ]:
Copied!
def f(row):
    """Three-way comparison of a row's 'a' and 'b' values.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row passed by ``df.apply(f, axis=1)``)
        Must support ``row['a']`` and ``row['b']``.

    Returns
    -------
    int
        0 when ``row['a'] == row['b']``, 1 when ``row['a'] > row['b']``,
        and -1 in every other case.
    """
    if row['a'] == row['b']:
        return 0
    if row['a'] > row['b']:
        return 1
    # Neither equal nor greater: falls through to -1, same as the original else branch.
    return -1
df['c'] = df.apply(f, axis=1)
def f(row):
    """Three-way comparison of a row's 'a' and 'b' values.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row passed by ``df.apply(f, axis=1)``)
        Must support ``row['a']`` and ``row['b']``.

    Returns
    -------
    int
        0 when ``row['a'] == row['b']``, 1 when ``row['a'] > row['b']``,
        and -1 in every other case.
    """
    if row['a'] == row['b']:
        return 0
    if row['a'] > row['b']:
        return 1
    # Neither equal nor greater: falls through to -1, same as the original else branch.
    return -1
df['c'] = df.apply(f, axis=1)
In [ ]:
Copied!
# Keep rows where a >= 1 AND b <= 1. Each comparison is wrapped in its own
# parentheses because & binds more tightly than >= and <=.
df[(df['a'] >= 1) & (df['b'] <= 1)]
# Keep rows where a >= 1 AND b <= 1. Each comparison is wrapped in its own
# parentheses because & binds more tightly than >= and <=.
df[(df['a'] >= 1) & (df['b'] <= 1)]
In [ ]:
Copied!
# Boolean flag: True where the value in column b is one of the items in list1.
# isin() is the vectorized form of the row-wise `x in list1` check.
df['a'] = df['b'].isin(list1)
# Boolean flag: True where the value in column b is one of the items in list1.
# isin() is the vectorized form of the row-wise `x in list1` check.
df['a'] = df['b'].isin(list1)
In [ ]:
Copied!
df.sort_values(['date'], ascending=[False])
df.sort_values(['date'], ascending=[False])
In [ ]:
Copied!
# Summarise per (a, b, c) group: latest date, total size and number of paths,
# ordered from the largest total size to the smallest.
(
    df.groupby(['a', 'b', 'c'])
    .agg({'latest_date': 'max', 'size_in_mb': 'sum', 'path': 'count'})
    .rename(columns={'path': 'file_count'})
    .reset_index()  # turn the group keys back into ordinary columns
    .sort_values('size_in_mb', ascending=False)
    .reset_index(drop=True)  # renumber rows after sorting without keeping the old index
)
# Summarise per (a, b, c) group: latest date, total size and number of paths,
# ordered from the largest total size to the smallest.
(
    df.groupby(['a', 'b', 'c'])
    .agg({'latest_date': 'max', 'size_in_mb': 'sum', 'path': 'count'})
    .rename(columns={'path': 'file_count'})
    .reset_index()  # turn the group keys back into ordinary columns
    .sort_values('size_in_mb', ascending=False)
    .reset_index(drop=True)  # renumber rows after sorting without keeping the old index
)
In [ ]:
Copied!
In [ ]:
Copied!
# Left join: keep every row of df1 and attach matching df2 rows on (col1, col2).
df3 = pd.merge(df1, df2, how="left", on=["col1", "col2"])
# Left join: keep every row of df1 and attach matching df2 rows on (col1, col2).
df3 = pd.merge(df1, df2, how="left", on=["col1", "col2"])
In [ ]:
Copied!
In [ ]:
Copied!
In [43]:
Copied!
pd.set_option('max_colwidth', 1000)
pd.set_option('max_colwidth', 1000)
In [5]:
Copied!
pd.options.mode.chained_assignment = None # default='warn'
pd.options.mode.chained_assignment = None # default='warn'
In [ ]:
Copied!
df = pd.read_csv('student.csv')
df = pd.read_csv('student.csv')
In [ ]:
Copied!
# Read a header-less, comma-separated file, skipping 0-indexed file lines 100-199.
# on_bad_lines='warn' (pandas >= 1.3) skips malformed rows with a warning; it replaces
# error_bad_lines=False + warn_bad_lines=True, which were removed in pandas 2.0.
df = pd.read_csv('path/abc.csv', header=None, delimiter=',', index_col=False,
                 on_bad_lines='warn', skiprows=range(100, 200))
# Read a header-less, comma-separated file, skipping 0-indexed file lines 100-199.
# on_bad_lines='warn' (pandas >= 1.3) skips malformed rows with a warning; it replaces
# error_bad_lines=False + warn_bad_lines=True, which were removed in pandas 2.0.
df = pd.read_csv('path/abc.csv', header=None, delimiter=',', index_col=False,
                 on_bad_lines='warn', skiprows=range(100, 200))
In [ ]:
Copied!
df.to_csv('student.csv', index=False)
df.to_csv('student.csv', index=False)
In [ ]:
Copied!
In [ ]:
Copied!
start_index = lst_path[:lst_path.find(key_word)-1].rfind('/')
start_index = lst_path[:lst_path.find(key_word)-1].rfind('/')
In [ ]:
Copied!
lst_string = [x.decode('utf-8', 'ignore') for x in list_of_byte_string]
lst_string = [x.decode('utf-8', 'ignore') for x in list_of_byte_string]
In [ ]:
Copied!