import numpy as np
import pandas as pd
函数应用
apply, map, applymap
map()用于Series(pandas 2.1起DataFrame也提供map()方法,用于取代已弃用的applymap())
apply()既可用于DataFrame也可用于Series;applymap()只能用于DataFrame
map将一个函数应用于Series的每个元素
apply将一个函数通过axis参数指定行或列应用于对应的元素(一个apply方法不能同时作用于行和列)(axis=0纵向应用,axis=1横向应用)
applymap则将函数应用于每个元素(同时作用于行和列)
>>> test = pd.DataFrame({'column1' : np.arange(5), 'column2' : np.arange(5,10)}, index=list('abcde'))
>>> # map方法:从dataframe取出单列(Series),用map对每个元素求除以2的余数
>>> test['map'] = test['column1'].map(lambda x:x%2)
>>>
>>> # apply方法,axis=1,指定横向,求column1, column2两列数据的和赋值给apply_column
>>> test['apply_column'] = test[['column1', 'column2']].apply(lambda x:sum(x), axis=1)
column1 column2 map apply_column
a 0 5 0 5
b 1 6 1 7
c 2 7 0 9
d 3 8 1 11
e 4 9 0 13
>>>
>>> # apply方法,axis=0,指定纵向,分别求column1, column2两列各自的和,赋值给新增的行apply_line
>>> test.loc['apply_line'] = test[['column1', 'column2']].apply(lambda x:sum(x), axis=0)
column1 column2 map apply_column
a 0 5 0 5
b 1 6 1 7
c 2 7 0 9
d 3 8 1 11
e 4 9 0 13
apply_line 10.0 35.0 NaN NaN
>>>
>>> # axis=1,指定横向,求a, b, c 三行数据的和赋值给apply_column_1
>>> test['apply_column_1'] = test.loc['a':'c'].apply(lambda x:x.sum(), axis=1)
column1 column2 map apply_column apply_column_1
a 0 5 0 5 10
b 1 6 1 7 15
c 2 7 0 9 18
d 3 8 1 11 NaN
e 4 9 0 13 NaN
apply_line 10.0 35.0 NaN NaN NaN
>>>
>>> # applymap方法,所有数据转字符串且在前方加A
>>> test.applymap(lambda x:'A'+str(x))
column1 column2 map apply_column apply_column_1
a A0 A5 A0 A5 A10
b A1 A6 A1 A7 A15
c A2 A7 A0 A9 A18
d A3 A8 A1 A11 Anan
e A4 A9 A0 A13 Anan
apply_line A10.0 A35.0 Anan Anan Anan
排序排名
索引排序
sort_index()
>>> sr = pd.Series(np.arange(3), index=list('eca'))
>>> df = pd.DataFrame(np.arange(12).reshape(3,4),
index=['two', 'one','three'],
columns=list('cbad'))
>>> sr.sort_index(ascending=False) # 按索引降序排列
e 0
c 1
a 2
dtype: int32
>>> df.sort_index(axis=1, ascending=False) # 列索引按降序排列
d c b a
two 3 0 1 2
one 7 4 5 6
three 11 8 9 10
值排序
sort_values()
>>> sr.sort_values(ascending=False, inplace=True) # 按值降序排列,已排序series取代原series
>>> df_new = df.sort_values(by='c',) # by指定按某列的值排序,返回已排序的新dataframe,不改变原dataframe
>>> df.sort_values(by=['one','two'], axis=1, inplace=True) # by为行标签时,需指定axis=1或axis='columns';inplace=True时原地排序并取代原dataframe,返回None
值排名
rank()
>>> df = pd.DataFrame(np.array([1,2,3,3,5,5,5,7,6,4,8,1]).reshape((3,4)),
index=['one', 'two', 'three'],
columns=list('abcd'), )
>>> np.random.shuffle(df['a']); np.random.shuffle(df['b']); np.random.shuffle(df['c']); np.random.shuffle(df['d']);
>>> np.random.shuffle(df.loc['one']); np.random.shuffle(df.loc['two']); np.random.shuffle(df.loc['three']);
>>> df # 打乱df行列数据
a b c d
one 6 7 3 3
two 4 8 1 5
three 5 5 1 2
>>> df.rank(1) # 指定轴,1时横向排名,默认为0纵向排名
>>> df['c_min'] = df.loc[: ,'c'] .rank(method='min')
>>> df['c_max'] = df.loc[: ,'c'] .rank(method='max')
>>> df['c_first'] = df.loc[: ,'c'] .rank(method='first')
>>> df['c_dense'] = df.loc[: ,'c'] .rank(method='dense')
>>> df['c_default'] = df.loc[: ,'c'] .rank()













网友评论