函数应用

apply, map, applymap

map()只能用于Series

applymap()只能用于DataFrame；apply()既可用于DataFrame，也可用于Series

map将一个函数应用于Series的每个元素

apply沿axis参数指定的方向，把每一行或每一列作为一个Series传给函数（一次apply调用只能沿一个方向，不能同时作用于行和列）：axis=0时逐列应用（纵向），axis=1时逐行应用（横向）

applymap则将函数应用于每个元素（同时作用于行和列）

>>> test = pd.DataFrame({'column1' : np.arange(5), 'column2' : np.arange(5,10)}, index=list('abcde'))

>>>  # map方法，dataframe取单列series，用map方法取对2余数

>>> test['map'] = test['column1'].map(lambda x:x%2)

>>>

>>>  # apply方法，axis=1，指定横向，求column1, column2两列数据的和赋值给apply_column

>>> test['apply_column'] = test[['column1', 'column2']].apply(lambda x:sum(x), axis=1)

   column1  column2  map  apply_column

a        0        5    0             5

b        1        6    1             7

c        2        7    0             9

d        3        8    1            11

e        4        9    0            13

>>>

>>>  # apply方法，axis=0，指定纵向，分别求column1, column2两列各自的和，赋值给新行apply_line

>>> test.loc['apply_line'] = test[['column1', 'column2']].apply(lambda x:sum(x), axis=0)

            column1  column2  map  apply_column

a                 0        5    0             5

b                 1        6    1             7

c                 2        7    0             9

d                 3        8    1            11

e                 4        9    0            13

apply_line     10.0     35.0  NaN           NaN

>>>

>>>  # axis=1，指定横向，求a, b, c 三行数据的和赋值给apply_column_1

>>> test['apply_column_1'] = test.loc['a':'c'].apply(lambda x:x.sum(), axis=1)

            column1  column2  map  apply_column  apply_column_1

a                 0        5    0             5              10

b                 1        6    1             7              15

c                 2        7    0             9              18

d                 3        8    1            11             NaN

e                 4        9    0            13             NaN

apply_line     10.0     35.0  NaN           NaN             NaN

>>>

>>> # applymap方法，所有数据转字符串且在前方加A

>>> test.applymap(lambda x:'A'+str(x))

            column1  column2   map  apply_column  apply_column_1

a                A0       A5    A0            A5             A10

b                A1       A6    A1            A7             A15

c                A2       A7    A0            A9             A18

d                A3       A8    A1           A11            Anan

e                A4       A9    A0           A13            Anan

apply_line    A10.0    A35.0  Anan          Anan            Anan

排序排名

索引排序

sort_index()

>>> sr = pd.Series(np.arange(3), index=list('eca'))

>>> df = pd.DataFrame(np.arange(12).reshape(3,4),

            index=['two', 'one','three'],

            columns=list('cbad'))

>>> sr.sort_index(ascending=False)             # 按索引降序排列

e 0

c 1

a 2

dtype: int32

>>> df.sort_index(axis=1, ascending=False) # 列索引按降序排列

        d  c  b   a

two     3  0  1   2

one     7  4  5   6

three  11  8  9  10

值排序

sort_values()

>>> sr.sort_values(ascending=False, inplace=True) # 按值降序排列，已排序series取代原series

>>> df_new = df.sort_values(by='c',) # by指定按某列的值排序，返回已排序的新dataframe，不改变原dataframe

>>> df.sort_values(by=['one','two'], axis=1, inplace=True) # by为行时，需指定axis=1，或axis='columns'；inplace=True时原地修改df，返回None而不是新dataframe

值排名

rank()

>>> df = pd.DataFrame(np.array([1,2,3,3,5,5,5,7,6,4,8,1]).reshape((3,4)),

index=['one', 'two', 'three'],

columns=list('abcd'), )

>>> np.random.shuffle(df['a']);  np.random.shuffle(df['b']);  np.random.shuffle(df['c']);  np.random.shuffle(df['d']);

>>>  np.random.shuffle(df.loc['one']);  np.random.shuffle(df.loc['two']);  np.random.shuffle(df.loc['three']);

>>> df    # 打乱df行列数据

       a  b  c  d

one    6  7  3  3

two    4  8  1  5

three  5  5  1  2

>>> df.rank(1) # 指定轴，1时横向排名，默认为0纵向排名

>>> df['c_min'] = df.loc[: ,'c'] .rank(method='min')

>>> df['c_max'] =  df.loc[: ,'c'] .rank(method='max')

>>> df['c_first'] =  df.loc[: ,'c'] .rank(method='first')

>>> df['c_dense'] =  df.loc[: ,'c'] .rank(method='dense')

>>> df['c_default'] = df.loc[: ,'c'] .rank()