美文网首页
R|tidyverse|节省管道%>%

R|tidyverse|节省管道%>%

作者: 高大石头 | 来源:发表于2021-04-20 00:27 被阅读0次

翻看简书的时候偶尔发现一篇关于tidyverse管道操作的技术贴:节省tidyverse中的管道 %>% ,还挺实用,实际演练学习下。

示例数据

library(tidyverse)
# Load the `penguins` data set from the palmerpenguins package without
# attaching the package itself.
data(list = "penguins", package = "palmerpenguins")
# Keep complete cases only: every row containing an NA is removed.
penguins <- stats::na.omit(penguins)

1. rename()

1.1 select()里用rename()

# select() can rename while it selects (`new_name = old_name`), so a separate
# rename() step is unnecessary.
penguins %>%
  select(
    penguins_species = species, # renamed on the fly
    island                      # kept under its original name
  )
## # A tibble: 333 x 2
##    penguins_species island   
##    <fct>            <fct>    
##  1 Adelie           Torgersen
##  2 Adelie           Torgersen
##  3 Adelie           Torgersen
##  4 Adelie           Torgersen
##  5 Adelie           Torgersen
##  6 Adelie           Torgersen
##  7 Adelie           Torgersen
##  8 Adelie           Torgersen
##  9 Adelie           Torgersen
## 10 Adelie           Torgersen
## # ... with 323 more rows

1.2 count()里面用rename()

# count() names its tally column through `name`, so the default `n` does not
# have to be renamed afterwards.
penguins %>%
  count(
    species,
    name = "total"
  )
## # A tibble: 3 x 2
##   species   total
##   <fct>     <int>
## 1 Adelie      146
## 2 Chinstrap    68
## 3 Gentoo      119

也可以在计数的同时给分组列重命名:

# The grouping column can be renamed inside count() as well
# (`new_name = old_name`), in addition to naming the tally column.
penguins %>%
  count(
    penguins_species = species,
    name = "total"
  )
## # A tibble: 3 x 2
##   penguins_species total
##   <fct>            <int>
## 1 Adelie             146
## 2 Chinstrap           68
## 3 Gentoo             119

注意: 此处传递给name参数的新名称必须用引号引起来,但选定列的新名称不必用引号引起来。

2. count()内部mutate()

# count() accepts an expression: the logical column `long_beak` is created on
# the fly and counted, with no preceding mutate() step.
penguins %>%
  count(
    long_beak = bill_length_mm > 50
  )
## # A tibble: 2 x 2
##   long_beak     n
##   <lgl>     <int>
## 1 FALSE       281
## 2 TRUE         52

也可以指定多个变量

# Several derived columns can be supplied at once; the count is then taken
# over each observed combination of the two flags.
penguins %>%
  count(
    long_beak = bill_length_mm > 50,
    is_adelie = species == "Adelie"
  )
## # A tibble: 3 x 3
##   long_beak is_adelie     n
##   <lgl>     <lgl>     <int>
## 1 FALSE     FALSE       135
## 2 FALSE     TRUE        146
## 3 TRUE      FALSE        52

3.transmute()+select()

实际上,transmute()可以像select()一样直接列出并保留未修改的列,同时还能在同一步里对列“重命名”或计算新列:

# transmute() keeps only the columns listed here, combining selection,
# renaming and computation in a single step.
penguins %>%
  transmute(
    penguins_species = species,        # renamed
    island,                            # carried over unchanged
    body_mass_kg = body_mass_g / 1000  # new column: grams converted to kg
  )
## # A tibble: 333 x 3
##    penguins_species island    body_mass_kg
##    <fct>            <fct>            <dbl>
##  1 Adelie           Torgersen         3.75
##  2 Adelie           Torgersen         3.8 
##  3 Adelie           Torgersen         3.25
##  4 Adelie           Torgersen         3.45
##  5 Adelie           Torgersen         3.65
##  6 Adelie           Torgersen         3.62
##  7 Adelie           Torgersen         4.68
##  8 Adelie           Torgersen         3.2 
##  9 Adelie           Torgersen         3.8 
## 10 Adelie           Torgersen         4.4 
## # ... with 323 more rows

4. summarize()+ungroup()

summarize(.groups = "drop"):汇总后直接取消分组(分组列本身仍保留在结果中),无需再接一步ungroup()

# Mean body mass for each island/species combination.
# `.groups = "drop"` makes summarize() return an ungrouped tibble, so no
# trailing ungroup() call is needed.
penguins %>%
  group_by(island, species) %>%
  summarize(
    # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
    mean_mass = mean(body_mass_g, na.rm = TRUE),
    .groups = "drop"
  )
## # A tibble: 5 x 3
##   island    species   mean_mass
##   <fct>     <fct>         <dbl>
## 1 Biscoe    Adelie        3710.
## 2 Biscoe    Gentoo        5092.
## 3 Dream     Adelie        3701.
## 4 Dream     Chinstrap     3733.
## 5 Torgersen Adelie        3709.

5. arrange()+其他功能slice()

# Keep the rows holding the 5 largest body_mass_g values.
# Note: the printed result below has 6 rows (ties are kept) and is not ordered
# by weight.
penguins %>%
  top_n(n = 5, wt = body_mass_g)
## # A tibble: 6 x 8
##   species island bill_length_mm bill_depth_mm flipper_length_~ body_mass_g sex  
##   <fct>   <fct>           <dbl>         <dbl>            <int>       <int> <fct>
## 1 Gentoo  Biscoe           49.2          15.2              221        6300 male 
## 2 Gentoo  Biscoe           59.6          17                230        6050 male 
## 3 Gentoo  Biscoe           51.1          16.3              220        6000 male 
## 4 Gentoo  Biscoe           45.2          16.4              223        5950 male 
## 5 Gentoo  Biscoe           49.8          15.9              229        5950 male 
## 6 Gentoo  Biscoe           48.8          16.2              222        6000 male 
## # ... with 1 more variable: year <int>
# Same selection with the newer slice_*() family; here the printed rows come
# back ordered from heaviest to lightest (ties still yield 6 rows).
penguins %>%
  slice_max(body_mass_g, n = 5)
## # A tibble: 6 x 8
##   species island bill_length_mm bill_depth_mm flipper_length_~ body_mass_g sex  
##   <fct>   <fct>           <dbl>         <dbl>            <int>       <int> <fct>
## 1 Gentoo  Biscoe           49.2          15.2              221        6300 male 
## 2 Gentoo  Biscoe           59.6          17                230        6050 male 
## 3 Gentoo  Biscoe           51.1          16.3              220        6000 male 
## 4 Gentoo  Biscoe           48.8          16.2              222        6000 male 
## 5 Gentoo  Biscoe           45.2          16.4              223        5950 male 
## 6 Gentoo  Biscoe           49.8          15.9              229        5950 male 
## # ... with 1 more variable: year <int>

slice_*()功能最大的变化是为分组数据添加了适当的行为,例如:

# On grouped data slice_max() works per group: this returns the heaviest 5% of
# penguins within each species. The result is still grouped by species.
penguins %>%
  group_by(species) %>%
  slice_max(order_by = body_mass_g, prop = 0.05)
## # A tibble: 16 x 8
## # Groups:   species [3]
##    species island bill_length_mm bill_depth_mm flipper_length_~ body_mass_g
##    <fct>   <fct>           <dbl>         <dbl>            <int>       <int>
##  1 Adelie  Biscoe           43.2          19                197        4775
##  2 Adelie  Biscoe           41            20                203        4725
##  3 Adelie  Torge~           42.9          17.6              196        4700
##  4 Adelie  Torge~           39.2          19.6              195        4675
##  5 Adelie  Dream            39.8          19.1              184        4650
##  6 Adelie  Dream            39.6          18.8              190        4600
##  7 Adelie  Biscoe           45.6          20.3              191        4600
##  8 Chinst~ Dream            52            20.7              210        4800
##  9 Chinst~ Dream            52.8          20                205        4550
## 10 Chinst~ Dream            53.5          19.9              205        4500
## 11 Gentoo  Biscoe           49.2          15.2              221        6300
## 12 Gentoo  Biscoe           59.6          17                230        6050
## 13 Gentoo  Biscoe           51.1          16.3              220        6000
## 14 Gentoo  Biscoe           48.8          16.2              222        6000
## 15 Gentoo  Biscoe           45.2          16.4              223        5950
## 16 Gentoo  Biscoe           49.8          15.9              229        5950
## # ... with 2 more variables: sex <fct>, year <int>

6. add_count()

按组计数和求和,add_count()添加一列,其中包含每组(或组的组合)的计数

# add_count() keeps every row and appends the size of that row's species group
# as `count_by_species`.
penguins %>%
  add_count(
    species,
    name = "count_by_species"
  ) %>%
  # Drop the measurement columns whose names contain "mm" to narrow the printout.
  select(-contains("mm"))
## # A tibble: 333 x 6
##    species island    body_mass_g sex     year count_by_species
##    <fct>   <fct>           <int> <fct>  <int>            <int>
##  1 Adelie  Torgersen        3750 male    2007              146
##  2 Adelie  Torgersen        3800 female  2007              146
##  3 Adelie  Torgersen        3250 female  2007              146
##  4 Adelie  Torgersen        3450 female  2007              146
##  5 Adelie  Torgersen        3650 male    2007              146
##  6 Adelie  Torgersen        3625 female  2007              146
##  7 Adelie  Torgersen        4675 male    2007              146
##  8 Adelie  Torgersen        3200 female  2007              146
##  9 Adelie  Torgersen        3800 male    2007              146
## 10 Adelie  Torgersen        4400 male    2007              146
## # ... with 323 more rows

也可以使用wt参数按组高效地求和(这里是各物种的体重总和):

# With `wt`, add_count() sums body_mass_g within each species instead of
# counting rows, and appends that per-species total to every row.
penguins %>%
  add_count(
    species,
    wt = body_mass_g,
    name = "total_weighted_by_species"
  ) %>%
  # Drop the measurement columns whose names contain "mm" to narrow the printout.
  select(-contains("mm"))
## # A tibble: 333 x 6
##    species island    body_mass_g sex     year total_weighted_by_species
##    <fct>   <fct>           <int> <fct>  <int>                     <int>
##  1 Adelie  Torgersen        3750 male    2007                    541100
##  2 Adelie  Torgersen        3800 female  2007                    541100
##  3 Adelie  Torgersen        3250 female  2007                    541100
##  4 Adelie  Torgersen        3450 female  2007                    541100
##  5 Adelie  Torgersen        3650 male    2007                    541100
##  6 Adelie  Torgersen        3625 female  2007                    541100
##  7 Adelie  Torgersen        4675 male    2007                    541100
##  8 Adelie  Torgersen        3200 female  2007                    541100
##  9 Adelie  Torgersen        3800 male    2007                    541100
## 10 Adelie  Torgersen        4400 male    2007                    541100
## # ... with 323 more rows

默认情况下,add_tally()添加的是总行数,相当于mutate(n = n());指定wt后则得到对应变量的总和:

# Put the per-species total weight (add_count() with `wt`) next to the total
# weight of all penguins (add_tally() with `wt`).
penguins %>%
  add_count(
    species,
    wt = body_mass_g,
    name = "total_weight_by_species"
  ) %>%
  add_tally(
    wt = body_mass_g,
    name = "total_weight_of_all_species"
  ) %>%
  # Keep the first two columns plus the two new ones. last_col(0) is the last
  # column and last_col(1) the one before it, so the overall total prints first.
  select(1:2, last_col(0):last_col(1))
## # A tibble: 333 x 4
##    species island    total_weight_of_all_species total_weight_by_species
##    <fct>   <fct>                           <int>                   <int>
##  1 Adelie  Torgersen                     1400950                  541100
##  2 Adelie  Torgersen                     1400950                  541100
##  3 Adelie  Torgersen                     1400950                  541100
##  4 Adelie  Torgersen                     1400950                  541100
##  5 Adelie  Torgersen                     1400950                  541100
##  6 Adelie  Torgersen                     1400950                  541100
##  7 Adelie  Torgersen                     1400950                  541100
##  8 Adelie  Torgersen                     1400950                  541100
##  9 Adelie  Torgersen                     1400950                  541100
## 10 Adelie  Torgersen                     1400950                  541100
## # ... with 323 more rows

参考链接:

节省tidyverse中的管道%>%

相关文章

网友评论

      本文标题:R|tidyverse|节省管道%>%

      本文链接:https://www.haomeiwen.com/subject/wxtglltx.html