dplyr and Non-Standard Evaluation (NSE)

I am trying to write a function that takes a data frame and the name of a column, summarizes that column with dplyr, and returns the resulting data frame. I tried a bunch of interp() permutations from the lazyeval package, but I have already spent too much time trying to get it to work. So I wrote a “static” version of the function I want here:

# Static reference version: per-`cyl` means of `qsec` and `mpg` in `mtcars`.
# Takes no arguments; both the data set and the columns are hard-coded.
summarize.df.static <- function() {
  by_cyl <- group_by(mtcars, cyl)
  summarize(by_cyl, qsec = mean(qsec), mpg = mean(mpg))
}

# Call the static version and preview the summarized data frame.
new_df <- summarize.df.static()
head(new_df)

Here is the beginning of the dynamic version I'm stuck on:

# Work-in-progress dynamic version from the question (not working yet).
#   df_in:         data frame to summarize (grouped by its `cyl` column).
#   sum_metric_in: name of the column to average, passed as a string.
# NOTE(review): the arguments given to summarize_() below are ordinary R
# expressions rather than formulas/strings, and `sum_metric_in = ...` uses the
# literal name "sum_metric_in" for the output column instead of the value
# passed in -- this is the part the question asks how to fix.
summarize.df.dynamic <- function(df_in,sum_metric_in){
  temp_df <- df_in %>%
    group_by(cyl) %>%
    summarize_(qsec = mean(qsec),
              sum_metric_in=mean(sum_metric_in)) # some mix of interp()
  return(temp_df)
}

# Intended usage: the metric column is passed by name, as a string.
new_df <- summarize.df.dynamic(mtcars,"mpg")
head(new_df)

Note that I want the column name in the result to also come from the passed parameter (mpg in this case). Also note that the qsec column is static, that is, it is not passed in.

The following is the correct answer sent by "docendo discimus":

# Summarize `df_in` by `cyl`: the mean of `qsec` plus the mean of the column
# whose name is given as a string in `sum_metric_in`. The second result column
# is named after `sum_metric_in`.
summarize.df.dynamic <- function(df_in, sum_metric_in) {
  by_cyl <- group_by(df_in, cyl)

  # Turn the string into a symbol and splice it into the formula.
  metric_mean <- interp(~mean(var), var = as.name(sum_metric_in))
  result <- summarize_(by_cyl, qsec = ~mean(qsec), xyz = metric_mean)

  # "xyz" is only a placeholder; swap in the requested column name.
  names(result)[names(result) == "xyz"] <- sum_metric_in
  result
}

# Average mpg per cyl group; the result column is named "mpg".
new_df <- summarize.df.dynamic(mtcars,"mpg")
head(new_df)

#  cyl     qsec      mpg
#1   4 19.13727 26.66364
#2   6 17.97714 19.74286
#3   8 16.77214 15.10000

# Same call with a different metric column ("disp").
new_df <- summarize.df.dynamic(mtcars,"disp")
head(new_df)

#  cyl     qsec     disp
#1   4 19.13727 105.1364
#2   6 17.97714 183.3143
#3   8 16.77214 353.1000
+4
3

You could do the following (assuming "qsec" is static, as in your question):

library(dplyr)
library(lazyeval)
# Per-`cyl` summary of `data`: mean `qsec` and the mean of the column named by
# the string `sum_metric_in`, returned under that same column name.
summarize.df <- function(data, sum_metric_in) {
  grouped <- group_by(data, cyl)
  summary_tbl <- summarize_(
    grouped,
    qsec = ~mean(qsec),
    xyz = interp(~mean(var), var = as.name(sum_metric_in))
  )

  # Replace the temporary "xyz" label with the requested column name.
  is_placeholder <- names(summary_tbl) == "xyz"
  names(summary_tbl)[is_placeholder] <- sum_metric_in
  summary_tbl
}

# Example: mean qsec and mean mpg per cyl group; printed output follows.
summarize.df(mtcars, "mpg")
#Source: local data frame [3 x 3]
#
#  cyl     qsec      mpg
#1   4 19.13727 26.66364
#2   6 17.97714 19.74286
#3   8 16.77214 15.10000

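AFAIK you cannot (yet?) pass the value of "sum_metric_in" as the new column name to dplyr::rename, so the column is renamed in a separate step with names().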

+6

Instead of a ~ formula, you can build the expression as a string with paste and pass it to summarize_.

# Build the summary expression as a string instead of a formula.
# The output names are supplied through `.dots` + setNames() so the new column
# is called by the value of `sum_metric_in` (e.g. "mpg"). Writing
# `sum_metric_in = ...` directly would name it literally "sum_metric_in".
df_in %>%
  group_by(cyl) %>%
  summarize_(.dots = setNames(
    list(~mean(qsec), paste0('mean(', sum_metric_in, ')')),
    c("qsec", sum_metric_in)
  ))
+4

With the devel version of dplyr (0.6.0, 2017), we can use quosures:

# Summarize `df` by `cyl`: mean `qsec` plus the mean of the column given as a
# bare (unquoted) name in `expr`. The second result column is named after the
# captured expression.
summarise.dfN <- function(df, expr) {
  metric <- enquo(expr)             # capture the argument as a quosure
  metric_name <- quo_name(metric)   # its text, used as the output name

  grouped <- group_by(df, cyl)
  summarise(grouped, qsec = mean(qsec), !!metric_name := mean(!!metric))
}

# The metric column is passed as a bare name, not as a string.
summarise.dfN(mtcars, mpg)
# A tibble: 3 × 3
#    cyl     qsec      mpg
#  <dbl>    <dbl>    <dbl>
#1     4 19.13727 26.66364
#2     6 17.97714 19.74286
#3     8 16.77214 15.10000

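enquo does a job similar to substitute: it captures the input argument and converts it to a quosure. quo_name converts the quosure to a string, and we unquote (!! or UQ) inside group_by/summarise/mutate etc. so that it is evaluated there.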

As mentioned above, we can also pass grouping variables as arguments

# Summarize `df` grouped by `grpVar`: mean `qsec` plus the mean of `expr`.
# Both `expr` and `grpVar` are given as bare (unquoted) column names; the
# second result column is named after the captured `expr`.
summarise.dfN2 <- function(df, expr, grpVar) {
  metric <- enquo(expr)
  grouping <- enquo(grpVar)
  metric_name <- quo_name(metric)

  grouped <- group_by(df, !!grouping)
  summarise(grouped, qsec = mean(qsec), !!metric_name := mean(!!metric))
}

# Both the metric column and the grouping column are passed as bare names.
summarise.dfN2(mtcars, mpg, cyl)
# A tibble: 3 × 3
#    cyl     qsec      mpg
#  <dbl>    <dbl>    <dbl>
#1     4 19.13727 26.66364
#2     6 17.97714 19.74286
#3     8 16.77214 15.10000
+1
source

All Articles