Is there a drawback to using get() in dplyr instead of SE?

I read about SE and NSE in dplyr, and ran into a problem where I really need SE. I have the following function, which should find rows where some columns match but the target variable differs:

# Find groups of rows that agree on every column in `cols_to_use` but hold
# more than one distinct value in `target_column`.
#
# df:            data frame to check.
# target_column: name of the target column, given as a string.
# cols_to_use:   character vector with the names of the grouping columns.
#
# Returns one row per inconsistent group: the grouping columns plus
# `uTargets`, the number of distinct target values found in that group.
find_dataset_inconsistencies <- function(df, target_column, cols_to_use) {
  # The pipeline is the last expression (no trailing assignment to a local),
  # so the result is returned visibly and prints when called at the console.
  df %>%
    group_by_at(cols_to_use) %>%
    # get() uses inherits = TRUE by default: when `target_column` is not a
    # column it keeps searching the enclosing environments.
    summarise(uTargets = length(unique(get(target_column)))) %>%
    filter(uTargets > 1)
}

This seems to work in my case. However, get(target_column) is a workaround, because I need standard evaluation (SE) of my variable and cannot hard-code the column name. At first I tried to do this with the SE version (summarise_(.dots = ...)), but could not find the correct syntax for evaluating target_column.

My question is this: is there a drawback to simply using get()? Are there any cases where this will not work? Any risks or slowdowns? Just using get is definitely more readable than the "correct" SE syntax.

+6
source share
2 answers

This can be done with NSE, with the help of rlang.

Assuming your use case:

find_dataset_inconsistencies(mtcars, "mpg", c("cyl", "vs", "am", "gear", "carb"))
# # A tibble: 8 x 6
# # Groups:   cyl, vs, am, gear [5]
#     cyl    vs    am  gear  carb uTargets
#   <dbl> <dbl> <dbl> <dbl> <dbl>    <int>
# 1  4.00  1.00  0     4.00  2.00        2
# 2  4.00  1.00  1.00  4.00  1.00        4
# 3  4.00  1.00  1.00  4.00  2.00        2
# 4  6.00  1.00  0     3.00  1.00        2
# 5  6.00  1.00  0     4.00  4.00        2
# 6  8.00  0     0     3.00  2.00        4
# 7  8.00  0     0     3.00  3.00        3
# 8  8.00  0     0     3.00  4.00        4

You can:

library(dplyr)

# Same contract as find_dataset_inconsistencies(), without get().
#
# df:            data frame to check.
# target_column: name of the target column, given as a string.
# cols_to_use:   character vector with the names of the grouping columns.
#
# Returns one row per group that holds more than one distinct target value,
# with the count in `uTargets`.
f2 <- function(df, target_column, cols_to_use) {
  df %>%
    group_by_at(cols_to_use) %>%
    # `.data[[name]]` looks the name up among the columns only. An injected
    # bare symbol would still be data-masked and could fall back to a variable
    # of the same name in an enclosing environment instead of failing.
    summarise(uTargets = n_distinct(.data[[target_column]])) %>%
    filter(uTargets > 1)
}

all.equal(
  find_dataset_inconsistencies(mtcars, "mpg", c("cyl", "vs", "am", "gear", "carb")),
  f2(mtcars, "mpg", c("cyl", "vs", "am", "gear", "carb"))
)
# [1] TRUE

Actual answer to your risk question:

Now imagine that you have foo <- 3 in your global environment. Compare:

find_dataset_inconsistencies(mtcars, "foo", c("cyl", "vs", "am", "gear", "carb"))
# A tibble: 0 x 6
# Groups:   cyl, vs, am, gear [0]
# ... with 6 variables: cyl <dbl>, vs <dbl>, am <dbl>, gear <dbl>,
#   carb <dbl>, uTargets <int>

which will silently return an empty data frame and:

f2(mtcars, "foo", c("cyl", "vs", "am", "gear", "carb"))
# Error in summarise_impl(.data, dots) : variable 'foo' not found

which raises an error that immediately points to the problem.


Edit

On reflection, a more "tidyverse way" would be to accept bare (unquoted) column names and capture them with tidy evaluation:

# Tidy-eval variant: columns are passed as bare names rather than strings.
#
# df:            data frame to check.
# target_column: bare name of the target column.
# ...:           bare names of the grouping columns.
#
# Returns the groups that contain more than one distinct target value, with
# the count in `uTargets`.
f3 <- function(df, target_column, ...) {
  grouping <- quos(...)           # capture the grouping columns unevaluated
  target <- enquo(target_column)  # capture the target column unevaluated
  df %>%
    group_by(!!!grouping) %>%
    summarise(uTargets = n_distinct(!!target)) %>%
    filter(uTargets > 1)
}
all.equal(
  find_dataset_inconsistencies(mtcars, "mpg", c("cyl", "vs", "am", "gear", "carb")),
  f3(mtcars, target_column = mpg, cyl, vs, am, gear, carb)
)
# [1] TRUE

f3() takes bare column names, which gives it more tidyverse-like semantics.

+5

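@Aurele's answer shows how to do this with rlang and explains the risk of using get. If you would still rather use get, it can be made safe by restricting where it looks the name up. Here are three ways to do that.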

1. do / summarise

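Wrap summarise in do and call get(..., .), passing the dot as the place to look the name up. A name that is not a column then gives an error instead of being picked up from the global environment.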

# Variant 1: run summarise() inside do() and look the target column up in `.`.
#
# df:            data frame to check.
# target_column: name of the target column, given as a string.
# cols_to_use:   character vector with the names of the grouping columns.
#
# Returns the rows of the summary where `uTargets` (the number of unique
# target values) is greater than 1.
find_dataset_inconsistencies <- function(df, target_column, cols_to_use) {
  df %>% 
    group_by_at(cols_to_use) %>% 
    # `.` is passed to get() as the place to search; presumably do() binds it
    # to the data of the current group -- depends on dplyr's do(), confirm.
    do(summarise(., uTargets = length(unique(get(target_column, .))))) %>% 
    filter(uTargets > 1)
}

# gives desired result
find_dataset_inconsistencies(mtcars, "mpg", c("cyl", "vs", "am", "gear", "carb"))
# ... snip correct output ...

# correctly gives an error indicating it can't find `foo`
foo <- 3
find_dataset_inconsistencies(mtcars, "foo", c("cyl", "vs", "am", "gear", "carb"))

2. inherits = FALSE

# Variant 2: keep get() but pin the environment it searches.
#
# df:            data frame to check.
# target_column: name of the target column, given as a string.
# cols_to_use:   character vector with the names of the grouping columns.
#
# Returns the rows of the summary where `uTargets` (the number of unique
# target values) is greater than 1.
find_dataset_inconsistencies <- function(df, target_column, cols_to_use) {
  df %>% 
    group_by_at(cols_to_use) %>% 
    # Search exactly one environment: the one two levels above the
    # environment in which this expression is evaluated. inherits = FALSE
    # stops get() from continuing into further enclosing environments.
    # NOTE(review): which environment holds the columns is a dplyr internal
    # and may differ between dplyr versions -- confirm before relying on it.
    summarise(uTargets = length(unique(get(target_column,
       parent.env(parent.env(environment())), inherits = FALSE)))) %>% 
    filter(uTargets > 1)
}

# gives desired result
find_dataset_inconsistencies(mtcars, "mpg", c("cyl", "vs", "am", "gear", "carb"))
# ... snip correct output ...

# correctly gives an error indicating it can't find `foo`
foo <- 3
find_dataset_inconsistencies(mtcars, "foo", c("cyl", "vs", "am", "gear", "carb"))
## Error in summarise_impl(.data, dots) : 
##   Evaluation error: object 'foo' not found.

Alternatively, wrap this lookup in your own version of get:

# Restricted replacement for get(): looks `x` up in a single environment and
# never in that environment's own enclosures.
#
# x: name of the object to fetch, given as a string.
#
# Returns the value bound to `x` in the environment two levels above the
# caller's environment; get() signals an error when `x` is not bound there.
GET <- function(x) {
  # environment GET() was called from
  p <- parent.frame()
  # two enclosing levels above the caller
  # NOTE(review): the number of hops is tied to where GET() is called from
  # (inside summarise() in the accompanying example) -- confirm for other uses.
  p3 <- parent.env(parent.env(p))
  # inherits = FALSE: search p3 only
  get(x, p3, inherits = FALSE)
}

# Variant 2 rewritten on top of the GET() helper.
#
# df:            data frame to check.
# target_column: name of the target column, given as a string.
# cols_to_use:   character vector with the names of the grouping columns.
#
# Returns the rows of the summary where `uTargets` (the number of unique
# target values) is greater than 1.
find_dataset_inconsistencies <- function(df, target_column, cols_to_use) {
  df %>% 
    group_by_at(cols_to_use) %>% 
    # GET() must be called directly inside summarise(): it walks a fixed
    # number of environments up from the place it is called from.
    summarise(uTargets = length(unique(GET(target_column)))) %>% 
    filter(uTargets > 1)
}

# gives expected answer    
find_dataset_inconsistencies(mtcars, "mpg", c("cyl", "vs", "am", "gear", "carb"))

# gives expected error
foo <- 3
find_dataset_inconsistencies(mtcars, "foo", c("cyl", "vs", "am", "gear", "carb"))

3.

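Subset the data to the rows of the current group and look the column up in that subset. mtcars has row names, so they can be turned into a column and used to identify the rows of each group: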

library(tidyr)
# Variant 3: look the target column up in the subset of rows that belong to
# the current group.
#
# df:            data frame with row names.
# target_column: name of the target column, given as a string.
# cols_to_use:   character vector with the names of the grouping columns.
#
# Returns the rows of the summary where `uTargets` (the number of unique
# target values) is greater than 1.
find_dataset_inconsistencies <- function(df, target_column, cols_to_use) {
  df %>% 
    # rownames_to_column() is exported by tibble (not tidyr or dplyr), so it
    # is namespace-qualified; it stores the row names in a `rowname` column.
    tibble::rownames_to_column() %>%
    group_by_at(cols_to_use) %>% 
    # `.` is the piped-in data and `rowname` the row names seen by
    # summarise(); the matching rows are handed to get() as the place to
    # search for `target_column`.
    summarise(uTargets = length(unique(
        get(target_column, .[.$rowname %in% rowname, ])))) %>% 
    filter(uTargets > 1)
}

# gives expected answer
find_dataset_inconsistencies(mtcars, "mpg", c("cyl", "vs", "am", "gear", "carb"))

# gives expected error
find_dataset_inconsistencies(mtcars, "foo", c("cyl", "vs", "am", "gear", "carb"))
+2

All Articles