Another variant:
# Flag each row of `gsu` with 1L if any cell equals 'Yes', otherwise 0L.
# which(..., arr.ind=TRUE)[,1] gives the row index of every matching cell
# (NA comparisons are ignored by which()).
# seq_len() is used instead of 1:nrow(): with zero rows, 1:0 would expand to
# c(1, 0) and produce a length-2 result instead of an empty one.
(seq_len(nrow(gsu)) %in% which(gsu=='Yes', arr.ind=TRUE)[,1])+0L
or
# 0/1 flag per row: 1L when at least one non-NA cell equals 'Yes'.
# The !is.na() mask forces NA comparisons to FALSE, so any() never sees NA.
apply(gsu == "Yes" & !is.na(gsu), MARGIN = 1, FUN = any) + 0L
or
# Same per-row 0/1 flag, obtained by OR-ing the columns of the NA-safe
# logical mask together one column at a time.
Reduce(
  function(acc, col) acc | col,
  as.data.frame(gsu == "Yes" & !is.na(gsu))
) + 0L
or
# Compare every column of `gsu` with 'Yes', then take the parallel maximum
# across the resulting logical vectors; na.rm = TRUE lets non-NA cells decide
# the outcome for rows that also contain NA.
do.call(
  pmax,
  c(lapply(gsu, function(col) col == "Yes"), list(na.rm = TRUE))
)
Benchmarks
# Reproducible benchmark fixture: a 4000 x 4000 character data frame whose
# cells are drawn with replacement from NA, 'Yes', 'No' and the 26 capital
# letters. local() keeps the helper names out of the global environment.
set.seed(24)
gsu1 <- local({
  side <- 4000
  pool <- c(NA, "Yes", "No", LETTERS)
  cells <- sample(pool, side * side, replace = TRUE)
  as.data.frame(matrix(cells, ncol = side), stringsAsFactors = FALSE)
})
# Benchmark candidate 1: integer 0/1 flag per row of the global `gsu1`,
# 1L when any cell in that row equals 'Yes'.
akrun1 <- function() {
  # Row index of every cell equal to 'Yes'; which() skips NA comparisons.
  yes_rows <- which(gsu1 == 'Yes', arr.ind = TRUE)[, 1]
  # seq_len() rather than 1:nrow(): for a zero-row input 1:0 would be c(1, 0)
  # and the result would wrongly have length 2.
  (seq_len(nrow(gsu1)) %in% yes_rows) + 0L
}
# Benchmark candidate 2: compare each column of the global `gsu1` with 'Yes'
# and combine the per-column results with a parallel maximum, ignoring NA
# wherever a row has at least one non-NA comparison.
akrun2 <- function() {
  yes_by_column <- lapply(gsu1, function(column) column == "Yes")
  pmax_args <- c(yes_by_column, list(na.rm = TRUE))
  do.call(pmax, pmax_args)
}
# Benchmark candidate 3: logical flag per row of the global `gsu1`, TRUE when
# the row holds at least one 'Yes' (NA cells are not counted).
ExperimenteR <- function() {
  is_yes <- gsu1 == "Yes"
  yes_per_row <- rowSums(is_yes, na.rm = TRUE)
  yes_per_row >= 1
}
library(microbenchmark)
# Time the three candidates, 20 evaluations each, reporting the timings in
# relative units.
microbenchmark(
  akrun1(),
  akrun2(),
  ExperimenteR(),
  unit = "relative",
  times = 20L
)
#Unit: relative
# expr min lq mean median uq max neval cld
# akrun1() 1.244682 1.293628 1.293696 1.294336 1.319209 1.277138 20 b
# akrun2() 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 20 a
# ExperimenteR() 1.213802 1.296464 1.276666 1.295421 1.280282 1.209436 20 b
source
share