df = dget("http://dpaste.com/2SJ6DPX.txt")
""
v = strsplit(as.character(df$variable), ",", fixed=TRUE)
len = lengths(v) ## sapply(v, length) in R-3.1.3
v , v ,
uv = unlist(v)
idx = rep(seq_along(v), len)
, , uv FO.variable
test = (uv == as.character(df$FO.variable)[idx])
df$Keep = FALSE
df$Keep[ idx[test] ] = TRUE
( , data.frame, dd$Keep = f0(dd))
f0 = function(dd) {
v = strsplit(as.character(dd$variable), ",", fixed=TRUE)
len = lengths(v)
uv = unlist(v)
idx = rep(seq_along(v), len)
keep = logical(nrow(dd))
keep[ idx[uv == as.character(dd$FO.variable)[idx]] ] = TRUE
keep
}
( , , , , , ?) ( , )
f1 = function(dd)
mapply(grepl, dd$FO.variable, dd$variable, fixed=TRUE)
f1a = function(dd)
mapply(grepl, as.character(dd$FO.variable),
as.character(dd$variable), fixed=TRUE)
f2 = function(dd)
apply(dd, 1, function(x) grepl(x[4], x[2], fixed=TRUE))
> library(microbenchmark)
> identical(f0(df), f1(df))
[1] TRUE
> identical(f0(df), unname(f2(df)))
[1] TRUE
> microbenchmark(f0(df), f1(df), f1a(df), f2(df))
Unit: microseconds
expr min lq mean median uq max neval
f0(df) 57.559 64.6940 70.26804 69.4455 74.1035 98.322 100
f1(df) 573.302 603.4635 625.32744 624.8670 637.1810 766.183 100
f1a(df) 138.527 148.5280 156.47055 153.7455 160.3925 246.115 100
f2(df) 494.447 518.7110 543.41201 539.1655 561.4490 677.704 100
, fixed = TRUE .