data.table, dplyr, , , , .
# Per-group comparison used by the snippets that call fun().
#
# x, y: vectors that as.Date() can convert (Date objects or date strings).
# Returns a logical vector as long as x: TRUE where the element of x is
# strictly later than the earliest date in y.
fun <- function(x, y) {
  as.Date(x) > min(as.Date(y))
}
split() , Map(), , split<-(),
# Build the per-row result without leaving helper variables behind:
# start from an all-FALSE vector, then let `split<-` write each group's
# fun() result back to the row positions that belong to that group.
answer <- local({
  ids <- dat$id
  out <- logical(nrow(dat))
  split(out, ids) <- Map(fun, split(dat$date1, ids), split(dat$date2, ids))
  out
})
, , , . , ; fun() .
@chinsoon12 ( ),
# Convert dat to a plain data.frame for the base-R functions below.
# NOTE(review): presumably dat is a data.table or tibble — confirm where it is defined.
df <- as.data.frame(dat)
# Apply fun() to date1 and date2 within each id group and return the
# results in the original row order.
#
# df: data frame with columns id, date1 and date2.
# Returns a logical vector with one element per row of df.
# Relies on fun(), which must be defined in an enclosing scope.
mtm1 <- function(df) {
  # Size the result from the argument itself rather than a global `dat`,
  # so the function works on any data frame passed in.
  answer <- logical(nrow(df))
  # `split<-` assigns each group's result to that group's row positions.
  split(answer, df$id) <-
    Map(fun, split(df$date1, df$id), split(df$date2, df$id))
  answer
}
> identical(mtm1(df), frankMtd()$v)
[1] TRUE
> microbenchmark::microbenchmark(frankMtd(), mtm1(df), times=5L)
Unit: milliseconds
expr min lq mean median uq max
frankMtd() 1917.95697 1927.2548 1928.65821 1928.45893 1933.34159 1936.27878
mtm1(df) 47.00293 47.0198 48.02849 47.10012 47.18432 51.83523
neval cld
5 b
5 a
With 1000 groups (`id = sample(1000, N, replace = TRUE)`),
Unit: milliseconds
expr min lq mean median uq max neval
frankMtd() 140.87859 140.88647 141.97093 141.86977 142.28619 143.9336 5
mtm1(df) 61.82032 64.55505 64.61313 65.53642 65.53768 65.6162 5
cld
b
a
Date
# Same group-wise comparison as a plain numeric test: for every row, is
# date1 greater than the smallest date2 within the row's id group?
#
# df: data frame with columns id, date1 and date2; the date columns are
#     passed through as.numeric() before being split by id.
# Returns a logical vector with one element per row of df, in row order.
mtm2 <- function(df) {
  groups <- df$id
  # Convert once up front so the per-group work is done on bare numbers.
  first_by_group <- split(as.numeric(df$date1), groups)
  second_by_group <- split(as.numeric(df$date2), groups)
  exceeds_group_min <- function(x, y) x > min(y)

  answer <- logical(nrow(df))
  # `split<-` scatters each group's result back to its original rows.
  split(answer, groups) <- Map(
    exceeds_group_min, first_by_group, second_by_group
  )
  answer
}
With 1e5 values in 1e4 groups, with `id` as a `factor()`, and compared with the fastest `frank_*()` results:
> identical(frank_any()$v, mtm1(df))
[1] TRUE
> identical(frank_any()$v, mtm2(df))
[1] TRUE
and
Unit: milliseconds
expr min lq mean median uq max neval
frank_any() 79.90262 80.43112 81.79228 81.18565 83.18963 84.25236 5
mtm1(df) 237.00027 241.40299 244.83638 246.26495 249.47713 250.03658 5
mtm2(df) 44.11074 46.17133 51.26976 47.03285 52.77204 66.26184 5
cld
b
c
a