Here's an approach that uses split from base R after using melt from "reshape2":
library(reshape2)

# Reshape the list to long format, then group the list-element labels (L1)
# by the value they were stored with.
molten <- melt(lst)
split(molten$L1, molten$value)
# $`1`
# [1] "c"
#
# $`2`
# [1] "a" "c"
#
# $`3`
# [1] "a" "b" "c" "d"
#
# $`4`
# [1] "b" "b" "c"
#
# $`6`
# [1] "a" "b" "b" "d"
#
# $`7`
# [1] "b"
#
# $`9`
# [1] "b" "c"
#
# $`10`
# [1] "a" "b"
#
# $`15`
# [1] "a"
#
# $`17`
# [1] "a" "d"
Similarly, in base R with stack:
# c() strips every attribute except names from each element, leaving plain
# vectors for stack() to combine into a values/ind data frame.
stacked <- stack(lapply(lst, c))

# Group the element names (ind, converted from factor to character) by value.
with(stacked, split(as.character(ind), values))
Or even more directly, if the elements of "lst" are already plain vectors and do not need the lapply(lst, c) step:
# Stack the list as-is, then group the element names by value.
stacked <- stack(lst)
split(as.character(stacked[["ind"]]), stacked[["values"]])
To clarify my comment, the more efficient way that I described was as follows:
# Repeat each element's name once per value it contributes, then group the
# names by value. lengths() counts exactly the values that unlist() emits for
# each element, so the two vectors always line up: this works for plain
# vectors and for matrices of any shape, whereas nrow() is only correct for
# single-column matrices and is NULL for plain vectors.
split(rep(names(lst), lengths(lst)), unlist(lst, use.names = FALSE))
For a much larger lst we get the following:
# Candidate implementations. Each one reads the global `lst` and returns the
# element names grouped by value.

# fun1: repeat each name once per value, then split the names by value.
# lengths() counts exactly the values unlist() emits for each element, so it
# stays correct for plain vectors and multi-column matrices (nrow() does not).
fun1 <- function() {
  split(rep(names(lst), lengths(lst)), unlist(lst, use.names = FALSE))
}

# fun2: base R stack(), after c() reduces each element to a plain vector.
fun2 <- function() {
  x <- stack(lapply(lst, c))
  split(as.character(x$ind), x$values)
}

# fun3: melt() (from reshape2, which must already be attached), then split().
fun3 <- function() {
  x <- melt(lst)
  split(x$L1, x$value)
}

# fun4: stack() followed by unstack() with a formula.
fun4 <- function() {
  unstack(stack(lapply(lst, as.vector)), ind ~ values)
}

## Make lst much bigger
lst <- unlist(replicate(10000, lst, simplify = FALSE), recursive = FALSE)
names(lst) <- make.unique(names(lst))

library(microbenchmark)

# NOTE: the timings below are recorded sample output; the fun1() row was
# measured with the per-element counts computed via lapply(lst, nrow).
system.time(fun3())
#    user  system elapsed
#  48.338   0.000  47.643

microbenchmark(fun1(), fun2(), fun4(), times = 5)
# Unit: milliseconds
#    expr       min        lq   median        uq       max neval
#  fun1()  454.5913  456.6793  473.901  555.8954  574.4394     5
#  fun2()  922.1282 1028.4972 1034.872 1068.4761 1150.8072     5
#  fun4() 1222.5296 1300.0643 1323.253 1339.2037 1421.1546     5