Quickly split a large vector into pieces in R

My question is very closely related to this:

Divide vector into pieces in R

I am trying to split a large vector into chunks of a known size, and it is slow. A solution for the case where the length divides evenly is here:

A quick fix when the chunk size is a factor of the length:

Split data into equal parts based on the length of the data block

I would like to handle the case where no (large) factor exists, since I want rather large chunks.

My example is for a vector much smaller than the one used in my real application:

d <- 1:6510321
# Sloooow
chunks <- split(d, ceiling(seq_along(d)/2000))
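For reference, on a small vector the desired output looks like this; the final chunk simply absorbs the remainder:

split(1:7, ceiling(seq_along(1:7) / 3))
# $`1`
# [1] 1 2 3
#
# $`2`
# [1] 4 5 6
#
# $`3`
# [1] 7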
2 answers

Using llply from the plyr package, I was able to reduce the time.

# Baseline: split() on a grouping factor built with ceiling()
chunks <- function(d, n){
    chunks <- split(d, ceiling(seq_along(d)/n))
    names(chunks) <- NULL
    return(chunks)
}

require(plyr)
plyrChunks <- function(d, n){
    # Chunk start positions, n apart
    is <- seq(from = 1, to = length(d), by = ceiling(n))
    # Make sure the final position is included as a boundary
    if(tail(is, 1) != length(d)) {
        is <- c(is, length(d))
    }
    # Each chunk runs from one boundary up to, but excluding, the next
    chunks <- llply(head(seq_along(is), -1),
                    function(i){
                        start <- is[i]
                        end <- is[i+1] - 1
                        d[start:end]})
    # The ranges above stop one element short of the end,
    # so append the final element to the last chunk
    lc <- length(chunks)
    td <- tail(d, 1)
    chunks[[lc]] <- c(chunks[[lc]], td)
    return(chunks)
}

 # testing
 d <- 1:6510321
 n <- 2000

 system.time(chks <- chunks(d,n))
 #    user  system elapsed 
 #   5.472   0.000   5.472 

 system.time(plyrChks <- plyrChunks(d, n))
 #    user  system elapsed 
 #   0.068   0.000   0.065 

 identical(chks, plyrChks)
 # TRUE

As a bonus, llply gives you the .parallel option for parallel execution and .progress for a progress bar.
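A minimal sketch (not part of the original answer) of what the parallel version could look like, assuming a doParallel backend with two workers; the chunk extraction is a simplified variant of plyrChunks, and .paropts exports the globals to the workers:

library(plyr)
library(doParallel)   # supplies the foreach backend that .parallel = TRUE needs

cl <- makeCluster(2)  # assumed worker count; tune to your machine
registerDoParallel(cl)

d <- 1:6510321
n <- 2000
starts <- seq(1, length(d), by = n)

# One task per chunk; note that plyr disables .progress when .parallel = TRUE
chunks <- llply(seq_along(starts),
                function(i) d[starts[i]:min(starts[i] + n - 1, length(d))],
                .parallel = TRUE,
                .paropts = list(.export = c("d", "starts", "n")))

stopCluster(cl)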


A one-liner using the parallel package that ships with base R:

chunks <- parallel::splitIndices(6510321, ncl = ceiling(6510321/2000))
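Note that splitIndices() returns index vectors rather than pieces of the data; here the two coincide because d is 1:6510321, but for a general vector you would subset afterwards. A minimal sketch:

x <- rnorm(10)
idx <- parallel::splitIndices(length(x), ncl = 4)
chunks <- lapply(idx, function(i) x[i])   # materialise the actual chunks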
