It is not easy to figure out an effective solution to the following problem. The question is very verbose, because I'm not sure that I am making this problem harder than it can be.
For the named vector
t <- c(2, 0, 0, 30, 0, 0, 10, 2000, 0, 20, 0, 40, 60, 10) names(t) <- c(1, 0, 0, 2, 0, 0, 3, 4, 0, 5, 0, 6, 7, 8)
I want to split t into a list of 4 elements that are balanced based on the sum of the resulting elements of the list, preserving the order of the elements, and only dividing into non-zero elements. Expected Result
L[1] <- c(2, 0, 0, 30, 0, 0, 10)
The error function that I use minimizes sd(rowSums(L)) or sd(sapply(L, sum))
Trying to split a vector using something like the following does not quite work
split(t, cut(cumsum(t), 4)) # $`(-0.17,544]` # 1 0 0 2 0 0 3 # 2 0 0 30 0 0 10 # $`(544,1.09e+03]` # named numeric(0) # $`(1.09e+03,1.63e+03]` # named numeric(0) # $`(1.63e+03,2.17e+03]` # 4 0 5 0 6 7 8 # 2000 0 20 0 40 60 10
I wrote a function to break the list the way I wanted (see error function above)
break_at <- function(val, nchunks) { nchunks <- nchunks - 1 nonzero <- val[val != 0] all_groupings <- as.matrix(gtools::permutations(n = 2, r = length(nonzero), v = c(1, 0), repeats.allowed = TRUE)) all_groupings <- all_groupings[rowSums(all_groupings) == nchunks, ] which_grouping <- which.min( sapply( 1:nrow(all_groupings), function(i) { sd( sapply( split( nonzero, cumsum(all_groupings[i,]) ), sum ) ) } ) ) mark_breaks <- rep(0, length(val)) mark_breaks[names(val) %in% which(all_groupings[which_grouping,]==1)] <- 1 return(mark_breaks) }
You can see that the result is much better.
break_at(t, 4) # 0 0 0 0 0 0 0 1 0 1 0 0 1 0 split(t, cumsum(break_at(t, 4))) # $`0` # 1 0 0 2 0 0 3 # 2 0 0 30 0 0 10 # $`1` # 4 0 # 2000 0 # $`2` # 5 0 6 # 20 0 40 # $`3` # 7 8 # 60 10
It works using gtools::permutations(n = 2, r = length(nonzero), v = c(1, 0), repeats.allowed = TRUE) to look at all potential splits. See how it works for r = 3
# [,1] [,2] [,3] # [1,] 0 0 0 # [2,] 0 0 1 # [3,] 0 1 0 # [4,] 0 1 1 # [5,] 1 0 0 # [6,] 1 0 1 # [7,] 1 1 0 # [8,] 1 1 1
which then I filter, all_groupings[rowSums(all_groupings) == nchunks, ] . It only considers the potential cleavages that nchunks .
My problem is that this works terribly with my real data due to the number of permutations.
hard <- structure(c(2, 0, 1, 2, 0, 1, 1, 1, 5, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 2, 0, 1, 4, 0, 0, 0, 1, 3, 0, 0, 4, 0, 0, 0, 2, 0, 1, 1, 1, 3, 0, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 2, 0, 1, 6, 0, 0, 0, 0, 0, 1, 1, 1, 3, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 2, 0, 1, 2, 0, 1, 1, 4, 0, 0, 0, 1, 1, 3, 0, 0, 1, 2, 0, 1, 1, 2, 0, 1, 3, 0, 0, 1, 3, 0, 0, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 2, 0, 3, 0, 0, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 2, 0, 2, 0, 1, 3, 0, 0, 1, 1, 1, 1, 1, 2, 0, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 2, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1, 2, 0, 1, 1, 1, 3, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1, 3, 0, 0, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 1, 1, 5, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 1, 1, 2, 0, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 2, 0, 1, 2, 0, 1, 8, 0, 0, 0, 0, 0, 0, 0, 2, 0, 1, 9, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 4, 0, 0, 0, 1, 1, 1, 1, 6, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 1, 3, 0, 0, 1, 1, 1, 3, 0, 0, 7, 0, 0, 0, 0, 0, 0, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1), .Names = c("1", "0", "2", "3", "0", "4", "5", "6", "7", "0", "0", "0", "0", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "0", "0", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39", "0", "40", "41", "42", "43", "0", "44", "45", "46", "47", "48", "49", "50", "51", "52", "0", "53", "0", "54", "55", "0", "0", "0", "56", "57", "0", "0", "58", "0", "0", "0", "59", "0", "60", "61", "62", "63", "0", "0", "64", "65", "66", "67", "68", "0", "69", "70", "0", "71", "72", "73", "0", "74", "75", "0", "0", "0", "0", "0", "76", "77", "78", "79", "0", "0", "80", "81", "82", "83", "84", "85", "86", "87", "88", "0", "89", "90", "91", "0", "92", "93", "0", "94", "95", "96", "0", "0", "0", "97", "98", "99", "0", "0", "100", "101", "0", "102", "103", "104", "0", "105", "106", "0", "0", "107", "108", "0", "0", "109", "110", "111", "112", "0", "113", "114", "115", "116", "117", "118", "119", "120", "121", "122", "123", "124", "125", "126", "127", "128", "129", "130", "131", "0", "132", "133", "134", "0", "135", "0", "0", "136", "137", "138", "0", "139", "140", "0", "141", "142", "143", "144", "0", "145", "0", "146", "147", "0", "0", "148", "149", "150", "151", "152", "153", "0", "154", "155", "156", "157", "0", "158", "159", "0", "160", "161", "162", "163", "164", "165", "166", "0", "167", "168", "169", "170", "171", "172", "173", "174", "175", "176", "177", "178", "179", "180", "181", "182", "183", "184", "185", "186", "0", "187", "188", "189", "190", "191", "192", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "193", "194", "195", "196", "197", "0", "198", "199", "200", "201", "0", "202", "203", "204", "205", "0", "206", "0", "0", "0", "0", "0", "0", "0", "207", "208", "0", "209", "210", "211", "212", "213", "214", "215", "0", "216", "217", "218", "219", "220", "221", "0", "222", "223", "224", "225", "0", "0", "226", "227", "228", "229", "230", "231", "232", "233", "234", "235", "236", "237", "238", "239", "240", "0", "241", "242", "243", "244", "245", "246", "247", "248", "0", "249", "250", "251", "252", "253", "254", "0", "255", "256", "257", "258", "259", "260", "0", "0", "261", "262", "263", "264", "0", "265", "266", "267", "268", "269", "270", "271", "272", "273", "274", "0", "275", "276", "277", "278", "279", "280", "281", "282", "0", "283", "284", "285", "286", "287", "0", "0", "0", "0", "288", "0", "0", "0", "0", "0", "289", "290", "291", "292", "293", "294", "295", "296", "297", "298", "299", "300", "301", "302", "303", "304", "305", "306", "307", "308", "309", "310", "311", "312", "313", "314", "315", "316", "317", "318", "319", "320", "321", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "322", "323", "324", "325", "326", "327", "328", "329", "330", "331", "332", "333", "334", "335", "336", "337", "338", "339", "340", "341", "0", "342", "343", "344", "345", "346", "0", "347", "0", "348", "349", "350", "351", "352", "353", "354", "355", "356", "357", "358", "359", "360", "0", "361", "362", "363", "0", "364", "365", "0", "366", "367", "0", "0", "0", "0", "0", "0", "0", "368", "0", "369", "370", "0", "0", "0", "0", "0", "0", "0", "0", "371", "0", "0", "372", "0", "0", "0", "373", "374", "375", "376", "377", "0", "0", "0", "0", "0", "378", "0", "0", "0", "0", "0", "379", "380", "0", "0", "381", "382", "383", "384", "0", "0", "385", "0", "0", "0", "0", "0", "0", "386", "387", "388", "0", "389", "390", "391", "392", "393", "394", "395", "396", "397", "398", "399", "400", "401", "402", "0", "403", "404", "405", "406", "407", "408", "409"))