This is my first shot in an attempt to make a reproducible question. Trying to become a better member of SO.
here is the data set
reach.dat <- structure(list(stream = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("Brooks Creek", "Buncombe Hollow Creek",
"Bypass Channel", "Cape Horn Creek", "Cougar Creek", "Dog Creek",
"Indian George Creek", "Jim Creek", "Lower Speelyai", "North Siouxon Creek",
"Ole Creek", "Panamaker Creek", "Siouxon Creek", "Speelyai Creek",
"West Fork Speelyai Creek", "West Tributary Speelyai Creek"), class = "factor"),
reach = structure(c(21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 46L, 47L, 38L, 39L, 40L, 41L, 42L, 43L,
44L, 45L), .Label = c("BNH_0001", "BNH_0002", "BNH_0003",
"BNH_0004", "BNH_0005", "BNH_0006", "BNH_0007", "BNH_0008",
"BNH_0009", "BPC_0001", "BPC_0002", "BPC_0003", "BPC_0004",
"BPC_0005", "BPC_0006", "BPC_0007", "BPC_0008", "BPC_0009",
"BPC_0010", "BPC_0011", "BRK_001", "BRK_002", "BRK_003",
"BRK_004", "BRK_005", "BRK_006", "BRK_007", "BRK_008", "BRK_009",
"BRK_010", "BRK_011", "BRK_012", "BRK_013", "BRK_014", "BRK_015",
"BRK_016", "BRK_017", "CGR_0001", "CGR_0002", "CGR_0003",
"CGR_0004", "CGR_0005", "CGR_0006", "CGR_0007", "CGR_0008",
"CHC_0001", "CHC_0002", "DOG_0001", "ING_0001", "ING_0002",
"ING_0003", "ING_0004", "ING_0005", "ING_0006", "ING_0007",
"JMC_0001", "JMC_0002", "LSPL_0001", "NSX_0001", "NSX_0002",
"OLE_0001", "OLE_0002", "OLE_0003", "OLE_0004", "OLE_0005",
"OLE_0006", "PMR_0001", "PMR_0002", "SPL_0001", "SPL_0002",
"SPL_0003", "SPL_0004", "SPL_0005", "SPL_0006", "SPL_0007",
"SPL_0008", "SPL_0009", "SPL_0010", "SPL_0011", "SPL_0012",
"SPL_0013", "SPL_0014", "SPL_0015", "SPL_0016", "SPL_0017",
"SPL_0018", "SPL_0019", "SPL_0020", "SPL_0021", "SPL_0022",
"SPL_0023", "SXN_0001", "SXN_0002", "SXN_0003", "SXN_0004",
"SXN_0005", "SXN_0006", "SXN_0007", "SXN_0008", "WFSPL_0001",
"WFSPL_0002", "WFSPL_0003", "WFSPL_0004", "WFSPL_0005", "WTSPL_0001",
"WTSPL_0002", "WTSPL_0003", "WTSPL_0004", "WTSPL_0005"), class = "factor"),
length = c(178.9, 170.1, 185.8, 199.7, 190.3, 190, 172.3,
196.4, 179, 183.4, 182.4, 196.7, 188.4, 181.5, 178.9, 196.8,
181.2, 116.2, 121.4, 124.2, 180, 111.7, 130.3, 127.5, 143.4,
75.7, 591.1, 640.9, 582.2, 659, 534.9, 621.9, 636, 564.2,
547.1, 537.2, 589.6, 329.5, 192.7, 454.4, 464.5, 477.6, 544.8,
500.1, 473.1, 481.7, 506.4), SUM_LWD = c(0.288093907, 1.324221046,
1.763186222, 0.774161242, 0.391907514, 0.889842105, 0, 1.5200611,
1.097765363, 0.573173391, 0, 0.622267412, 0.427070064, 1.43338843,
1.151928452, 1.935365854, 1.856622517, 0.326333907, 0.07199341,
0.037520129, 0, 0, 0, 0.169647059, 0.138493724, 0.098678996,
0, 0, 0.217279285, 0.179044006, 0.097868761, 0.210644798,
0, 0.708259482, 0.145567538, 0.03877513, 0.026611262, 2.302579666,
2.125583809, 0.092033451, 0.176533907, 0.955904523, 0.175624082,
1.434553089, 1.038575354, 0.198048578, 1.042061611), Avg_RPD = c(0.352,
0.343333333, 0.325, 0.34, 0.225, 0.2475, 0.254, 0.2125, 0.276666667,
0.22, 0.32875, 0.23125, 0.215, 0.268, 0.305, 0.243636, 0.335714286,
0.2, 0.216666667, 0.24, 0.243333333, 0.56, 0.2575, 0.208,
0.335833333, 0.376666667, 0.175, 0.695, 0.75, 0.546666667,
1.188333333, 1.58, 0.885, 0.448571429, NA, NA, NA, 0.611818182,
0.50875, NA, 0.77, 0.49875, NA, 0.544, 1.017777778, 0.9175,
0.623333333), Avg_Substrate_Pools = c(86.10955531, 104.0366873,
83.94224019, 88.24737809, 88.93432719, 82.59257502, 84.02947757,
81.71253918, 82.76023619, 82.88298227, 84.91750332, 81.21887219,
85.05625823, 82.04273063, 84.01489539, 92.41003006, 117.3416424,
88.61396078, 88.86511047, 83.38603629, 71.73828707, 76.57563745,
86.2069123, 83.62789949, 81.19546417, 80.89002676, 182.5586,
160.63761, 134.82532, 123.1769494, 112.0805653, 93.59420959,
180.5731068, 82.91509529, NA, 61.9283, NA, 111.7153167, 83.79134743,
61.9283, 89.51477005, NA, NA, 86.08708892, 85.3472397, 110.8504333,
91.00371559)), .Names = c("stream", "reach", "length", "SUM_LWD",
"Avg_RPD", "Avg_Substrate_Pools"), row.names = c(NA, 47L), class = "data.frame")
This is a small part of the data frame, and I deleted several levels of one of the factor variables (stream in this case). So, you will see at the beginning of the script, I will add a few more columns, delete the rows with NA (this is the source of my problem, in my opinion, since this code is great for variables that do not have NA cells), then I omit the levels, since there were much more threads in the original dataset, as I just mentioned. The error is executed at the end of the script when ht.means.xx is launched. I get an errorError: result size (5), expected 4 or 1. , 5 - , 5 . , NA. , 1 . , , () NA, - () , NA.
script. , . , ( ) data.frame, .
require(dplyr)
require(sampling)
require(xtable)
require(car)
require(lattice)
reach.dat$streamID <- as.numeric(reach.dat$stream)
reach.dat$reachID <- 1:47
length.totals <- reach.dat %>%
group_by(stream) %>%
summarise(totals = sum(length))
reach.dat <- merge(reach.dat, length.totals, by = "stream")
reach.dat$length.probs <- with(reach.dat, length/totals)
reach.dat <- reach.dat[, c("stream","streamID","reach","reachID", "length",
"length.probs", "totals", "Avg_RPD", "Avg_Substrate_Pools", "SUM_LWD")]
reach.dat <- reach.dat[,1:8]
completeFun <- function(reach.dat, desiredCols) {
completeVec <- complete.cases(reach.dat[, desiredCols])
return(reach.dat[completeVec, ])
}
reach.dat <- completeFun(reach.dat, "Avg_RPD")
droplevels(reach.dat)
nsim <- 100
sample.fun.pi <- function(stream.no, n.vec){
sample <-
ifelse(length(reach.dat[reach.dat$streamID==stream.no, "reachID"]) == 1,
reach.dat[which(reach.dat$streamID==stream.no), "reachID"],
list(sample(subset(reach.dat, streamID == stream.no)$reachID,
size = n.vec[stream.no],
prob = reach.dat[reach.dat$streamID == stream.no, "length.probs"],
replace = FALSE)))
return(unlist(sample))
}
strata.uneq.pi <- function(n.vec, nsim){
sample.all <- matrix(NA, nrow = sum(n.vec), ncol = nsim)
for(i in 1:nsim){
sample.all[, i] <- unlist(apply(cbind(1:length(unique(reach.dat$streamID))), 1,
sample.fun.pi, n.vec))
}
return(sample.all)
}
n1.6<-c(1,1,1,1,1)
n7.8<-c(1,1,1,1,1)
n9.10<-c(2,1,1,1,1)
set.seed(303)
sample.1.6 <- strata.uneq.pi(n1.6, nsim)
sample.7.8 <- strata.uneq.pi(n7.8, nsim)
sample.9.10 <- strata.uneq.pi(n9.10, nsim)
pi.1.6 <- numeric(length(reach.dat$reach))
for(i in 1:length(reach.dat$reach)) {
pi.1.6[i] <- sum(sample.1.6==i)/nsim
}
pi.7.8 <- numeric(length(reach.dat$reach))
for(i in 1:length(reach.dat$reach)) {
pi.7.8[i] <- sum(sample.7.8==i)/nsim
}
pi.9.10 <- numeric(length(reach.dat$reach))
for(i in 1:length(reach.dat$reach)) {
pi.9.10[i] <- sum(sample.9.10==i)/nsim
}
reach.dat$y<-(reach.dat$Avg_RPD)
reach.dat$stream <- as.factor(as.character(reach.dat$stream))
num.reaches <- unlist(with(reach.dat, tapply(reach, stream, length)))
true_y <- reach.dat %>%
group_by(stream) %>%
summarise(true_y=mean(y)) %>%
ungroup()
ht.fun <- function(simnum, sample.n, pi.n){
reach <- sample.n[, simnum]
pi <- pi.n[sample.n[, simnum]]
y <- reach.dat[sample.n[, simnum], "y"]
stream <- reach.dat[sample.n[, simnum], "stream"]
data <- cbind.data.frame(reach, pi, y, stream)
ht.strata <- data %>%
tbl_df %>%
group_by(stream) %>%
summarise(total = sum(y/pi)) %>%
mutate(size = num.reaches) %>%
mutate(mean = total/size)
return(ht.strata[, "mean"])
}
ht.means.1.6 <- data.frame(apply(cbind(1:nsim), 1, ht.fun, sample.n = sample.1.6,
pi.n = pi.1.6))
ht.means.7.8 <- data.frame(apply(cbind(1:nsim), 1, ht.fun, sample.n = sample.7.8,
pi.n = pi.7.8))
ht.means.9.10 <- data.frame(apply(cbind(1:nsim), 1, ht.fun, sample.n = sample.9.10,
pi.n = pi.9.10))