Here is the base R equivalent:
# Example data: one grouping label and one numeric value per row.
dat <- data.frame(
  Group = c("A", "A", "A", "B", "B", "C", "C", "C", "A", "A", "B"),
  Var = c(1.3, 1.2, 0.4, 0.3, 1.3, 1.5, 1.7, 1.9, 2.1, 2.4, 6.7),
  stringsAsFactors = FALSE
)

# Run id: start at 1 and add 1 wherever a value differs from the one before it.
with(dat, cumsum(c(1L, head(Group, -1L) != tail(Group, -1L))))
# [1] 1 1 1 2 2 3 3 3 4 4 5
As a function
#' Run-length ids for a vector
#'
#' Numbers each run of consecutive equal values: the first run gets id 1 and
#' the id increases by one every time a value differs from its predecessor.
#'
#' @param x A vector (or factor) whose elements can be compared with `!=`.
#' @return An integer vector the same length as `x`; `integer(0)` for empty
#'   input.
rleid <- function(x) {
  n <- length(x)
  # Without this guard, c(1L, logical(0)) would yield a length-1 result.
  if (n == 0L) {
    return(integer(0))
  }
  prev <- x[-n]
  curr <- x[-1L]
  changed <- prev != curr
  # `!=` returns NA when either side is missing, and cumsum() would then turn
  # every later id into NA. Resolve those positions explicitly: two missing
  # neighbours belong to the same run, a missing next to a non-missing do not.
  unknown <- is.na(changed)
  if (any(unknown)) {
    changed[unknown] <- is.na(prev)[unknown] != is.na(curr)[unknown]
  }
  cumsum(c(1L, changed))
}

# Attach the run id to the data as a new column and print the result.
(dat <- within(dat, id <- rleid(Group)))
#    Group Var id
# 1      A 1.3  1
# 2      A 1.2  1
# 3      A 0.4  1
# 4      B 0.3  2
# 5      B 1.3  2
# 6      C 1.5  3
# 7      C 1.7  3
# 8      C 1.9  3
# 9      A 2.1  4
# 10     A 2.4  4
# 11     B 6.7  5
Then aggregate based on the new variable:
# Sum `Var` within every combination of the remaining columns of `dat`
# (the `.` on the right-hand side stands for all columns other than `Var`).
aggregate(Var ~ ., data = dat, FUN = sum)
#   Group id Var
# 1     A  1 2.9
# 2     B  2 1.6
# 3     C  3 5.1
# 4     A  4 4.5
# 5     B  5 6.7
Alternatively, you can use `rle`, but it requires an atomic vector, so if you are working with a factor you need an extra step (i.e. `as.vector`):
#' Run-length ids built on top of `rle()`
#'
#' @param x A vector or factor. It is passed through `as.vector()` first
#'   because `rle()` needs an atomic vector (a factor becomes character).
#' @return An integer vector the same length as `x`, numbering the runs found
#'   by `rle()` from 1 upwards.
rleid2 <- function(x) {
  # Encode once and reuse both components instead of calling rle() twice.
  runs <- rle(as.vector(x))
  rep(seq_along(runs$values), runs$lengths)
}

rleid2(dat$Group)
Some benchmarks:
# Build a reproducible one-million-row resample of `dat` (rows drawn with
# replacement), then check that the base R `rleid` gives exactly the same ids
# as data.table's implementation on it.
set.seed(1)
# seq_len() stays correct for a zero-row `dat`, where 1:nrow(dat) is c(1, 0).
dat2 <- dat[sample(seq_len(nrow(dat)), size = 1e6, replace = TRUE), ]
identical(data.table::rleid(dat2$Group), rleid(dat2$Group))