Separate data rows based on repeat sequence

Question

Separate data rows based on repeat sequence

I have an example data frame below. For each sequence of three rows, I want to divide the first value by the third (in other words, the class "a" value by the class "c" value, for each id). What is the easiest way to do this? Thanks in advance.

 id class value
 0  a   241
 0  b   109
 0  c   350
40  a   239
40  b   160
40  c   399
53  a   158
53  b   97
53  c   255
54  a   239
54  b   78
54  c   317

+4

r dataframe

Silvand May 27 '15 at 4:02

source share

2 answers

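You could try split/unsplit from base R. We split the dataset by the "id" column to get a "list" output, loop over the list with lapply, use transform to add a new column ('value1') to each data.frame by dividing the "value" whose "class" is "a" by the one whose "class" is "c", and then unsplit by the "id" column.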

 # Split df1 by id, add to each piece a value1 column holding the ratio of its
 # class "a" value to its class "c" value, then reassemble in the original
 # row order.
 unsplit(
   lapply(
     split(df1, df1$id),
     function(grp) {
       transform(grp, value1 = value[class == "a"] / value[class == "c"])
     }
   ),
   df1$id
 )
 #   id class value    value1
 #1   0     a   241 0.6885714
 #2   0     b   109 0.6885714
 #3   0     c   350 0.6885714
 #4  40     a   239 0.5989975
 #5  40     b   160 0.5989975
 #6  40     c   399 0.5989975
 #7  53     a   158 0.6196078
 #8  53     b    97 0.6196078
 #9  53     c   255 0.6196078
 #10 54     a   239 0.7539432
 #11 54     b    78 0.7539432
 #12 54     c   317 0.7539432

Or use mutate from dplyr after grouping by "id":

library(dplyr)
# Group by id and add value1 = (class "a" value) / (class "c" value); the
# single ratio per group is recycled onto every row of that group.
mutate(
  group_by(df1, id),
  value1 = value[class == "a"] / value[class == "c"]
)
#    id class value    value1
#1   0     a   241 0.6885714
#2   0     b   109 0.6885714
#3   0     c   350 0.6885714
#4  40     a   239 0.5989975
#5  40     b   160 0.5989975
#6  40     c   399 0.5989975
#7  53     a   158 0.6196078
#8  53     b    97 0.6196078
#9  53     c   255 0.6196078
#10 54     a   239 0.7539432
#11 54     b    78 0.7539432
#12 54     c   317 0.7539432

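Or, if the rows are ordered by class within each id and there is a single 'a' and a single 'c' row per id, the mutate step can be written as: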

      # Alternative mutate() step for use after group_by(id): divides the first
      # value by the last value. Assumes each id group starts with its "a" row
      # and ends with its "c" row -- confirm the data is ordered that way.
      mutate(value1=first(value)/last(value))

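Or use data.table. Here := assigns the new column "value1". We convert the 'data.frame' to a 'data.table' (setDT(df1)) and create 'value1', grouped by 'id'. I am dividing the 1st observation (1L) by the last observation (.N), assuming that the rows are ordered and that there is only a single 'a' and a single 'c' value per group. If not, we can use value[class=='a']/value[class=='c'], as in the previous code.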

library(data.table)
# Convert df1 to a data.table by reference and add value1 in place: within
# each id, the first value divided by the last value (.N is the group size).
setDT(df1)[, value1 := value[1L] / value[.N], by = id]
#   id class value    value1
#1:  0     a   241 0.6885714
#2:  0     b   109 0.6885714
#3:  0     c   350 0.6885714
#4: 40     a   239 0.5989975
#5: 40     b   160 0.5989975
#6: 40     c   399 0.5989975
#7: 53     a   158 0.6196078
#8: 53     b    97 0.6196078
#9: 53     c   255 0.6196078
#10:54     a   239 0.7539432
#11:54     b    78 0.7539432
#12:54     c   317 0.7539432

Or a vectorised base R option. It assumes that each id has exactly one 'a' row and one 'c' row.

# Vectorised base R: compute the a/c ratio per id and broadcast it to every
# row of that id. Rows are paired through match() on the id itself, so the
# result stays correct when df1 is not sorted by id (a rank taken from
# factor(df1$id) only lines up with row order for sorted data). An id that
# lacks an "a" or a "c" row gets NA.
df1$value1 <- with(df1, {
  a_rows <- class == "a"
  c_rows <- class == "c"
  value[a_rows][match(id, id[a_rows])] /
    value[c_rows][match(id, id[c_rows])]
})

Update

If you only need one summary row per id, replace mutate with summarise in dplyr:

 # One row per id: the class "a" value divided by the class "c" value.
 summarise(
   group_by(df1, id),
   value1 = value[class == "a"] / value[class == "c"]
 )
 #   id    value1
 #1  0 0.6885714
 #2 40 0.5989975
 #3 53 0.6196078
 #4 54 0.7539432

Or, without the assignment (:=), in data.table:

# Summarise instead of assigning: returns one row per id with the a/c ratio.
setDT(df1)[, .(value1 = value[class == "a"] / value[class == "c"]), by = id]
 #    id    value1
 #1:  0 0.6885714
 #2: 40 0.5989975
 #3: 53 0.6196078
 #4: 54 0.7539432

# Example data: four ids, each with one row per class ("a", "b", "c") and an
# integer value.
df1 <- data.frame(
  id = rep(c(0L, 40L, 53L, 54L), each = 3L),
  class = rep(c("a", "b", "c"), times = 4L),
  value = c(
    241L, 109L, 350L,
    239L, 160L, 399L,
    158L, 97L, 255L,
    239L, 78L, 317L
  ),
  stringsAsFactors = FALSE
)

+1

akrun 27 '15 4:06

share

B.Shankar · Accepted Answer · 2015-05-27T04:18:45+0000

You can use by to group by id, then subset the value column by class within each group:

# Split `data` by id and, within each piece, divide the class "a" value by
# the class "c" value; by() yields one result per id.
by(data = data, INDICES = data$id,
   FUN = function(piece) {
     piece$value[piece$class == "a"] / piece$value[piece$class == "c"]
   })

This will result in a single value for each id . Output:

data$id: 0
[1] 0.6885714
----------------------------------------------------------------------------------------- 
data$id: 40
[1] 0.5989975
------------------------------------------------------------------------------------------ 
data$id: 53
[1] 0.6196078
------------------------------------------------------------------------------------------ 
data$id: 54
[1] 0.7539432

Separate data rows based on repeat sequence

Update

More articles: