Subtract two lines from each other

I have the following input

#mydata
ID  variable1  variable2
1   a,b,c,d    c,a
2   g,f,h      h
3   p,l,m,n,c  c,l

I want to subtract variable2 from variable1 row by row, i.e. remove from each row of variable1 the items that appear in the same row of variable2. I would like to have the following output:

 #Output
 ID  Output
 1   b,d
 2   g,f
 3   p,m,n

#dput

 # dput() representation of the example data frame: 3 rows with an integer
 # `ID` column and two factor columns, `variable1` and `variable2`, whose
 # values are comma-separated strings (row 1: "a,b,c,d" and "c,a").
 # The expression is not assigned to a name here; the answers below refer to
 # the data frame as `df1`, `df`, `d` or `mydata`.
 structure(list(ID = 1:3, variable1 = structure(1:3, .Label = c("a,b,c,d", "g,f,h", "p,l,m,n,c"), class = "factor"), variable2 = structure(c(1L, 3L, 2L), .Label = c("c,a", "c,l", "h"), class = "factor")), .Names = c("ID", "variable1", "variable2"), class = "data.frame", row.names = c(NA, -3L)) 
+5
source share
4 answers

We can split each of the two columns on "," with strsplit, use Map to take the setdiff of the corresponding list elements, collapse each result into a single string with toString, set the names of the resulting list to the "ID" column, stack it into a 'data.frame', and finally set the column names to 'ID' and 'Output'.

 # Split both columns into per-row item vectors.
 lhs_items <- strsplit(as.character(df1$variable1), ",")
 rhs_items <- strsplit(as.character(df1$variable2), ",")
 # Per row: items of variable1 that are absent from variable2, as one string.
 row_diffs <- Map(function(x, y) toString(setdiff(x, y)), lhs_items, rhs_items)
 # Name the list by ID so that stack() carries the ID along, then put the
 # ID column first and rename both columns.
 stacked <- stack(setNames(row_diffs, df1$ID))
 setNames(stacked[2:1], c("ID", "Output"))
 #   ID  Output
 # 1  1    b, d
 # 2  2    g, f
 # 3  3 p, m, n

Or a compact option would be

 library(splitstackshape)
 # Reshape columns 2 and 3 to long form (one item per row), then collapse the
 # per-ID set difference back into a single comma-separated string.
 long_dt <- cSplit(df1, splitCols = 2:3, sep = ",", direction = "long")
 long_dt[, .(Output = toString(setdiff(variable1, variable2))), by = ID]
 #    ID  Output
 # 1:  1    b, d
 # 2:  2    g, f
 # 3:  3 p, m, n
+4
source

You can try,

 # For each row, keep the items of variable1 that do not occur in variable2.
 # Both columns are split on "," first; the result is a list holding one
 # character vector per row of `df`.
 Map(
   setdiff,
   strsplit(as.character(df$variable1), ","),
   strsplit(as.character(df$variable2), ",")
 )
+5
source

Using grepl instead of setdiff

 library(stringr)
 a1 <- str_split(d$variable1, ",")
 a2 <- str_split(d$variable2, ",")
 # Remove from x every item that equals one of the items in y and join the
 # rest with commas. The alternation is anchored as ^( ... )$ so that only
 # whole items match; an unanchored pattern would also drop items that merely
 # contain a match (e.g. "dc" when y holds "c").
 # NOTE: the items of y are still interpreted as regular expressions, so this
 # assumes they contain no regex metacharacters.
 drop_matches <- function(x, y) {
   pattern <- paste0("^(", paste(y, collapse = "|"), ")$")
   paste(x[!grepl(pattern, x)], collapse = ",")
 }
 # One string per row, bound into a one-column character matrix.
 do.call("rbind", Map(drop_matches, a1, a2))
 #      [,1]
 # [1,] "b,d"
 # [2,] "g,f"
 # [3,] "p,m,n"
+2
source

Using Dplyr

 # Row by row: split both comma-separated fields, keep the items of variable1
 # that are absent from variable2, and join them back together with commas.
 mydata %>%
   rowwise() %>%
   mutate(
     output = paste0(
       setdiff(
         strsplit(as.character(variable1), split = ",")[[1]],
         strsplit(as.character(variable2), split = ",")[[1]]
       ),
       collapse = ","
     )
   ) %>%
   select(ID, output)

output:

     ID output
  (int)  (chr)
1     1    b,d
2     2    g,f
3     3  p,m,n
+2
source

All Articles