A subset and join of a data frame by matching on a nested list in R

I am trying to combine two data frames, df and myData, by matching the elements of one column in each. The df column purposefully contains nested lists, and I would like to join whenever an element of the nested list matches the myData element. I would like to keep the unmatched rows of df (a left join).

Here is an example, first without nested lists in df.

# Example data: an integer id column `a` and a plain character key column `x1`.
df <- data.frame(
  a = 1:5,
  x1 = c("a", "b", "g", "a", "a"),
  stringsAsFactors = FALSE
)
str(df)

'data.frame':   5 obs. of  2 variables:
$ a : int  1 2 3 4 5
$ x1: chr  "a" "b" "g" "a" ...

# Lookup table: key column x1 and the value x2 to attach for each key.
myData <- data.frame(
  x1 = c("a", "g", "q"),
  x2 = c("za", "zg", "zq"),
  stringsAsFactors = FALSE
)

Now we can join the column x1:

# Using a for loop: copy each lookup value onto the rows of df whose key matches.
# Start from a character NA so x2 is a character column even when nothing matches.
df$x2 <- NA_character_
# seq_len() gives an empty sequence when myData has no rows, whereas
# 1:nrow(myData) would iterate over c(1, 0).
for (id in seq_len(nrow(myData))) {
  df$x2[df$x1 %in% myData$x1[id]] <- myData$x2[id]
}

Or using dplyr:

library(dplyr)
# Rebuild df from scratch (presumably so that no x2 column from the loop
# version takes part in the join).
df <- data.frame(a = 1:5)
df$x1 <- c("a", "b", "g", "a", "a")
# Name the join key explicitly instead of relying on the natural join over all
# shared column names; this also avoids the "Joining by" message.
df %>%
  left_join(myData, by = "x1")

Now consider df with nested lists.

# Two multi-element entries, each wrapped in a list so that c() below keeps
# them as single elements of the resulting list column.
l1 <- list(letters[1:5])
l2 <- list(letters[6:10])
df <- data.frame(a = 1:5)
# x1 becomes a list column: three single strings followed by two character vectors.
df$x1 <- c(list("a", "b", "g"), l1, l2)

Using a for loop does not match the elements of a nested list, as we expect:

# Same loop as in the flat example, now run against the list column x1.
# NOTE: per the output printed below, only the rows whose x1 entry is a single
# string get a value; the multi-element entries keep NA.
df$x2 <- NA
for(id in 1:nrow(myData)){
  df$x2[df$x1 %in% myData$x1[id]] <- myData$x2[id]
}

output:

df
  a            x1   x2
1 1             a   za
2 2             b <NA>
3 3             g   zg
4 4 a, b, c, d, e <NA>
5 5 f, g, h, i, j <NA>

Using dplyr:

# Natural join with no `by` argument; with x1 stored as a list column this call
# stops with the error shown below.
df %>%
  left_join(myData)

causes an error:

Joining by: c("x1", "x2")
Error: cannot join on column 'x1'

, , , .

data.table. data.table, . , data.table , , .

100 000 , R ( data.table?)

Fwiw, ( ) - , Python, , R.

?

+4
3

, . , .

# Look up the values of `x` in a two-column lookup table.
#
# x: vector of keys to look up.
# y: list or data frame whose first element holds the keys and whose second
#    element holds the values to return.
#
# Returns the values for every key of `x` found in y[[1]], ordered by their
# position in `y`; a single NA when no key is found.
getMatch <- function(x, y) {
  # Position of each element of x in the key column (NA where absent).
  hits <- match(x, y[[1]])
  # sort() drops the NA positions and orders the rest by position in y.
  found <- y[[2]][sort(hits)]
  # Nothing matched: grow the empty result to one NA of the same type.
  if (length(found) == 0L) {
    found[1L] <- NA
  }
  found
}
> rapply(unname(df[-1]), getMatch, y = myData)
# [1] "za" NA   "zg" "za" "zg"

, within

> within(df, { x2 <- sapply(df$x1, getMatch, y = myData) })
#  a            x1   x2
#1 1             a   za
#2 2             b <NA>
#3 3             g   zg
#4 4 a, b, c, d, e   za
#5 5 f, g, h, i, j   zg
+2

:

# Walk df row by row and look up the x2 value for the key that the row's x1
# entry shares with myData$x1.
df$x2 <- NA
for(id in 1:nrow(df)) 
  {
  # intersect() gives the keys of this row that also occur in myData$x1;
  # ff (assigned inside the length() call) holds the matching x2 values.
  # ifelse() stores NA when ff is empty and ff otherwise.
  # NOTE(review): the `==` comparison looks like it assumes at most one shared
  # key per row -- confirm before using with multi-match data.
  df$x2[id] <- ifelse(
    length(ff <- myData$x2[which(myData$x1 == intersect(df$x1[[id]], myData$x1))])==0, 
    NA, 
    ff)
  }

df
#  a            x1   x2
#1 1             a   za
#2 2             b <NA>
#3 3             g   zg
#4 4 a, b, c, d, e   za
#5 5 f, g, h, i, j   zg

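This works as long as each row matches at most one key. Now suppose l1 contains more than one matching key (for example, both "a" and "g"):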

# Extend l1 to letters a-g so that it contains two lookup keys ("a" and "g"),
# then rebuild the list column with the new l1.
l1 = list(letters[1:7])
df$x1= c("a", "b", "g", l1, l2)

, :

# Same row-by-row lookup, re-run after l1 was extended to letters[1:7].
# For that row intersect() returns two keys, so `==` compares the three
# elements of myData$x1 against a length-two vector; the warning below is the result.
df$x2 <- NA
    for(id in 1:nrow(df)) 
      {
      df$x2[id] <- ifelse(
        length(ff <- myData$x2[which(myData$x1 == intersect(df$x1[[id]], myData$x1))])==0, 
        NA, 
        ff)
      }
Warning message:
In myData$x1 == intersect(df$x1[[id]], myData$x1) :
  longer object length is not a multiple of shorter object length

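So when a row can match several keys, the result has to hold more than one value per row. There are two options: combine the matches into a single string with paste, or store them as a list, as shown below.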

# Collapse every matching x2 value of a row into one comma-separated string.
# Rows without a match get a real missing value: paste(NA, collapse = ", ")
# would return the string "NA", which is.na() does not treat as missing.
df$x2 <- vapply(
  df$x1,
  function(keys) {
    # %in% tests each lookup key against this row's keys, so a row may hit
    # several keys without any recycling.
    hits <- myData$x2[myData$x1 %in% keys]
    if (length(hits) == 0L) {
      NA_character_
    } else {
      paste(hits, collapse = ", ")
    }
  },
  character(1),
  USE.NAMES = FALSE
)


# Keep every matching x2 value of a row as one element of a list column.
# lapply() visits each x1 entry once and also copes with a zero-row df, where
# a 1:nrow(df) loop would try to index row 1 and row 0.
df$x2 <- lapply(
  df$x1,
  function(keys) {
    # %in% already restricts to keys present in myData$x1, so no separate
    # intersect()/which() step is needed.
    hits <- myData$x2[myData$x1 %in% keys]
    # Rows without any match hold a single NA.
    if (length(hits) == 0L) NA else hits
  }
)

, :

  a                  x1     x2
1 1                   a     za
2 2                   b     NA
3 3                   g     zg
4 4 a, b, c, d, e, f, g za, zg
5 5       f, g, h, i, j     zg
+3

data.table:

library(data.table)

# convert myData to a data.table in place
setDT(myData)

# using Frank's extended example, where l1 (letters a-g) contains two lookup
# keys, "a" and "g"
l1 = list(letters[1:7])
l2 = list(letters[6:10])
dt = data.table(a=1:5, x1 = c("a", "b", "g", l1, l2))

# unlist the lists (and to be honest, that's how I would store the data,
# I think the column of lists is a bad idea), then set the keys, merge, and
# go back to columns of lists.
# Step by step:
#   1. dt[, unlist(x1), by = a]    one row per (a, element) pair; the unnamed
#                                  result column is called V1
#   2. setkey(..., V1)             key that long table on the element
#   3. [myData, x2 := i.x2]        join with myData and add its x2 as a column
#                                  (presumably V1 is matched against myData's
#                                  first column, x1 -- confirm)
#   4. [, list(...), keyby = a]    collapse back to one row per a; na.omit()
#                                  drops the x2 of elements that had no match
setkey(dt[, unlist(x1), by = a], V1)[myData, x2 := i.x2][,
            list(x1 = list(V1), x2 = list(na.omit(x2))), keyby = a]
#   a           x1    x2
#1: 1            a    za
#2: 2            b      
#3: 3            g    zg
#4: 4 a,b,c,d,e,f, za,zg
#5: 5    f,g,h,i,j    zg
+2

All Articles