Create a new column with non-null column names

My dataset is as follows:

# Example input: `a` plus three value columns `b`, `c`, `d`.
library(data.table)

df <- data.table(
  a = c(1, 2, 3, 4, 5),
  b = c(1, 0, 2, 5, 1),
  c = c(0, 1, 1, 0, 0),
  d = c(1, 0, 0, 2, 2)
)
df
#    a b c d
# 1: 1 1 0 1
# 2: 2 0 1 0
# 3: 3 2 1 0
# 4: 4 5 0 2
# 5: 5 1 0 2

I want to create a new column that contains, for each row, the names of the columns whose values are non-null (non-zero), joined by "_". The result will be:

 # Desired output: `a` plus a character column `z`.
 df_result <- data.table(
   a = c(1, 2, 3, 4, 5),
   z = c("b_d", "c", "b_c", "b_d", "b_d")
 )
 df_result
 #    a   z
 # 1: 1 b_d
 # 2: 2   c
 # 3: 3 b_c
 # 4: 4 b_d
 # 5: 5 b_d
+6
source share
4 answers

One option is to convert the data from 'wide' to 'long' format using melt. Then, grouped by 'a', we paste the elements of 'variable' that correspond to non-zero elements in 'value' (selected with a logical condition in 'i').

 # Reshape to long format (one row per `a`/column pair), keep only the
 # non-zero entries, then collapse the remaining column names per value of `a`.
 # `keyby` groups by `a` and returns the result sorted by it.
 # Note: a row of `df` whose value columns are all zero has no rows left after
 # the `value != 0` filter, so it does not appear in the result.
 melt(df, id.vars = "a")[
   value != 0,
   .(z = paste(variable, collapse = "_")),
   keyby = a
 ]
 #    a   z
 # 1: 1 b_d
 # 2: 2   c
 # 3: 3 b_c
 # 4: 4 b_d
 # 5: 5 b_d

Or, instead of melting, we can group by 'a', unlist the Subset of Data.table (.SD), and paste the names of the columns that correspond to non-zero elements ('i1').

 # For each value of `a`, flatten the remaining columns (.SD) into a vector,
 # flag the non-zero entries and paste the matching column names together.
 # The result column is named `z` to match the desired output.
 df[, {
   keep <- unlist(.SD) != 0
   list(z = paste(names(.SD)[keep], collapse = "_"))
 }, by = a]

Benchmarks

 # Benchmark data: one million rows, three value columns of small integers.
 set.seed(24)
 df1 <- data.table(
   a = 1:1e6,
   b = sample(0:5, 1e6, replace = TRUE),
   c = sample(0:4, 1e6, replace = TRUE),
   d = sample(0:3, 1e6, replace = TRUE)
 )

 # melt to long format, filter non-zero values, paste names per `a`.
 akrun1 <- function() {
   melt(df1, id.vars = "a")[
     value != 0,
     .(z = paste(variable, collapse = "_")),
     keyby = a
   ]
 }

 # Group by `a` and paste the names of the non-zero .SD columns.
 akrun2 <- function() {
   df1[, {
     i1 <- !!unlist(.SD)
     paste(names(.SD)[i1], collapse = "_")
   }, by = a]
 }

 # Row-wise apply() to find non-zero columns, then paste their names.
 ronak <- function() {
   nonzero_by_row <- apply(df1, 1, function(x) which(x[-1] != 0))
   data.table(
     z = lapply(nonzero_by_row, function(x) paste0(names(x), collapse = "_"))
   )
 }

 # Column-wise Map() producing name-or-NA, pasted per row, "NA" stripped.
 # Adds `newcol` to df1 by reference.
 eddi <- function() {
   df1[, newcol := gsub(
     "NA_|_NA|NA", "",
     do.call(
       function(...) paste(..., sep = "_"),
       Map(function(x, y) x[(y == 0) + 1], names(.SD), .SD)
     )
   ), .SDcols = b:d]
 }

 # Loop over columns, appending each column's name to the rows where it is > 0.
 alexis <- function(x) {
   ans <- character(nrow(x))
   for (j in seq_along(x)) {
     i <- x[[j]] > 0L
     ans[i] <- paste(ans[i], names(x)[[j]], sep = "_")
   }
   gsub("^_", "", ans)
 }

 system.time(akrun1())
 #  user  system elapsed
 # 22.04    0.15   22.36

 system.time(akrun2())
 #  user  system elapsed
 # 26.33    0.00   26.41

 system.time(ronak())
 #  user  system elapsed
 # 25.60    0.26   25.96

 system.time(alexis(df1[, -1L, with = FALSE]))
 #  user  system elapsed
 #  1.92    0.06    2.09

 # eddi() runs last because it modifies df1 in place (adds `newcol`).
 system.time(eddi())
 #  user  system elapsed
 #  2.41    0.06    3.19
+9
source

Assuming nrow >> ncol, you can loop over the columns instead of the rows:

 # For every row of `x`, return the "_"-joined names of the columns whose
 # value is non-zero.
 #
 # x: a data.frame / data.table (or list of equal-length vectors) holding only
 #    the value columns.
 # Returns a character vector with one element per row; rows without any
 # non-zero value yield "".
 ff <- function(x) {
   ans <- character(nrow(x))
   for (j in seq_along(x)) {
     # Test `!= 0` so negative values count as set too. which() drops NA, so
     # missing values are treated as "not set" instead of breaking the
     # subassignment below.
     i <- which(x[[j]] != 0)
     if (length(i) == 0L) next
     ans[i] <- paste(ans[i], names(x)[[j]], sep = "_")
   }
   # Every non-empty entry starts with the separator added in the loop.
   sub("^_", "", ans)
 }

 ff(df[, -1L, with = FALSE])
 # or, `df[, ff(.SD), .SDcols = -1L]` from David Arenburg
 # [1] "b_d" "c"   "b_c" "b_d" "b_d"
+11
source

Here's a direct approach:

 # Add `newcol` to df by reference, built from columns b..d.
 # NOTE(review): the "NA" placeholder is removed by pattern, so a column whose
 # name itself contains "NA" would be altered as well.
 df[, newcol := {
   # Convert data to colnames: the column's name where the value is non-zero,
   # NA where it is 0 (index 2 of a length-one character vector is NA).
   name_or_na <- Map(function(x, y) x[(y == 0) + 1], names(.SD), .SD)
   # Paste colnames together, row by row.
   joined <- do.call(function(...) paste(..., sep = "_"), name_or_na)
   # Remove unwanted text.
   gsub("NA_|_NA|NA", "", joined)
 }, .SDcols = b:d]
 #    a b c d newcol
 # 1: 1 1 0 1    b_d
 # 2: 2 0 1 0      c
 # 3: 3 2 1 0    b_c
 # 4: 4 5 0 2    b_d
 # 5: 5 1 0 2    b_d

On akrun's test data, it is more than 10 times faster.

+8
source

It may be a little long.

For each row, find the columns whose value is not 0, then paste those column names together.

 # Row-wise: for each row, drop the first column (`a`), find the non-zero
 # entries and paste their names together.
 # The names are collapsed inside the apply() callback so it returns exactly
 # one string per row. apply() then always yields a character vector, and is
 # not simplified to a matrix when every row has the same number of non-zero
 # columns.
 data.table(
   a = df$a,
   z = apply(df, 1, function(x) {
     paste(names(which(x[-1] != 0)), collapse = "_")
   })
 )
 #    a   z
 # 1: 1 b_d
 # 2: 2   c
 # 3: 3 b_c
 # 4: 4 b_d
 # 5: 5 b_d
+4
source

All Articles