R: split a factor into m indicator columns with the original length

Suppose I have a factor (in a data.frame) that represents years:

  year
1 2012
2 2012
3 2012
4 2013
5 2013
6 2013
7 2014
8 2014
9 2014

I would like to create (in this case) three new columns in the data.frame and end up with:

  y2012 y2013 y2014
1     1     0     0
2     1     0     0
3     1     0     0
4     0     1     0
5     0     1     0
6     0     1     0
7     0     0     1
8     0     0     1
9     0     0     1

I can, of course, write a bunch of ifelse statements, but that seems very inconvenient.

+4
source share
5 answers

also:

# Add "y" prefix to your years so the resulting column names are syntactic
df$year <- paste0("y", df$year)

# Cross-tabulate row against year. The row names are wrapped in a factor whose
# levels follow the data order: a bare character vector would be sorted
# alphabetically by table() ("1", "10", "2", ...), which scrambles the rows
# as soon as there are ten or more of them.
out <- table(factor(row.names(df), levels = row.names(df)), df$year)

# Finally convert to data.frame (one column per year, one row per input row)
out <- as.data.frame.matrix(out)

out
#  y2012 y2013 y2014
#1     1     0     0
#2     1     0     0
#3     1     0     0
#4     0     1     0
#5     0     1     0
#6     0     1     0
#7     0     0     1
#8     0     0     1
#9     0     0     1
+2
source

We can use `mtabulate` from `qdapTools`

# Tabulate the `year` column of df1 with qdapTools::mtabulate(); the sample
# output below shows one 0/1 column per distinct year.
library(qdapTools)
with(df1, mtabulate(year))
#  2012 2013 2014
#1    1    0    0
#2    1    0    0
#3    1    0    0
#4    0    1    0
#5    0    1    0
#6    0    1    0
#7    0    0    1
#8    0    0    1
#9    0    0    1

Or using some options in base R.

  • model.matrix. We convert the year column to class `factor` and use it in `model.matrix` to get the binary columns.

    # "- 1" removes the intercept, so every factor level gets its own column
    model.matrix(~ factor(year) - 1, data = df1)
    
  • table. We can get the expected result using `table` with the row sequence of `df1` and the year column.

    # seq_len() rather than 1:nrow(): for a zero-row df1, 1:0 is c(1, 0) and
    # table() would fail because its arguments have different lengths
    table(seq_len(nrow(df1)), df1$year)
    
+8

library(dplyr)
library(tidyr)
# Reshape to one 0/1 column per year. The row id gives every input row its own
# key, so repeated years produce separate output rows instead of colliding on
# one key; it is dropped again once the reshape is done.
# names_sort = TRUE orders the year columns by value, as spread() did.
df %>%
  mutate(row_id = row_number(), id = 1L) %>%
  pivot_wider(
    names_from = year,
    values_from = id,
    values_fill = 0L,
    names_sort = TRUE
  ) %>%
  select(-row_id)

#   2012 2013 2014
# 1    1    0    0
# 2    1    0    0
# 3    1    0    0
# 4    0    1    0
# 5    0    1    0
# 6    0    1    0
# 7    0    0    1
# 8    0    0    1
# 9    0    0    1

Or, using data.table:

library(data.table)
# indx gives one output row per input row; indx2 is the constant cell value.
# setDT() converts df by reference, so df keeps both helper columns afterwards.
# value.var is named explicitly instead of leaving dcast() to guess it.
dcast(
  setDT(df)[, `:=`(indx = .I, indx2 = 1L)],
  indx ~ year,
  value.var = "indx2",
  fill = 0L
)
#    indx 2012 2013 2014
# 1:    1    1    0    0
# 2:    2    1    0    0
# 3:    3    1    0    0
# 4:    4    0    1    0
# 5:    5    0    1    0
# 6:    6    0    1    0
# 7:    7    0    0    1
# 8:    8    0    0    1
# 9:    9    0    0    1
+5

If you want to stick to base R,

# Example data: a factor with three years, each repeated three times
dframe <- data.frame(x = factor(rep(c(2012L, 2013L, 2014L), each = 3L)))

# One numeric 0/1 vector per factor level, returned as an unnamed list
lapply(
  levels(dframe$x),
  function(lvl) as.numeric(dframe$x %in% lvl)
)
+3
source

This can also be done with contrasts.

# Build the identity (one-hot) contrast matrix, one row and column per level,
# then index it by the factor so each observation picks up its level's row.
# FALSE is spelled out because F can be reassigned; drop = FALSE keeps a
# matrix even when df1 has a single row. local() avoids leaving a helper
# variable behind.
local({
  year_f <- factor(df1$year)
  contrasts(year_f, contrasts = FALSE)[year_f, , drop = FALSE]
})
#      2012 2013 2014
# 2012    1    0    0
# 2012    1    0    0
# 2012    1    0    0
# 2013    0    1    0
# 2013    0    1    0
# 2013    0    1    0
# 2014    0    0    1
# 2014    0    0    1
# 2014    0    0    1
+2
source

All Articles