I saw several questions about merging csv files into one data frame. What to do if data frames are already in the workspace. I have five wide zoos that I drop as data frames and then melt away. Here is the chapter of one:
> head(df.mon.ssf.ret)
date variable value
1 2009.000 AA1C NA
2 2009.083 AA1C NA
3 2009.167 AA1C NA
4 2009.250 AA1C NA
5 2009.333 AA1C NA
6 2009.417 AA1C NA
I could combine them in a "date" and a "variable" with a series of nested merges, but that seems awkward. Is there a more programmatic way to merge?
If I am sure that the columns are in the same order in all zoos, can I be sure that the melt maintains this order and uses it cbind? Thank!
Update:
- . , , :
> temp <- merge(z.ssf.oi, z.ssf.oig, z.ssf.ret)
> class(temp)
[1] "zoo"
> temp2 <- cbind(index(temp), as.data.frame(temp))
> class(temp2)
[1] "data.frame"
> names(temp2)[1] <- "date"
> dim(temp2)
[1] 12 1204
> temp3 <- melt(temp2, id="date")
Error in data.frame(ids, variable, value) :
arguments imply differing number of rows: 12, 14436
> head(temp2)[, 1:5]
date AA1C.z.ssf.oi AAPL1C.z.ssf.oi ABT1C.z.ssf.oi ABX1C.z.ssf.oi
Jan 2009 Jan 2009 1895.800 49191.25 NA NA
Feb 2009 Feb 2009 1415.579 42650.26 NA 6267.96
Mar 2009 Mar 2009 1501.398 36712.20 NA 11581.65
Apr 2009 Apr 2009 1752.936 74376.27 NA 12168.29
May 2009 May 2009 1942.874 96307.30 NA 13490.60
Jun 2009 Jun 2009 NA 79170.70 NA 16337.21
2: !
> A <- cbind(index(z.ssf.oi), as.data.frame(z.ssf.oi))
> names(A)[1] <- "date"
> B <- cbind(index(z.ssf.oig), as.data.frame(z.ssf.oig))
> names(B)[1] <- "date"
> C <- cbind(index(z.ssf.ret), as.data.frame(z.ssf.ret))
> names(C)[1] <- "date"
> A.melt <- melt(A, id="date")
> head(A.melt)
date variable value
1 Jan 2009 A1C NA
2 Feb 2009 A1C NA
3 Mar 2009 A1C NA
4 Apr 2009 A1C NA
5 May 2009 A1C NA
6 Jun 2009 A1C NA
> B.melt <- melt(B, id="date")
> C.melt <- melt(C, id="date")
> ans <- merge(merge(A.melt, B.melt, by=c("date", "variable")), C.melt, by=c("date", "variable"))
> names(ans)[3:5] <- c("oi", "oig", "ret")
> head(ans)
date variable oi oig ret
1 Apr 2009 A1C NA NA NA
2 Apr 2009 AA1C NA NA NA
3 Apr 2009 AAPL1C 59316.88 0.3375786 0.008600073
4 Apr 2009 ABB1C NA NA NA
5 Apr 2009 ABT1C NA NA NA
6 Apr 2009 ABX1C NA NA NA
( NA - )
3: dputs ( [1:10, 1:10] )
> dput(A)
structure(list(group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), class = "factor", .Label = "oi"), date = structure(c(2009,
2009.08333333333, 2009.16666666667, 2009.25, 2009.33333333333,
2009.41666666667, 2009.5, 2009.58333333333, 2009.66666666667,
2009.75), class = "yearmon"), AA1C = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), AAPL1C = c(49226.391, 42662.1589473684, 35354.4254545455,
57161.6495238095, 84362.895, NA, NA, 47011.8519047619, 57852.2171428571,
33058.0090909091), ABT1C = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_),
ABX1C = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ACE1C = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), ACI1C = c(NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), ACS1C = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), ADBE1C = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), ADCT1C = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ADI1C = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_)), .Names = c("group", "date",
"AA1C", "AAPL1C", "ABT1C", "ABX1C", "ACE1C", "ACI1C", "ACS1C",
"ADBE1C", "ADCT1C", "ADI1C"), row.names = c("Jan 2009", "Feb 2009",
"Mar 2009", "Apr 2009", "May 2009", "Jun 2009", "Jul 2009", "Aug 2009",
"Sep 2009", "Oct 2009"), class = "data.frame")
> dput(B)
structure(list(group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), class = "factor", .Label = "oig"), date = structure(c(2009.08333333333,
2009.16666666667, 2009.25, 2009.33333333333, 2009.41666666667,
2009.5, 2009.58333333333, 2009.66666666667, 2009.75, 2009.83333333333
), class = "yearmon"), AA1C = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), AAPL1C = c(-0.143117562125788, -0.187888745830302, 0.480459636485712,
0.389244461579155, NA, NA, NA, 0.207492040517069, -0.559627909130612,
NA), ABT1C = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ABX1C = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), ACE1C = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), ACI1C = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ACS1C = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), ADBE1C = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), ADCT1C = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ADI1C = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_)), .Names = c("group", "date", "AA1C", "AAPL1C",
"ABT1C", "ABX1C", "ACE1C", "ACI1C", "ACS1C", "ADBE1C", "ADCT1C",
"ADI1C"), row.names = c("Feb 2009", "Mar 2009", "Apr 2009", "May 2009",
"Jun 2009", "Jul 2009", "Aug 2009", "Sep 2009", "Oct 2009", "Nov 2009"
), class = "data.frame")
> dput(C)
structure(list(group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), class = "factor", .Label = "ret"), date = structure(c(2009,
2009.08333333333, 2009.16666666667, 2009.25, 2009.33333333333,
2009.41666666667, 2009.5, 2009.58333333333, 2009.66666666667,
2009.75), class = "yearmon"), AA1C = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), AAPL1C = c(-0.143117562125788, -0.187888745830302, 0.480459636485712,
0.389244461579155, NA, NA, NA, 0.207492040517069, -0.559627909130612,
NA), ABT1C = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ABX1C = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), ACE1C = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), ACI1C = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ACS1C = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), ADBE1C = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), ADCT1C = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ADI1C = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_)), .Names = c("group", "date", "AA1C", "AAPL1C",
"ABT1C", "ABX1C", "ACE1C", "ACI1C", "ACS1C", "ADBE1C", "ADCT1C",
"ADI1C"), row.names = c("Feb 2009", "Mar 2009", "Apr 2009", "May 2009",
"Jun 2009", "Jul 2009", "Aug 2009", "Sep 2009", "Oct 2009", "Nov 2009"
), class = "data.frame")