I would write a function along these lines:
#' Convert a "dist" object to a long-format data frame
#'
#' Produces one row per stored pairwise distance, i.e. one row for every
#' below-diagonal cell of the corresponding distance matrix.
#'
#' @param inDist An object inheriting from class "dist".
#' @return A data frame with columns `row` and `col` (taken from the "Labels"
#'   attribute of `inDist` when present, otherwise integer positions) and
#'   `value` (the stored distance for that pair).
myFun <- function(inDist) {
  # inherits() also accepts objects that carry extra classes besides "dist";
  # comparing class() with != produces a length > 1 condition for those.
  if (!inherits(inDist, "dist")) stop("wrong input type")

  n <- attr(inDist, "Size")
  labels <- attr(inDist, "Labels")
  if (is.null(labels)) labels <- seq_len(n)

  # Row positions run 2..n, then 3..n, and so on; column positions repeat
  # 1 (n - 1 times), 2 (n - 2 times), ... which pairs each stored value with
  # its (row, col) cell below the diagonal.
  row_idx <- unlist(lapply(seq_len(n)[-1], function(i) i:n))

  # The "Diag" and "Upper" attributes need no handling here: as.vector()
  # drops all attributes and nothing else in this function reads them.
  data.frame(
    row = labels[row_idx],
    col = rep(labels[-length(labels)], (length(labels) - 1):1),
    value = as.vector(inDist)
  )
}
Now suppose we start with the following object (note the non-numeric row and column names):
# Example "dist" object: the correlations among the first five variables of
# USJudgeRatings, rescaled to dissimilarities via (1 - r) / 2.
dd <- as.dist(
  (1 - cor(USJudgeRatings)[seq_len(5), seq_len(5)]) / 2
)
We can convert it simply:
# Convert the example dist object; the printed result is shown below.
myFun(dd)
#     row  col      value
# 1  INTG CONT 0.56659545
# 2  DMNR CONT 0.57684427
# 3  DILG CONT 0.49380400
# 4  CFMG CONT 0.43154385
# 5  DMNR INTG 0.01769236
# 6  DILG INTG 0.06424445
# 7  CFMG INTG 0.09295712
# 8  DILG DMNR 0.08157452
# 9  CFMG DMNR 0.09332092
# 10 CFMG DILG 0.02060062
Quick performance comparison:
# Benchmark input: pairwise distances between the 1000 rows of a random
# 1000 x 1000 matrix.
set.seed(1)
x <- matrix(rnorm(1000 * 1000), nrow = 1000)
dd <- dist(x)

# Jake's function: reshape the full distance matrix to long format, then keep
# the entries whose numeric `row` value exceeds their numeric `col` value.
# NOTE(review): melt() is presumably reshape2::melt -- confirm, and make sure
# the package is loaded before running this.
fun2 <- function(inDist) {
  long <- melt(as.matrix(inDist), varnames = c("row", "col"))
  is_lower <- as.numeric(long$row) > as.numeric(long$col)
  long[is_lower, ]
}

# Both approaches return the same values.
all(fun2(dd) == myFun(dd))
# [1] TRUE

system.time(fun2(dd))
#    user  system elapsed
#   0.346   0.002   0.349

system.time(myFun(dd))
#    user  system elapsed
#   0.012   0.000   0.015
A5C1D2H2I1M1N2O1R2T1
source share