Efficient multidimensional dynamic time warping

Here is how, according to the literature, to compute the multidimensional dynamic time warping (DTW) distance between two time series:

 library(dtw)
 # Two toy two-column series with 10 and 5 observations respectively.
 x <- cbind(1:10, 1)
 y <- cbind(11:15, 2)
 # Cross-distance matrix between the rows of x and the rows of y.
 cxdist <- dist(x, y, method = "euclidean")
 # DTW distance obtained from the precomputed cross-distance matrix.
 dtw(cxdist)$distance

In other words, the cross-distance matrix is computed first and is then passed as the input to the dtw function.

I would like to use multidimensional dynamic time warping for image classification on fairly large images. The image values are stored in a data frame, which may look like this:

 # Ten rows of random values in six columns named var<k>t<j>, in the order
 # var1t1, var2t1, var1t2, var2t2, var1t3, var2t3.
 inDf <- setNames(
   data.frame(matrix(rnorm(60), ncol = 6)),
   paste0("var", rep(1:2, times = 3), "t", rep(1:3, each = 2))
 )

In this example, two variables (var1 and var2) are each observed three times (t1, t2 and t3).

The question is: how can the DTW distance matrix be computed as efficiently as possible, i.e. at the lowest computational cost?

: - , , , dtw . , ,

+4
1

One option is to use Rcpp. For example, here is a Euclidean cross-distance function written with Rcpp:

library(Rcpp)
library(inline)

# Rcpp function for the Euclidean cross-distance between two matrices.
#
# Built at run time with cxxfunction() using the Rcpp plugin; the C++ source
# is the string passed as `body`.
#
# Arguments:
#   x, y  matrices (read as Rcpp::NumericMatrix); each row is one observation.
# Returns:
#   An nrow(x) by nrow(y) numeric matrix whose (i, j) entry is the square
#   root of the summed squared differences between row i of x and row j of y.
#
# NOTE(review): nothing in the C++ body checks that x and y have the same
# number of columns, so call fastdist() directly only with matching column
# counts.
fastdist <- cxxfunction(signature(x="matrix", y="matrix"), plugin="Rcpp",
body='
  Rcpp::NumericMatrix dx(x);
  Rcpp::NumericMatrix dy(y);

  const int N = dx.nrow();
  const int M = dy.nrow();

  Rcpp::NumericMatrix res(N, M);

  for(int i=0; i<N; i++){
    for(int j=0; j<M; j++){
      res(i,j) = sqrt(sum((dx(i,_)-dy(j,_))*(dx(i,_)-dy(j,_))));
    }
  }

  return res;
')

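The C++ body uses Rcpp sugar for the row-wise vector arithmetic. The compiled function is fastdist; the R wrapper below checks its arguments and converts them to matrices before calling it: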

# Wrapper R function
#
# Validates the inputs and calls the compiled fastdist() on their matrix forms.
#
# Arguments:
#   x, y  matrices or data frames with the same number of columns.
# Returns:
#   The result of fastdist(as.matrix(x), as.matrix(y)).
# Errors:
#   stopifnot() fails if either input is neither a matrix nor a data frame,
#   or if the column counts differ.
fast.dist <- function(x, y) {
  # inherits() rather than `class(x) %in% ...`: since R 4.0.0 a matrix has
  # class c("matrix", "array"), so the %in% test yields c(TRUE, FALSE) and
  # stopifnot() would reject every plain matrix (and every data.frame
  # subclass such as a tibble).
  stopifnot(
    inherits(x, c("data.frame", "matrix")),
    inherits(y, c("data.frame", "matrix")),
    ncol(x) == ncol(y)
  )

  fastdist(as.matrix(x), as.matrix(y))
}

.

library(dtw)

# EXAMPLE 1
x <- cbind(1:10, 1)
y <- cbind(11:15, 2)
# Check results
# A direct comparison only reports the class difference between the two
# results. FALSE is spelled out because F is an ordinary, reassignable variable.
all.equal(
  fast.dist(x, y),
  dist(x, y, method = "euclidean"),
  check.attributes = FALSE
)
# [1] "target is matrix, current is crossdist"
# Reshaping the dist() result into a plain matrix makes the values comparable.
all.equal(
  fast.dist(x, y),
  matrix(dist(x, y, method = "euclidean"), ncol = nrow(y))
)
# [1] TRUE

Note that dist returns an object of class crossdist, so to compare the two results it has to be converted to a matrix.

- :

# EXAMPLE 2
# Fixed seed so the simulated data are reproducible.
set.seed(1234)
N <- 100
# N rows; the six columns interleave the two variables over three time points.
inDf <- setNames(
  data.frame(matrix(rnorm(6 * N), ncol = 6)),
  c("var1t1", "var2t1", "var1t2", "var2t2", "var1t3", "var2t3")
)

# Extracting variables
# One data frame per variable, each holding its t1..t3 columns.
var1 <- inDf[, startsWith(names(inDf), "var1")]
var2 <- inDf[, startsWith(names(inDf), "var2")]

, .

:

library(rbenchmark)

# Sanity check: both implementations agree once the dist() result is reshaped
# into a plain N-column matrix.
all.equal(
  fast.dist(var1, var2),
  matrix(dist(var1, var2), ncol = N)
)
# [1] TRUE
# Timing comparison ordered by relative time; only the first four columns of
# the benchmark table are shown.
benchmark(
  fast.dist(var1, var2),
  dist(var1, var2),
  order = "relative"
)[, 1:4]
#                    test replications elapsed relative
# 1 fast.dist(var1, var2)          100   0.081    1.000
# 2      dist(var1, var2)          100   0.246    3.037

Here fast.dist is about 3 times faster than dist. Note that this ratio was measured at N = 100 and may change as N grows.

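As mentioned in the question, dtw accepts a precomputed cross-distance matrix, so the result of fast.dist can be passed to it directly. Compare this with letting dtw compute the distances itself: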

# Precompute the cross-distance matrix once, outside the timed expressions.
cxdist <- fast.dist(var1, var2)
# dtw() on the precomputed matrix versus dtw() on the raw inputs.
benchmark(
  dtw(cxdist)$distance,
  dtw(var1, var2)$distance,
  order = "relative"
)[, 1:4]
#                       test replications elapsed relative
# 1     dtw(cxdist)$distance          100   0.476    1.000
# 2 dtw(var1, var2)$distance          100   0.736    1.546

Also note that if you only need $distance, passing distance.only = TRUE to dtw() should speed things up further.

+2

All Articles