Based on our chat discussion, you are not compiling with optimization enabled. If you enable it, you will see a noticeable increase in performance. Also, make sure you link against the release build of OpenCV.
I measured the runtime of the following example, both without and with optimization:
main.cpp
#include <algorithm> #include <iostream> #include <iterator> #include <numeric> #include <random> #include <vector> #include <chrono> #include <opencv2/opencv.hpp> int main(int argc, char **argv) { const int num_rows = 32678; const int num_cols = 10; const int index_size = 24700; const int num_runs = 1000; const int seed = 42; std::vector<int> index_vec(num_rows); // fill index with sequence std::iota (index_vec.begin(), index_vec.end(), 0); // randomize sequence std::random_device rd; std::mt19937 g(rd()); g.seed(seed); std::shuffle(index_vec.begin(), index_vec.end(), g); // trunkate index index_vec.resize(index_size); cv::Mat w2c(num_rows, num_cols, CV_32F); // copy cv::Mat out(index_size, w2c.cols, w2c.type()); auto start = std::chrono::high_resolution_clock::now(); for (int k = 0; k<num_runs; ++k) { for (int i = 0; i < index_size; ++i) { w2c.row(index_vec[i]).copyTo(out.row(i)); } } auto end = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start); std::cout << duration.count()/num_runs << " microseconds" << std::endl; return 0; }
CMakeLists.txt
# cmake_minimum_required() must come before project() so that policies are set
# before the toolchain is configured. The "...3.5" upper bound is ignored by
# CMake releases older than 3.12 and keeps newer releases from rejecting the file.
cmake_minimum_required(VERSION 2.8.12...3.5)

project(copy)

# Locate OpenCV; defines OpenCV_INCLUDE_DIRS and OpenCV_LIBS used below.
find_package(OpenCV REQUIRED)

# main.cpp uses C++11 features (auto, <chrono>, <random>).
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")

include_directories(${OpenCV_INCLUDE_DIRS})

add_executable(copy main.cpp)
target_link_libraries(copy ${OpenCV_LIBS})
Compile and run without optimization
cmake . -DCMAKE_BUILD_TYPE=DEBUG
make
./copy
3924 microseconds
Compile and run with optimization
cmake . -DCMAKE_BUILD_TYPE=RELEASE
make
./copy
2664 microseconds
I conducted these tests on:
- Intel Core i7-4600U Processor
- Ubuntu 14.04 (x64)
- GCC 4.8.2
- OpenCV 3.0.0 (release)
source share