I'm not 100% sure this is the canonical way, but I worked out how to do it. The example below shows how to pass data to, and retrieve data from, a user-defined OpenCL kernel using cv::UMat, basic types (e.g. int / float / uchar) and cv::ocl::Image2D.
#include <iostream> #include <fstream> #include <string> #include <iterator> #include <opencv2/opencv.hpp> #include <opencv2/core/ocl.hpp> using namespace std; void main() { if (!cv::ocl::haveOpenCL()) { cout << "OpenCL is not avaiable..." << endl; return; } cv::ocl::Context context; if (!context.create(cv::ocl::Device::TYPE_GPU)) { cout << "Failed creating the context..." << endl; return; } // In OpenCV 3.0.0 beta, only a single device is detected. cout << context.ndevices() << " GPU devices are detected." << endl; for (int i = 0; i < context.ndevices(); i++) { cv::ocl::Device device = context.device(i); cout << "name : " << device.name() << endl; cout << "available : " << device.available() << endl; cout << "imageSupport : " << device.imageSupport() << endl; cout << "OpenCL_C_Version : " << device.OpenCL_C_Version() << endl; cout << endl; } // Select the first device cv::ocl::Device(context.device(0)); // Transfer Mat data to the device cv::Mat mat_src = cv::imread("Lena.png", cv::IMREAD_GRAYSCALE); mat_src.convertTo(mat_src, CV_32F, 1.0 / 255); cv::UMat umat_src = mat_src.getUMat(cv::ACCESS_READ, cv::USAGE_ALLOCATE_DEVICE_MEMORY); cv::UMat umat_dst(mat_src.size(), CV_32F, cv::ACCESS_WRITE, cv::USAGE_ALLOCATE_DEVICE_MEMORY); std::ifstream ifs("shift.cl"); if (ifs.fail()) return; std::string kernelSource((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>()); cv::ocl::ProgramSource programSource(kernelSource); // Compile the kernel code cv::String errmsg; cv::String buildopt = cv::format("-D dstT=%s", cv::ocl::typeToStr(umat_dst.depth())); // "-D dstT=float" cv::ocl::Program program = context.getProg(programSource, buildopt, errmsg); cv::ocl::Image2D image(umat_src); float shift_x = 100.5; float shift_y = -50.0; cv::ocl::Kernel kernel("shift", program); kernel.args(image, shift_x, shift_y, cv::ocl::KernelArg::ReadWrite(umat_dst)); size_t globalThreads[3] = { mat_src.cols, mat_src.rows, 1 }; //size_t localThreads[3] = { 16, 16, 1 }; bool success = 
kernel.run(3, globalThreads, NULL, true); if (!success){ cout << "Failed running the kernel..." << endl; return; } // Download the dst data from the device (?) cv::Mat mat_dst = umat_dst.getMat(cv::ACCESS_READ); cv::imshow("src", mat_src); cv::imshow("dst", mat_dst); cv::waitKey(); }
Below is the file "shift.cl".
// Sampler used to read the source image: unnormalized (pixel) coordinates,
// coordinates outside the image clamp to the edge, linear filtering.
__constant sampler_t samplerLN = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;

// Writes to each destination pixel (x, y) the source image sampled at
// (x + shift_x, y + shift_y) with linear interpolation.
//
// src         source image (image types take no address-space qualifier)
// shift_x/y   sampling offset in pixels; may be fractional
// dst         base pointer of the destination buffer, addressed in bytes
// dst_step    byte stride between consecutive destination rows
// dst_offset  byte offset of the first destination element
// dst_rows    destination height in pixels
// dst_cols    destination width in pixels
//
// dstT is the destination element type and must be supplied at build time,
// e.g. "-D dstT=float".
__kernel void shift(
    __read_only image2d_t src,
    float shift_x,
    float shift_y,
    __global uchar* dst,
    int dst_step,
    int dst_offset,
    int dst_rows,
    int dst_cols)
{
    int x = get_global_id(0);
    int y = get_global_id(1);

    // Work-items outside the destination (in either dimension) must not write.
    if (x >= dst_cols || y >= dst_rows)
        return;

    // Byte index of element (x, y): y * step + x * sizeof(dstT) + offset.
    int dst_index = mad24(y, dst_step, mad24(x, (int)sizeof(dstT), dst_offset));
    __global dstT *dstf = (__global dstT *)(dst + dst_index);

    // +0.5f addresses the pixel centre, as expected with unnormalized coordinates.
    float2 coord = (float2)((float)x + 0.5f + shift_x, (float)y + 0.5f + shift_y);
    dstf[0] = (dstT)read_imagef(src, samplerLN, coord).x;
}
The key point is how a UMat is passed. With KernelArg::ReadOnly(umat) the kernel receives 5 parameters (*data_ptr, int step, int offset, int rows, int cols); with KernelArg::ReadOnlyNoSize(umat) it receives 3 (*data_ptr, int step, int offset); and with KernelArg::PtrReadOnly(umat) it receives only 1 (*data_ptr). The same rule applies to the WriteOnly and ReadWrite variants.
Step and offset are required when indexing into the data array, because a UMat is not necessarily stored as a dense matrix: rows may be padded for memory-address alignment.
cv::ocl::Image2D can be constructed from a UMat instance and passed directly to kernel.args(). Using image2d_t and sampler_t, we can use the GPU's hardware texture units for linearly interpolated sampling (with real-valued pixel coordinates).
Note that the build option -D xxx=yyy replaces the text xxx with yyy in the kernel code (like a preprocessor #define); this is how dstT is set to float above.
You can find more code in my post: http://qiita.com/tackson5/items/8dac6b083071d31baf00