Is this the right way to time kernel execution in OpenCL? I am particularly interested in using the C++ wrapper (unfortunately, there are not many examples of timing with it).
cl::CommandQueue queue(context, device, CL_QUEUE_PROFILING_ENABLE, &err); checkErr(err, "Cannot create the command queue"); for (unsigned i = 0; i < NUMBER_OF_ITERATIONS; ++i) { err = queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(512), cl::NullRange, NULL, NULL); checkErr(err, "Cannot enqueue the kernel"); } queue.finish(); cl::Event start, stop; queue.enqueueMarker(&start); for (unsigned i = 0; i < NUMBER_OF_ITERATIONS; ++i) { err = queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(512), cl::NullRange, NULL, NULL); checkErr(err, "Cannot enqueue the kernel"); } queue.enqueueMarker(&stop); stop.wait(); cl_ulong time_start, time_end; double total_time; start.getProfilingInfo(CL_PROFILING_COMMAND_END, &time_start); stop.getProfilingInfo(CL_PROFILING_COMMAND_START, &time_end); total_time = time_end - time_start; cout << "Execution time in milliseconds " << total_time / (float)10e6 / NUMBER_OF_ITERATIONS << endl;
source share