(cudaStreamSynchronize). To measure how long a kernel takes on the GPU, CUDA events can be used. For example:
// Time a kernel launch on stream 0 with a pair of CUDA events.
cudaEvent_t event1, event2;
cudaEventCreate(&event1);
cudaEventCreate(&event2);
// event1 is recorded into stream 0 ahead of the kernel, event2 right after it,
// so the interval between them brackets the kernel's execution.
cudaEventRecord(event1, 0);
kernel<<<grid,block>>>(...);
cudaEventRecord(event2, 0);
// Block the host until both events have actually been reached; the elapsed
// time is only meaningful once both events have completed.
cudaEventSynchronize(event1);
cudaEventSynchronize(event2);
float dt_ms;  // elapsed time between event1 and event2, in milliseconds
cudaEventElapsedTime(&dt_ms, event1, event2);
// Release the event objects once the measurement has been read; without this
// every timed launch leaks two events.
cudaEventDestroy(event1);
cudaEventDestroy(event2);
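Once the host has synchronized on event2, everything submitted to the stream before it has finished, so the measured interval is valid. (Synchronizing on event1 is strictly redundant, because event1 is recorded before the kernel is launched.)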
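The host could also wait with cudaStreamSynchronize or cudaThreadSynchronize (deprecated in favor of cudaDeviceSynchronize), but those calls only wait; events are still needed to obtain the elapsed time.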