, ( , , , , ).
#include <time.h>
__global__ void slowKernel(float *output, bool write=false)
{
float input = 5;
#pragma unroll
for( int i = 0; i < 10000000; i++ ){
input = input * .9999999;
}
if (write) *output -= input;
}
__global__ void fastKernel(float *output, bool write=false)
{
float input = 5;
#pragma unroll
for( int i = 0; i < 100000; i++ ){
input = input * .9999999;
}
if (write) *output -= input;
}
void burntime(long val) {
struct timespec tv[] = {{0, val}};
nanosleep(tv, 0);
}
void mallocSynchronize(){
cudaStream_t stream1, stream2;
cudaStreamCreate( &stream1 );
cudaStreamCreate( &stream2 );
const size_t sz = 1 << 21;
slowKernel <<<1, 1, 0, stream1 >>>((float *)(0));
burntime(500000000L);
int *dev_a = 0;
for( int i = 0; i < 10; i++ ){
cudaMalloc( &dev_a, sz );
fastKernel <<<1, 1, 0, stream2 >>>((float *)(0));
burntime(1000000L);
}
}
int main()
{
mallocSynchronize();
cudaDeviceSynchronize();
cudaDeviceReset();
return 0;
}
[note POSIX, Windows]
(GTX970) , cudaMalloc slowKernel , fastKernel . , , . 0,5 . , burntime, , , .
, : , , Linux CUDA 7.5 Maxwell. , , , , , / . CUDA , , Fermi Kepler.