CudaFree - invalid device pointer error

I am trying to free the device memory dev_inp that I allocated in my CUDA + OpenGL interop code. With error checking enabled, I get an "invalid device pointer" error, and the program stops executing when cudaFree(dev_inp); is called at the end of my function renderScene(). Everything else works fine, but I'm worried about a memory leak.

Questions:

a. Why can't I free the device memory that I allocated? I unmapped cuda_resource from the pixel buffer object and unregistered the resource.

From section B.17 in the CUDA C Programming Guide:

Memory allocated via malloc() cannot be freed using the runtime (i.e. by calling any of the free memory functions from Sections 3.2.2).

So this leads me to two other questions:

b. I don't call malloc() inside a kernel, because I don't have a kernel at all. So cudaFree should (technically) work correctly here, shouldn't it? Is it the programmer's responsibility to free the memory behind locally defined pointers, or does the nvcc compiler take care of freeing it when the program exits or the pointer goes out of scope? I don't want a memory leak in my code, so I feel safer freeing the memory that I allocated myself.

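c. Is it a good idea to call cudaDeviceReset() in renderScene() to clean up the CUDA context (in CUDA C)? When I profiled the program, the NVidia Visual Profiler reported that cudaDeviceReset() should be called before the application exits. So, if I cannot get cudaFree to work, would that be a suitable alternative?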

Code:

// Looks up an OpenGL extension entry point by name via wglGetProcAddress.
#define GET_PROC_ADDRESS( str ) wglGetProcAddress( str )

// OpenGL object names used by renderScene(): the texture that is drawn and
// the pixel buffer object (PBO) that feeds it. Zero until they are generated.
GLuint tex = 0;
GLuint pbo = 0;

// CUDA handle for the PBO while it is registered with the CUDA runtime.
struct cudaGraphicsResource *cuda_resource = NULL;

// Buffer-object entry points; NULL until resolved through GET_PROC_ADDRESS.
PFNGLGENBUFFERSARBPROC    glGenBuffers     = NULL;
PFNGLBINDBUFFERARBPROC    glBindBuffer     = NULL;
PFNGLBUFFERDATAARBPROC    glBufferData     = NULL;
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers  = NULL;

// ==========================================================================================
// CUDA ERROR CHECKING CODE
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
{
   if (code != cudaSuccess) 
   {
      fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
      if (abort) getchar();
   }
}

// ==========================================================================================

// Selects the CUDA device to use with OpenGL: the id returned by
// cutGetMaxGflopsDeviceId() is handed to cudaGLSetGLDevice(), and any error
// status is reported through gpuErrchk.
void initCUDADevice() {
    const int deviceId = cutGetMaxGflopsDeviceId();
    gpuErrchk(cudaGLSetGLDevice(deviceId));
}

// ==========================================================================================

// GLUT reshape callback: clears the buffers, resets the current matrix and
// sets the viewport to cover the whole w x h window.
//
//   w - new window width in pixels
//   h - new window height in pixels (0 is treated as 1)
void changeSize(int w, int h) {

    //cudaDeviceReset();
    //initCUDADevice();

    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    glLoadIdentity();

    // Never hand a zero height to the viewport
    // (you cant make a window of zero width).
    if (h == 0)
        h = 1;

    // Use the Projection Matrix
    glMatrixMode(GL_PROJECTION);

    // Reset Matrix
    //glLoadIdentity();

    //// Set the viewport to be the entire window
    glViewport(0, 0, w, h);

    //// Get Back to the Modelview
    glMatrixMode(GL_MODELVIEW);
}

// ==========================================================================================

void renderScene(void) {

    // Clear Color and Depth Buffers
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    // Reset transformations
    glLoadIdentity();

    // ====================================================================================
    // initiate GPU by setting it correctly 
    //initCUDADevice(); 

    // ====================================================================================
    // read the image that needs to be textured 

    Mat image, flipped;
    image = imread("K:/Ultrasound experiment images/PA_175.png", CV_LOAD_IMAGE_GRAYSCALE);   // Read the file from disk

    if(!image.data)                              // Check for invalid input
    {
        cout <<  "Could not open or find the image" << std::endl ;


    }

    cv::flip(image, flipped, 0);

    imshow("OpenCV - image", image);    // displays output

    // ====================================================================================
    // allocate the PBO, texture, and CUDA resource

    glBindBuffer    = (PFNGLBINDBUFFERARBPROC)GET_PROC_ADDRESS("glBindBuffer");
    glDeleteBuffers = (PFNGLDELETEBUFFERSARBPROC)GET_PROC_ADDRESS("glDeleteBuffers");
    glGenBuffers    = (PFNGLGENBUFFERSARBPROC)GET_PROC_ADDRESS("glGenBuffers");
    glBufferData    = (PFNGLBUFFERDATAARBPROC)GET_PROC_ADDRESS("glBufferData");

    // ====================================================================================
    // generate the pixel buffer object (PBO)

    // Generate a buffer ID called a PBO (Pixel Buffer Object)
    glGenBuffers(1, &pbo);

    // Make this the current UNPACK buffer (OpenGL is state-based)
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo);

    // Allocate data for the buffer. 4-channel 8-bit image
    glBufferData(GL_PIXEL_UNPACK_BUFFER, sizeof(unsigned char) * flipped.rows * flipped.cols, NULL, GL_STREAM_DRAW);
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);

    gpuErrchk(cudaGraphicsGLRegisterBuffer(&cuda_resource, pbo, cudaGraphicsMapFlagsNone)); 

    // ====================================================================================
    // create the texture object 

    // enable 2D texturing
    glEnable(GL_TEXTURE_2D);

    // generate and bind the texture    
    glGenTextures(1, &tex);
    glBindTexture(GL_TEXTURE_2D, tex);

    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);

    // put flipped.data at the end for cpu rendering 
    glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE,  image.cols, image.rows,  0, GL_LUMINANCE, GL_UNSIGNED_BYTE, 0 );

    // put tex at the end for cpu rendering 
    glBindTexture(GL_TEXTURE_2D, 0);

    // ====================================================================================
    // copy OpenCV flipped image data into the device pointer

    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

    unsigned char *dev_inp; 

    gpuErrchk( cudaMalloc((void**)&dev_inp, sizeof(unsigned char)*flipped.rows*flipped.cols) );

    gpuErrchk( cudaGraphicsMapResources(1, &cuda_resource, 0) );

    size_t size; 
    gpuErrchk( cudaGraphicsResourceGetMappedPointer((void **)&dev_inp, &size, cuda_resource) );

    gpuErrchk( cudaMemcpy(dev_inp, flipped.data, sizeof(unsigned char)*flipped.rows*flipped.cols, cudaMemcpyHostToDevice) );

    gpuErrchk( cudaGraphicsUnmapResources(1, &cuda_resource, 0) ); 

    // ====================================================================================
    // bind pbo and texture to render data now 

    glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo);
    //
    glBindTexture(GL_TEXTURE_2D, tex);

    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, flipped.cols, flipped.rows, GL_LUMINANCE, GL_UNSIGNED_BYTE, NULL);

    gpuErrchk( cudaGraphicsUnregisterResource(cuda_resource));
    gpuErrchk( cudaThreadSynchronize());

    //gpuErrchk(cudaFree(dev_inp));

    // ====================================================================================
    // map the texture coords to the vertex coords 

    glBegin(GL_QUADS);
    // Front Face
    glTexCoord2f(0.0f, 0.0f); glVertex3f(-1.0f, -1.0f,  1.0f);  // Bottom Left Of The Texture and Quad
    glTexCoord2f(1.0f, 0.0f); glVertex3f( 1.0f, -1.0f,  1.0f);  // Bottom Right Of The Texture and Quad
    glTexCoord2f(1.0f, 1.0f); glVertex3f( 1.0f,  1.0f,  1.0f);  // Top Right Of The Texture and Quad
    glTexCoord2f(0.0f, 1.0f); glVertex3f(-1.0f,  1.0f,  1.0f);  // Top Left Of The Texture and Quad

    glEnd();

    glFlush();  // force rendering

    glDisable(GL_TEXTURE_2D);

    //glutSwapBuffers();
    gpuErrchk(cudaFree(dev_inp));        // <--- Error here
    //cudaGraphicsUnregisterResource(cuda_resource);

}


// ==========================================================================================


// Program entry point: creates the GLUT window, installs the display and
// reshape callbacks and hands control to the GLUT event loop.
int main(int argc, char **argv) {


    // init GLUT and create window
    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_DEPTH | GLUT_RGB );
    glutInitWindowPosition(100,100);
    glutInitWindowSize(1024,256);
    glutCreateWindow("CUDA + OpenGL interop");


    // register callbacks
    glutDisplayFunc(renderScene);
    glutReshapeFunc(changeSize);
    //glutIdleFunc(renderScene);

    // enter GLUT event processing cycle
    glutMainLoop();

    // Reached only if glutMainLoop() returns; report a normal exit
    // (a non-zero status would signal failure to the caller).
    return 0;
}
+4
1

You do not need this cudaMalloc call:

gpuErrchk( cudaMalloc((void**)&dev_inp, sizeof(unsigned char)*flipped.rows*flipped.cols) );

It allocates device memory and stores the resulting pointer in dev_inp.

The problem arises on the next line:

gpuErrchk( cudaGraphicsResourceGetMappedPointer((void **)&dev_inp, &size, cuda_resource) );

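This produces a new pointer to a different allocation, derived from cuda_resource, and stores it in dev_inp, overwriting the pointer you allocated earlier (with cudaMalloc). The pointer returned by this line already refers to allocated memory; you do not have to allocate it separately / in advance.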

Now, when you try to free dev_inp:

gpuErrchk(cudaFree(dev_inp));        // <--- Error here

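you are trying to free memory that you did not allocate explicitly (with cudaMalloc) and that, moreover, is a necessary part of the (now unregistered) cuda_resource object. You do not want to do that. Fortunately, the CUDA runtime knows better and will not let you do it. By overwriting dev_inp you have in fact created a memory leak (of the cudaMalloc allocation), but the solution is not to "free" the overwritten pointer; it is to avoid making the unnecessary allocation in the first place.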

To fix it, remove this line:

gpuErrchk( cudaMalloc((void**)&dev_inp, sizeof(unsigned char)*flipped.rows*flipped.cols) );

and also remove the corresponding cudaFree call:

gpuErrchk(cudaFree(dev_inp));        // <--- Error here

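cudaDeviceReset is used as part of application shutdown in the CUDA samples, including those that use CUDA/OpenGL interop. If you want to see how cudaDeviceReset is used, take a look at one of those samples.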

+5

All Articles