I am trying to free the device memory dev_inpthat I allocated in my CUDA + OpenGL interaction code. After checking the errors, I got an error Invalid Device Pointer, and the program stopped execution when called cudaFree(dev_inp);at the end of my function renderScene(). Everything is fine, but I'm worried about a memory leak.
Questions:
and. Why can't I free the local memory of the device that I allocated? I unplugged the cuda_resourcepixel buffer from the object and unregistered the resource.
From section B.17 in the CUDA C Programming Guide:
Memory allocated via malloc() cannot be freed using the runtime (i.e. by calling any of the free memory functions from Sections 3.2.2).
So this leads me to two other questions:
b. I don't have malloced memory in the kernel because I don't have one. So, cudaFreeshould using a function (technically?) Work correctly here? Should the programmer free the memory allocated to locally defined pointers, or does the nvcc compiler take care of freeing when the program exits or if it leaves the local area? I don't want a memory leak in my code, so I feel safer taking care of freeing up the memory that I previously allocated.
. cudaDeviceReset() renderScene(), CUDA ( CUDA C)? , NVidia Visual Profiler : cudaDeviceReset()
, . , cudaFree , .
:
#define GET_PROC_ADDRESS( str ) wglGetProcAddress( str )
GLuint tex;
GLuint pbo;
struct cudaGraphicsResource *cuda_resource;
PFNGLBINDBUFFERARBPROC glBindBuffer = NULL;
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers = NULL;
PFNGLGENBUFFERSARBPROC glGenBuffers = NULL;
PFNGLBUFFERDATAARBPROC glBufferData = NULL;
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) getchar();
}
}
void initCUDADevice() {
gpuErrchk(cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() ));
}
void changeSize(int w, int h) {
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glLoadIdentity();
if (h == 0)
h = 1;
float ratio = w * 1.0 / h;
glMatrixMode(GL_PROJECTION);
glViewport(0, 0, w, h);
glMatrixMode(GL_MODELVIEW);
}
void renderScene(void) {
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glLoadIdentity();
Mat image, flipped;
image = imread("K:/Ultrasound experiment images/PA_175.png", CV_LOAD_IMAGE_GRAYSCALE);
if(!image.data)
{
cout << "Could not open or find the image" << std::endl ;
}
cv::flip(image, flipped, 0);
imshow("OpenCV - image", image);
glBindBuffer = (PFNGLBINDBUFFERARBPROC)GET_PROC_ADDRESS("glBindBuffer");
glDeleteBuffers = (PFNGLDELETEBUFFERSARBPROC)GET_PROC_ADDRESS("glDeleteBuffers");
glGenBuffers = (PFNGLGENBUFFERSARBPROC)GET_PROC_ADDRESS("glGenBuffers");
glBufferData = (PFNGLBUFFERDATAARBPROC)GET_PROC_ADDRESS("glBufferData");
glGenBuffers(1, &pbo);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo);
glBufferData(GL_PIXEL_UNPACK_BUFFER, sizeof(unsigned char) * flipped.rows * flipped.cols, NULL, GL_STREAM_DRAW);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
gpuErrchk(cudaGraphicsGLRegisterBuffer(&cuda_resource, pbo, cudaGraphicsMapFlagsNone));
glEnable(GL_TEXTURE_2D);
glGenTextures(1, &tex);
glBindTexture(GL_TEXTURE_2D, tex);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE, image.cols, image.rows, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, 0 );
glBindTexture(GL_TEXTURE_2D, 0);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
unsigned char *dev_inp;
gpuErrchk( cudaMalloc((void**)&dev_inp, sizeof(unsigned char)*flipped.rows*flipped.cols) );
gpuErrchk( cudaGraphicsMapResources(1, &cuda_resource, 0) );
size_t size;
gpuErrchk( cudaGraphicsResourceGetMappedPointer((void **)&dev_inp, &size, cuda_resource) );
gpuErrchk( cudaMemcpy(dev_inp, flipped.data, sizeof(unsigned char)*flipped.rows*flipped.cols, cudaMemcpyHostToDevice) );
gpuErrchk( cudaGraphicsUnmapResources(1, &cuda_resource, 0) );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo);
glBindTexture(GL_TEXTURE_2D, tex);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, flipped.cols, flipped.rows, GL_LUMINANCE, GL_UNSIGNED_BYTE, NULL);
gpuErrchk( cudaGraphicsUnregisterResource(cuda_resource));
gpuErrchk( cudaThreadSynchronize());
glBegin(GL_QUADS);
glTexCoord2f(0.0f, 0.0f); glVertex3f(-1.0f, -1.0f, 1.0f);
glTexCoord2f(1.0f, 0.0f); glVertex3f( 1.0f, -1.0f, 1.0f);
glTexCoord2f(1.0f, 1.0f); glVertex3f( 1.0f, 1.0f, 1.0f);
glTexCoord2f(0.0f, 1.0f); glVertex3f(-1.0f, 1.0f, 1.0f);
glEnd();
glFlush();
glDisable(GL_TEXTURE_2D);
gpuErrchk(cudaFree(dev_inp));
}
int main(int argc, char **argv) {
glutInit(&argc, argv);
glutInitDisplayMode(GLUT_DEPTH | GLUT_RGB );
glutInitWindowPosition(100,100);
glutInitWindowSize(1024,256);
glutCreateWindow("CUDA + OpenGL interop");
glutDisplayFunc(renderScene);
glutReshapeFunc(changeSize);
glutMainLoop();
return 1;
}