Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- * Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
- /* This example demonstrates how to use the Video Decode Library with CUDA
- * bindings to interop between NVCUVID(CUDA) and OpenGL (PBOs). Post-Processing
- * video (de-interlacing) is supported with this sample.
- */
- // OpenGL Graphics includes
- #include <GL/glew.h>
- #if defined(__APPLE__) || defined(__MACOSX)
- #include <GLUT/glut.h>
- #else
- #include <GL/freeglut.h>
- #endif
- // CUDA Header includes
- #include <cuda.h>
- #include <cudaGL.h>
- // CUDA utilities and system includes
- #include <helper_functions.h>
- #include <helper_cuda.h>
- #include <helper_cuda_drvapi.h>
- #include <helper_cuda_gl.h>
- // Includes
- #include <stdlib.h>
- #include <stdio.h>
- #include <string.h>
- #include <math.h>
- #include <memory>
- #include <iostream>
- #include <cassert>
- // cudaDecodeGL related helper functions
- #include "FrameQueue.h"
- #include "VideoSource.h"
- #include "VideoParser.h"
- #include "VideoDecoder.h"
- #include "ImageGL.h"
- #include "cudaProcessFrame.h"
- #include "cudaModuleMgr.h"
- const char *sAppName = "CUDA/OpenGL Video Decode";
- const char *sAppFilename = "cudaDecodeGL";
- const char *sSDKname = "cudaDecodeGL";
- #define VIDEO_SOURCE_FILE "plush1_720p_10s.m2v"
- #ifdef _DEBUG
- #define ENABLE_DEBUG_OUT 0
- #else
- #define ENABLE_DEBUG_OUT 0
- #endif
- StopWatchInterface *frame_timer = NULL,
- *global_timer = NULL;
- // RMC ADD
- int g_FrameCapCount = 0; // added to capture a specific frame when readback is enabled
- // set g_FrameCount to some negative value to disable Frame capture to file
- // otherwise set to zero
- // g_bUseInterop must also be zero for FrameCap
- int g_FrameCapSelect = 37; // select which frame to capture to a file
- int g_FrameCapPitch = 0;
- int g_FrameCapHeight = 0;
- char g_FrameCapFile[] = "framecap.bmp";
- // END RMC ADD
- int g_DeviceID = 0;
- bool g_bWindowed = true;
- bool g_bDeviceLost = false;
- bool g_bDone = false;
- bool g_bRunning = false;
- bool g_bAutoQuit = false;
- bool g_bUseVsync = false;
- bool g_bFrameRepeat= false;
- bool g_bFrameStep = false;
- bool g_bQAReadback = false;
- bool g_bGLVerify = false;
- bool g_bFirstFrame = true;
- bool g_bLoop = false;
- bool g_bUpdateCSC = true;
- bool g_bUpdateAll = false;
- bool g_bLinearFiltering = false;
- bool g_bUseDisplay = true; // this flag enables/disables video on the window
- bool g_bUseInterop = true;
- bool g_bReadback = false;
- bool g_bIsProgressive = true; // assume it is progressive, unless otherwise noted
- bool g_bException = false;
- bool g_bWaived = false;
- int g_iRepeatFactor = 1; // 1:1 assumes no frame repeats
- int *pArgc = NULL;
- char **pArgv = NULL;
- cudaVideoCreateFlags g_eVideoCreateFlags = cudaVideoCreate_PreferCUVID;
- CUvideoctxlock g_CtxLock = NULL;
- float present_fps, decoded_fps, total_time = 0.0f;
- // These are CUDA function pointers to the CUDA kernels
- CUmoduleManager *g_pCudaModule;
- CUmodule cuModNV12toARGB = 0;
- CUfunction g_kernelNV12toARGB = 0;
- CUfunction g_kernelPassThru = 0;
- CUcontext g_oContext = 0;
- CUdevice g_oDevice = 0;
- CUstream g_ReadbackSID = 0, g_KernelSID = 0;
- eColorSpace g_eColorSpace = ITU601;
- float g_nHue = 0.0f;
- // System Memory surface we want to readback to
- BYTE *g_bFrameData[2] = { 0, 0 };
- // RMC ADD
- BYTE *g_bFrameCapData = 0;
- // END RMC ADD
- FrameQueue *g_pFrameQueue = 0;
- VideoSource *g_pVideoSource = 0;
- VideoParser *g_pVideoParser = 0;
- VideoDecoder *g_pVideoDecoder = 0;
- ImageGL *g_pImageGL = 0; // if we're using OpenGL
- CUdeviceptr g_pInteropFrame[2] = { 0, 0 }; // if we're using CUDA malloc
- std::string sFileName;
- char exec_path[256];
- unsigned int g_nWindowWidth = 0;
- unsigned int g_nWindowHeight = 0;
- unsigned int g_nClientAreaWidth = 0;
- unsigned int g_nClientAreaHeight = 0;
- unsigned int g_nVideoWidth = 0;
- unsigned int g_nVideoHeight = 0;
- unsigned int g_FrameCount = 0;
- unsigned int g_DecodeFrameCount = 0;
- unsigned int g_fpsCount = 0; // FPS count for averaging
- unsigned int g_fpsLimit = 16; // FPS limit for sampling timer;
- // Forward declarations
- //RMC ADD
- void saveFrameCap(BYTE *data, size_t pitch, size_t height);
- //END RMC ADD
- bool initGL(int argc, char **argv, int *pbTCC);
- bool initGLTexture(unsigned int nWidth, unsigned int nHeight);
- bool loadVideoSource(const char *video_file,
- unsigned int &width, unsigned int &height,
- unsigned int &dispWidth, unsigned int &dispHeight);
- void initCudaVideo();
- void freeCudaResources(bool bDestroyContext);
- bool copyDecodedFrameToTexture(unsigned int &nRepeats, int bUseInterop, int *pbIsProgressive);
- void cudaPostProcessFrame(CUdeviceptr *ppDecodedFrame, size_t nDecodedPitch,
- CUdeviceptr *ppInteropFrame, size_t pFramePitch,
- CUmodule cuModNV12toARGB,
- CUfunction fpCudaKernel, CUstream streamID);
- bool drawScene(int field_num);
- bool cleanup(bool bDestroyContext);
- bool initCudaResources(int argc, char **argv, int *bTCC);
- void renderVideoFrame(int bUseInterop);
- #ifdef WIN32
- typedef bool (APIENTRY *PFNWGLSWAPINTERVALFARPROC)(int);
- PFNWGLSWAPINTERVALFARPROC wglSwapIntervalEXT = 0;
- // This allows us to turn off vsync for OpenGL
- void setVSync(int interval)
- {
- const GLubyte *extensions = glGetString(GL_EXTENSIONS);
- if (strstr((const char *)extensions, "WGL_EXT_swap_control") == 0)
- {
- return; // Error: WGL_EXT_swap_control extension not supported on your computer.\n");
- }
- else
- {
- wglSwapIntervalEXT = (PFNWGLSWAPINTERVALFARPROC)wglGetProcAddress("wglSwapIntervalEXT");
- if (wglSwapIntervalEXT)
- {
- wglSwapIntervalEXT(interval);
- }
- }
- }
- #ifndef STRCASECMP
- #define STRCASECMP _stricmp
- #endif
- #ifndef STRNCASECMP
- #define STRNCASECMP _strnicmp
- #endif
- #else
- void setVSync(int interval)
- {
- }
- #ifndef STRCASECMP
- #define STRCASECMP strcasecmp
- #endif
- #ifndef STRNCASECMP
- #define STRNCASECMP strncasecmp
- #endif
- #endif
- void printStatistics()
- {
- int hh, mm, ss, msec;
- present_fps = 1.f / (total_time / (g_FrameCount * 1000.f));
- decoded_fps = 1.f / (total_time / (g_DecodeFrameCount * 1000.f));
- msec = ((int)total_time % 1000);
- ss = (int)(total_time/1000) % 60;
- mm = (int)(total_time/(1000*60)) % 60;
- hh = (int)(total_time/(1000*60*60)) % 60;
- printf("\n[%s] statistics\n", sAppFilename);
- printf("\t Video Length (hh:mm:ss.msec) = %02d:%02d:%02d.%03d\n", hh, mm, ss, msec);
- printf("\t Frames Presented (inc repeats) = %d\n", g_FrameCount);
- printf("\t Average Present Rate (fps) = %4.2f\n", present_fps);
- printf("\t Frames Decoded (hardware) = %d\n", g_DecodeFrameCount);
- printf("\t Average Rate of Decoding (fps) = %4.2f\n", decoded_fps);
- }
- void computeFPS(int bUseInterop)
- {
- sdkStopTimer(&frame_timer);
- if (g_bRunning)
- {
- g_fpsCount++;
- if (!g_pFrameQueue->isEndOfDecode())
- {
- g_FrameCount++;
- }
- }
- char sFPS[256];
- std::string sDecodeStatus;
- if (g_bDeviceLost)
- {
- sDecodeStatus = "DeviceLost!\0";
- sprintf(sFPS, "%s [%s] - [%s %d]",
- sSDKname, sDecodeStatus.c_str(),
- (g_bIsProgressive ? "Frame" : "Field"),
- g_DecodeFrameCount);
- if (!g_bQAReadback || g_bGLVerify)
- {
- glutSetWindowTitle(sFPS);
- }
- sdkResetTimer(&frame_timer);
- g_fpsCount = 0;
- return;
- }
- if (g_pFrameQueue->isEndOfDecode())
- {
- sDecodeStatus = "STOP (End of File)\0";
- // we only want to record this once
- if (total_time == 0.0f)
- {
- total_time = sdkGetTimerValue(&global_timer);
- }
- sdkStopTimer(&global_timer);
- if (g_bAutoQuit)
- {
- g_bRunning = false;
- g_bDone = true;
- }
- }
- else
- {
- if (!g_bRunning)
- {
- sDecodeStatus = "PAUSE\0";
- sprintf(sFPS, "%s [%s] - [%s %d] - Video Display %s / Vsync %s",
- sAppFilename, sDecodeStatus.c_str(),
- (g_bIsProgressive ? "Frame" : "Field"),
- g_DecodeFrameCount,
- g_bUseDisplay ? "ON" : "OFF",
- g_bUseVsync ? "ON" : "OFF");
- if (bUseInterop && (!g_bQAReadback || g_bGLVerify))
- {
- glutSetWindowTitle(sFPS);
- }
- }
- else
- {
- if (g_bFrameStep)
- {
- sDecodeStatus = "STEP\0";
- }
- else
- {
- sDecodeStatus = "PLAY\0";
- }
- }
- if (g_fpsCount == g_fpsLimit)
- {
- float ifps = 1.f / (sdkGetAverageTimerValue(&frame_timer) / 1000.f);
- sprintf(sFPS, "%s [%s] - [%3.1f fps, %s %d] - Video Display %s / Vsync %s",
- sAppFilename, sDecodeStatus.c_str(), ifps,
- (g_bIsProgressive ? "Frame" : "Field"),
- g_DecodeFrameCount,
- g_bUseDisplay ? "ON" : "OFF",
- g_bUseVsync ? "ON" : "OFF");
- if (bUseInterop)
- {
- glutSetWindowTitle(sFPS);
- }
- printf("[%s] - [%s: %04d, %04.1f fps, frame time: %04.2f (ms) ]\n",
- sAppFilename,
- (g_bIsProgressive ? "Frame" : "Field"),
- g_FrameCount, ifps, 1000.f/ifps);
- sdkResetTimer(&frame_timer);
- g_fpsCount = 0;
- }
- }
- if (g_bDone && g_bAutoQuit && bUseInterop)
- {
- printStatistics();
- cleanup(true);
- exit(EXIT_SUCCESS);
- }
- sdkStartTimer(&frame_timer);
- }
- bool initCudaResources(int argc, char **argv, int *bTCC)
- {
- printf("\n");
- for (int i=0; i < argc; i++)
- {
- printf("argv[%d] = %s\n", i, argv[i]);
- }
- printf("\n");
- CUdevice cuda_device;
- // Device is specified at the command line, we need to check if this it TCC or not, and then call the
- // appropriate TCC/WDDM findCudaDevice in order to initialize the CUDA device
- if (checkCmdLineFlag(argc, (const char **)argv, "device"))
- {
- cuda_device = getCmdLineArgumentInt(argc, (const char **) argv, "device");
- cuda_device = findCudaDeviceDRV(argc, (const char **)argv);
- checkCudaErrors(cuDeviceGetAttribute(bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, cuda_device));
- if (*bTCC)
- {
- char name[100];
- cuDeviceGetName(name, 100, cuda_device);
- printf("CUDA device=%d [%s] is a TCC Compute device\n", cuda_device, name);
- g_bUseInterop = false;
- }
- else
- {
- if (g_bUseInterop)
- {
- initGL(argc, argv, bTCC);
- cuda_device = findCudaGLDeviceDRV(argc, (const char **)argv);
- }
- else
- {
- cuda_device = findCudaDeviceDRV(argc, (const char **)argv);
- }
- }
- if (cuda_device < 0)
- {
- printf("No CUDA Capable devices found, exiting...\n");
- exit(EXIT_SUCCESS);
- }
- checkCudaErrors(cuDeviceGet(&g_oDevice, cuda_device));
- }
- else
- {
- // If we want to use Graphics Interop, then choose the GPU that is capable
- if (g_bUseInterop && !(*bTCC))
- {
- initGL(argc, argv, bTCC);
- cuda_device = findCudaGLDeviceDRV(argc, (const char **)argv);
- checkCudaErrors(cuDeviceGet(&g_oDevice, cuda_device));
- }
- else
- {
- cuda_device = findCudaDeviceDRV(argc, (const char **)argv);
- checkCudaErrors(cuDeviceGet(&g_oDevice, cuda_device));
- }
- }
- // get compute capabilities and the devicename
- int major, minor;
- size_t totalGlobalMem;
- char deviceName[256];
- checkCudaErrors(cuDeviceComputeCapability(&major, &minor, g_oDevice));
- checkCudaErrors(cuDeviceGetName(deviceName, 256, g_oDevice));
- printf("> Using GPU Device: %s has SM %d.%d compute capability\n", deviceName, major, minor);
- checkCudaErrors(cuDeviceTotalMem(&totalGlobalMem, g_oDevice));
- printf(" Total amount of global memory: %4.4f MB\n", (float)totalGlobalMem/(1024*1024));
- // Create CUDA Device w/ GL interop (if WDDM), otherwise CUDA w/o interop (if TCC)
- // (use CU_CTX_BLOCKING_SYNC for better CPU synchronization)
- if (g_bUseInterop && !(*bTCC))
- {
- checkCudaErrors(cuGLCtxCreate(&g_oContext, CU_CTX_BLOCKING_SYNC, g_oDevice));
- }
- else
- {
- checkCudaErrors(cuCtxCreate(&g_oContext, CU_CTX_BLOCKING_SYNC, g_oDevice));
- }
- // Initialize CUDA releated Driver API
- // Determine if we are running on a 32-bit or 64-bit OS and choose the right PTX file
- try
- {
- if (sizeof(void *) == 4)
- {
- g_pCudaModule = new CUmoduleManager("NV12ToARGB_drvapi_Win32.ptx", exec_path, 2, 2, 2);
- }
- else
- {
- g_pCudaModule = new CUmoduleManager("NV12ToARGB_drvapi_x64.ptx", exec_path, 2, 2, 2);
- }
- }
- catch (char const *p_file)
- {
- // If the CUmoduleManager constructor fails to load the PTX file, it will throw an exception
- printf("\n>> CUmoduleManager::Exception! %s not found!\n", p_file);
- printf(">> Please rebuild NV12ToARGB_drvapi.cu or re-install this sample.\n");
- return false;
- }
- g_pCudaModule->GetCudaFunction("NV12ToARGB_drvapi", &g_kernelNV12toARGB);
- g_pCudaModule->GetCudaFunction("Passthru_drvapi", &g_kernelPassThru);
- /////////////////Change///////////////////////////
- // Now we create the CUDA resources and the CUDA decoder context
- initCudaVideo();
- if (g_bUseInterop)
- {
- initGLTexture(g_pVideoDecoder->targetWidth(),
- g_pVideoDecoder->targetHeight());
- }
- else
- { //RMC Modified next 2 lines
- checkCudaErrors(cuMemAlloc(&g_pInteropFrame[0], g_pVideoDecoder->targetWidth() * g_pVideoDecoder->targetHeight() * 4));
- checkCudaErrors(cuMemAlloc(&g_pInteropFrame[1], g_pVideoDecoder->targetWidth() * g_pVideoDecoder->targetHeight() * 4));
- }
- CUcontext cuCurrent = NULL;
- CUresult result = cuCtxPopCurrent(&cuCurrent);
- if (result != CUDA_SUCCESS)
- {
- printf("cuCtxPopCurrent: %d\n", result);
- assert(0);
- }
- /////////////////////////////////////////
- return ((g_pCudaModule && g_pVideoDecoder) ? true : false);
- }
- bool reinitCudaResources()
- {
- // Free resources
- cleanup(false);
- // Reinit VideoSource and Frame Queue
- g_bIsProgressive = loadVideoSource(sFileName.c_str(),
- g_nVideoWidth, g_nVideoHeight,
- g_nWindowWidth, g_nWindowHeight);
- /////////////////Change///////////////////////////
- initCudaVideo();
- initGLTexture(g_pVideoDecoder->targetWidth(),
- g_pVideoDecoder->targetHeight());
- /////////////////////////////////////////
- return S_OK;
- }
- void displayHelp()
- {
- printf("\n");
- printf("%s - Help\n\n", sAppName);
- printf(" %s [parameters] [video_file]\n\n", sAppFilename);
- printf("Program parameters:\n");
- printf("\t-decodecuda - Use CUDA for MPEG-2 (Available with 64+ CUDA cores)\n");
- printf("\t-decodedxva - Use VP for MPEG-2, VC-1, H.264 decode.\n");
- printf("\t-decodecuvid - Use VP for MPEG-2, VC-1, H.264 decode (optimized)\n");
- printf("\t-vsync - Enable vertical sync.\n");
- printf("\t-novsync - Disable vertical sync.\n");
- printf("\t-repeatframe - Enable frame repeats.\n");
- printf("\t-updateall - always update CSC matrices.\n");
- printf("\t-displayvideo - display video frames on the window\n");
- printf("\t-nointerop - create the CUDA context w/o using graphics interop\n");
- printf("\t-readback - enable readback of frames to system memory\n");
- printf("\t-device=n - choose a specific GPU device to decode video with\n");
- }
- void parseCommandLineArguments(int argc, char *argv[])
- {
- char *video_file;
- printf("Command Line Arguments:\n");
- for (int n=0; n < argc; n++)
- {
- printf("argv[%d] = %s\n", n, argv[n]);
- }
- if (checkCmdLineFlag(argc, (const char **)argv, "help"))
- {
- displayHelp();
- exit(EXIT_SUCCESS);
- }
- if (checkCmdLineFlag(argc, (const char **)argv, "decodecuda"))
- {
- g_eVideoCreateFlags = cudaVideoCreate_PreferCUDA;
- }
- if (checkCmdLineFlag(argc, (const char **)argv, "decodedxva"))
- {
- g_eVideoCreateFlags = cudaVideoCreate_PreferDXVA;
- }
- if (checkCmdLineFlag(argc, (const char **)argv, "decodecuvid"))
- {
- g_eVideoCreateFlags = cudaVideoCreate_PreferCUVID;
- }
- if (checkCmdLineFlag(argc, (const char **)argv, "vsync"))
- {
- g_bUseVsync = true;
- }
- if (checkCmdLineFlag(argc, (const char **)argv, "novsync"))
- {
- g_bUseVsync = false;
- }
- if (checkCmdLineFlag(argc, (const char **)argv, "repeatframe"))
- {
- g_bFrameRepeat = true;
- }
- if (checkCmdLineFlag(argc, (const char **)argv, "framestep"))
- {
- g_bFrameStep = true;
- g_bUseDisplay = true;
- g_fpsLimit = 1;
- }
- if (checkCmdLineFlag(argc, (const char **)argv, "updateall"))
- {
- g_bUpdateAll = true;
- }
- if (checkCmdLineFlag(argc, (const char **)argv, "displayvideo"))
- {
- g_bUseDisplay = true;
- g_bUseInterop = true;
- }
- if (checkCmdLineFlag(argc, (const char **)argv, "nointerop"))
- {
- g_bUseInterop = false;
- }
- if (checkCmdLineFlag(argc, (const char **)argv, "readback"))
- {
- g_bReadback = true;
- }
- if (checkCmdLineFlag(argc, (const char **)argv, "device"))
- {
- g_DeviceID = getCmdLineArgumentInt(argc, (const char **)argv, "device");
- // g_bUseDisplay = true;
- }
- if (g_bUseDisplay == false)
- {
- g_bQAReadback = true;
- }
- if (g_bLoop == false)
- {
- g_bAutoQuit = true;
- }
- // Search all command file parameters for video files with extensions:
- // mp4, avc, mkv, 264, h264. vc1, wmv, mp2, mpeg2, mpg
- char *file_ext = NULL;
- for (int i=1; i < argc; i++)
- {
- if (getFileExtension(argv[i], &file_ext) > 0)
- {
- video_file = argv[i];
- break;
- }
- }
- // We load the default video file for the SDK sample
- if (file_ext == NULL)
- {
- video_file = sdkFindFilePath(VIDEO_SOURCE_FILE, argv[0]);
- }
- // Now verify the video file is legit
- FILE *fp = fopen(video_file, "r");
- if (video_file == NULL && fp == NULL)
- {
- printf("[%s]: unable to find file: <%s>\nExiting...\n", sAppFilename, VIDEO_SOURCE_FILE);
- exit(EXIT_FAILURE);
- }
- if (fp)
- {
- fclose(fp);
- }
- // default video file loaded by this sample
- sFileName = video_file;
- // store the current path so we can reinit the CUDA context
- strcpy(exec_path, argv[0]);
- printf("[%s]: input file: <%s>\n", sAppFilename, sFileName.c_str());
- }
- int main(int argc, char *argv[])
- {
- printf("[%s]\n", sAppName);
- sdkCreateTimer(&frame_timer);
- sdkResetTimer(&frame_timer);
- sdkCreateTimer(&global_timer);
- sdkResetTimer(&global_timer);
- // parse the command line arguments
- parseCommandLineArguments(argc, argv);
- // Find out the video size
- g_bIsProgressive = loadVideoSource(sFileName.c_str(),
- g_nVideoWidth, g_nVideoHeight,
- g_nWindowWidth, g_nWindowHeight);
- // Determine the proper window size needed to create the correct *client* area
- // that is of the size requested by m_dimensions.
- #ifdef WIN32
- RECT adjustedWindowSize;
- DWORD dwWindowStyle = WS_OVERLAPPEDWINDOW | WS_CLIPCHILDREN | WS_CLIPSIBLINGS;
- SetRect(&adjustedWindowSize, 0, 0, g_nVideoWidth , g_nVideoHeight);
- AdjustWindowRect(&adjustedWindowSize, dwWindowStyle, false);
- #endif
- g_nVideoWidth = PAD_ALIGN(g_nVideoWidth , 0x3F);
- g_nVideoHeight = PAD_ALIGN(g_nVideoHeight , 0x0F);
- // Initialize the CUDA Device
- cuInit(0);
- // Initialize CUDA and try to connect with an OpenGL context
- // Other video memory resources will be available
- int bTCC = 0;
- if (initCudaResources(argc, argv, &bTCC) == false)
- {
- g_bAutoQuit = true;
- g_bException = true;
- g_bWaived = true;
- goto ExitApp;
- }
- g_pVideoSource->start();
- g_bRunning = true;
- sdkStartTimer(&global_timer);
- sdkResetTimer(&global_timer);
- if (!g_bUseInterop)
- {
- // On this case we drive the display with a while loop (no openGL calls)
- while (g_pVideoSource->isStarted() && !g_pFrameQueue->isEndOfDecode())
- {
- renderVideoFrame(g_bUseInterop);
- }
- }
- else
- {
- glutMainLoop();
- }
- g_pFrameQueue->endDecode();
- g_pVideoSource->stop();
- printStatistics();
- ExitApp:
- // clean up CUDA and OpenGL resources
- cleanup(g_bWaived ? false : true);
- if (g_bWaived)
- {
- exit(EXIT_SUCCESS);
- }
- else
- {
- exit(g_bException ? EXIT_FAILURE : EXIT_SUCCESS);
- }
- return 0;
- }
- // display results using OpenGL
- void display()
- {
- renderVideoFrame(true);
- }
- void keyboard(unsigned char key, int x, int y)
- {
- switch (key)
- {
- case 27:
- if (g_pFrameQueue)
- {
- g_pFrameQueue->endDecode();
- }
- if (g_pVideoSource)
- {
- g_pVideoSource->stop();
- }
- printStatistics();
- cleanup(true);
- exit(EXIT_FAILURE);
- break;
- case 'F':
- case 'f':
- g_bLinearFiltering = !g_bLinearFiltering;
- if (g_pImageGL)
- g_pImageGL->setTextureFilterMode(g_bLinearFiltering ? GL_LINEAR : GL_NEAREST,
- g_bLinearFiltering ? GL_LINEAR : GL_NEAREST);
- break;
- case ' ':
- g_bRunning = !g_bRunning;
- break;
- default:
- break;
- }
- glutPostRedisplay();
- }
- void idle()
- {
- glutPostRedisplay();
- }
- void reshape(int window_x, int window_y)
- {
- printf("reshape() glViewport(%d, %d, %d, %d)\n", 0, 0, window_x, window_y);
- glViewport(0, 0, window_x, window_y);
- glMatrixMode(GL_MODELVIEW);
- glLoadIdentity();
- glMatrixMode(GL_PROJECTION);
- glLoadIdentity();
- glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
- }
- // Initialize OpenGL Resources
- bool initGL(int argc, char **argv, int *pbTCC)
- {
- int dev, device_count = 0;
- bool bSpecifyDevice=false;
- char device_name[256];
- // Check for a min spec of Compute 1.1 capability before running
- checkCudaErrors(cuDeviceGetCount(&device_count));
- for (int i=0; i < argc; i++)
- {
- int string_start = 0;
- while (argv[i][string_start++] != '-');
- char *string_argv = &argv[i][string_start];
- if (!STRNCASECMP(string_argv, "device=", 7))
- {
- bSpecifyDevice = true;
- }
- }
- // If deviceID == 0, and there is more than 1 device, let's find the first available graphics GPU
- if (!bSpecifyDevice && device_count > 0)
- {
- for (int i=0; i < device_count; i++)
- {
- checkCudaErrors(cuDeviceGet(&dev, i));
- checkCudaErrors(cuDeviceGetName(device_name, 256, dev));
- int bSupported = checkCudaCapabilitiesDRV(1, 1, i);
- if (!bSupported)
- {
- printf(" -> GPU: \"%s\" does not meet the minimum spec of SM 1.1\n", device_name);
- printf(" -> A GPU with a minimum compute capability of SM 1.1 or higher is required.\n");
- return false;
- }
- #if CUDA_VERSION >= 3020
- checkCudaErrors(cuDeviceGetAttribute(pbTCC , CU_DEVICE_ATTRIBUTE_TCC_DRIVER, dev));
- printf(" -> GPU %d: < %s > driver mode is: %s\n", dev, device_name, *pbTCC ? "TCC" : "WDDM");
- if (*pbTCC)
- {
- continue;
- }
- else
- {
- g_DeviceID = i; // we choose an available WDDM display device
- }
- #else
- // We don't know for sure if this is a TCC device or not, if it is Tesla we will not run
- if (!STRNCASECMP(device_name, "Tesla", 5))
- {
- printf(" \"%s\" does not support %s\n", device_name, sSDKname);
- *pbTCC = 1;
- return false;
- }
- else
- {
- *pbTCC = 0;
- }
- #endif
- printf("\n");
- }
- }
- else
- {
- if ((g_DeviceID > (device_count-1)) || (g_DeviceID < 0))
- {
- printf(" >>> Invalid GPU Device ID=%d specified, only %d GPU device(s) are available.<<<\n", g_DeviceID, device_count);
- printf(" >>> Valid GPU ID (n) range is between [%d,%d]... Exiting... <<<\n", 0, device_count-1);
- return false;
- }
- // We are specifying a GPU device, check to see if it is TCC or not
- checkCudaErrors(cuDeviceGet(&dev, g_DeviceID));
- checkCudaErrors(cuDeviceGetName(device_name, 256, dev));
- #if CUDA_VERSION >= 3020
- checkCudaErrors(cuDeviceGetAttribute(pbTCC , CU_DEVICE_ATTRIBUTE_TCC_DRIVER, dev));
- printf(" -> GPU %d: < %s > driver mode is: %s\n", dev, device_name, *pbTCC ? "TCC" : "WDDM");
- #else
- // We don't know for sure if this is a TCC device or not, if it is Tesla we will not run
- if (!STRNCASECMP(device_name, "Tesla", 5))
- {
- printf(" \"%s\" does not support %s\n", device_name, sSDKname);
- *pbTCC = 1;
- return false;
- }
- else
- {
- *pbTCC = 0;
- }
- #endif
- }
- if (!(*pbTCC))
- {
- // initialize GLUT
- glutInit(&argc, argv);
- glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
- glutInitWindowSize(g_nWindowWidth, g_nWindowHeight);
- glutCreateWindow(sAppName);
- printf(">> initGL() creating window [%d x %d]\n", g_nWindowWidth, g_nWindowHeight);
- glutDisplayFunc(display);
- glutReshapeFunc(reshape);
- glutKeyboardFunc(keyboard);
- glutIdleFunc(idle);
- glewInit();
- if (!glewIsSupported("GL_VERSION_1_5 GL_ARB_vertex_buffer_object GL_ARB_pixel_buffer_object"))
- {
- fprintf(stderr, "Error: failed to get minimal extensions for demo\n");
- fprintf(stderr, "This sample requires:\n");
- fprintf(stderr, " OpenGL version 1.5\n");
- fprintf(stderr, " GL_ARB_vertex_buffer_object\n");
- fprintf(stderr, " GL_ARB_pixel_buffer_object\n");
- return true;
- }
- if (!g_bUseVsync)
- {
- setVSync(0);
- }
- }
- else
- {
- fprintf(stderr, "> %s is decoding w/o visualization\n", sSDKname);
- }
- return true;
- }
- // Initializes OpenGL Textures (allocation and initialization)
- bool
- initGLTexture(unsigned int nWidth, unsigned int nHeight)
- {
- g_pImageGL = new ImageGL(nWidth, nHeight,
- nWidth, nHeight,
- g_bIsProgressive,
- ImageGL::BGRA_PIXEL_FORMAT);
- g_pImageGL->clear(0x80);
- g_pImageGL->setCUDAcontext(g_oContext);
- g_pImageGL->setCUDAdevice(g_oDevice);
- return true;
- }
- bool
- loadVideoSource(const char *video_file,
- unsigned int &width , unsigned int &height,
- unsigned int &dispWidth, unsigned int &dispHeight)
- {
- std::auto_ptr<FrameQueue> apFrameQueue(new FrameQueue);
- std::auto_ptr<VideoSource> apVideoSource(new VideoSource(video_file, apFrameQueue.get()));
- // retrieve the video source (width,height)
- apVideoSource->getSourceDimensions(width, height);
- apVideoSource->getSourceDimensions(dispWidth, dispHeight);
- std::cout << apVideoSource->format() << std::endl;
- if (g_bFrameRepeat)
- {
- g_iRepeatFactor = (int)(60.0f / ceil((float)apVideoSource->format().frame_rate.numerator / (float)apVideoSource->format().frame_rate.denominator));
- printf("Frame Rate Playback Speed = %d fps\n", 60 / g_iRepeatFactor);
- }
- g_pFrameQueue = apFrameQueue.release();
- g_pVideoSource = apVideoSource.release();
- if (g_pVideoSource->format().codec == cudaVideoCodec_JPEG ||
- g_pVideoSource->format().codec == cudaVideoCodec_MPEG2)
- {
- g_eVideoCreateFlags = cudaVideoCreate_PreferCUDA;
- }
- bool IsProgressive = 0;
- g_pVideoSource->getProgressive(IsProgressive);
- return IsProgressive;
- }
- void
- initCudaVideo()
- {
- // bind the context lock to the CUDA context
- CUresult result = cuvidCtxLockCreate(&g_CtxLock, g_oContext);
- if (result != CUDA_SUCCESS)
- {
- printf("cuvidCtxLockCreate failed: %d\n", result);
- assert(0);
- }
- std::auto_ptr<VideoDecoder> apVideoDecoder(new VideoDecoder(g_pVideoSource->format(), g_oContext, g_eVideoCreateFlags, g_CtxLock));
- std::auto_ptr<VideoParser> apVideoParser(new VideoParser(apVideoDecoder.get(), g_pFrameQueue, &g_oContext));
- g_pVideoSource->setParser(*apVideoParser.get());
- g_pVideoParser = apVideoParser.release();
- g_pVideoDecoder = apVideoDecoder.release();
- // Create a Stream ID for handling Readback
- if (g_bReadback)
- {
- checkCudaErrors(cuStreamCreate(&g_ReadbackSID, 0));
- checkCudaErrors(cuStreamCreate(&g_KernelSID, 0));
- printf(">> initCudaVideo()\n");
- printf(" CUDA Streams (%s) <g_ReadbackSID = %p>\n", ((g_ReadbackSID == 0) ? "Disabled" : "Enabled"), g_ReadbackSID);
- printf(" CUDA Streams (%s) <g_KernelSID = %p>\n", ((g_KernelSID == 0) ? "Disabled" : "Enabled"), g_KernelSID);
- }
- }
- void
- freeCudaResources(bool bDestroyContext)
- {
- if (g_pVideoParser)
- {
- delete g_pVideoParser;
- }
- if (g_pVideoDecoder)
- {
- delete g_pVideoDecoder;
- }
- if (g_pVideoSource)
- {
- delete g_pVideoSource;
- }
- if (g_pFrameQueue)
- {
- delete g_pFrameQueue;
- }
- if (g_CtxLock)
- {
- checkCudaErrors(cuvidCtxLockDestroy(g_CtxLock));
- }
- if (g_oContext && bDestroyContext)
- {
- checkCudaErrors(cuCtxDestroy(g_oContext));
- g_oContext = NULL;
- }
- if (g_ReadbackSID)
- {
- checkCudaErrors(cuStreamDestroy(g_ReadbackSID));
- }
- if (g_KernelSID)
- {
- checkCudaErrors(cuStreamDestroy(g_KernelSID));
- }
- }
- // Run the Cuda part of the computation (if g_pFrameQueue is empty, then return false)
- bool copyDecodedFrameToTexture(unsigned int &nRepeats, int bUseInterop, int *pbIsProgressive)
- {
- CUVIDPARSERDISPINFO oDisplayInfo;
- if (g_pFrameQueue->dequeue(&oDisplayInfo))
- {
- CCtxAutoLock lck(g_CtxLock);
- // Push the current CUDA context (only if we are using CUDA decoding path)
- CUresult result = cuCtxPushCurrent(g_oContext);
- CUdeviceptr pDecodedFrame[2] = { 0, 0 };
- CUdeviceptr pInteropFrame[2] = { 0, 0 };
- int num_fields = (oDisplayInfo.progressive_frame ? (1) : (2+oDisplayInfo.repeat_first_field));
- *pbIsProgressive = oDisplayInfo.progressive_frame;
- g_bIsProgressive = oDisplayInfo.progressive_frame ? true : false;
- for (int active_field=0; active_field<num_fields; active_field++)
- {
- nRepeats = oDisplayInfo.repeat_first_field;
- CUVIDPROCPARAMS oVideoProcessingParameters;
- memset(&oVideoProcessingParameters, 0, sizeof(CUVIDPROCPARAMS));
- oVideoProcessingParameters.progressive_frame = oDisplayInfo.progressive_frame;
- oVideoProcessingParameters.second_field = active_field;
- oVideoProcessingParameters.top_field_first = oDisplayInfo.top_field_first;
- oVideoProcessingParameters.unpaired_field = (num_fields == 1);
- unsigned int nWidth = 0;
- unsigned int nHeight = 0;
- unsigned int nDecodedPitch = 0;
- // map decoded video frame to CUDA surface
- g_pVideoDecoder->mapFrame(oDisplayInfo.picture_index, &pDecodedFrame[active_field], &nDecodedPitch, &oVideoProcessingParameters);
- nWidth = PAD_ALIGN(g_pVideoDecoder->targetWidth() , 0x3F);
- nHeight = PAD_ALIGN(g_pVideoDecoder->targetHeight(), 0x0F);
- // map OpenGL PBO or CUDA memory
- size_t pFramePitch = 0;
- // If we are Encoding and this is the 1st Frame, we make sure we allocate system memory for readbacks
- if (g_bReadback && g_bFirstFrame && g_ReadbackSID)
- {
- CUresult result;
- checkCudaErrors(result = cuMemAllocHost((void **)&g_bFrameData[0], (nDecodedPitch * nHeight * 3 / 2)));
- checkCudaErrors(result = cuMemAllocHost((void **)&g_bFrameData[1], (nDecodedPitch * nHeight * 3 / 2)));
- g_bFirstFrame = false;
- if (result != CUDA_SUCCESS)
- {
- printf("cuMemAllocHost returned %d\n", (int)result);
- checkCudaErrors(result);
- }
- }
- // If streams are enabled, we can perform the readback to the host while the kernel is executing
- if (g_bReadback && g_ReadbackSID)
- {
- CUresult result = cuMemcpyDtoHAsync(g_bFrameData[active_field], pDecodedFrame[active_field], (nDecodedPitch * nHeight * 3 / 2), g_ReadbackSID);
- if (result != CUDA_SUCCESS)
- {
- printf("cuMemAllocHost returned %d\n", (int)result);
- checkCudaErrors(result);
- }
- }
- #if ENABLE_DEBUG_OUT
- printf("%s = %02d, PicIndex = %02d, OutputPTS = %08d\n",
- (oDisplayInfo.progressive_frame ? "Frame" : "Field"),
- g_DecodeFrameCount, oDisplayInfo.picture_index, oDisplayInfo.timestamp);
- #endif
- if (g_pImageGL)
- {
- // map the texture surface
- g_pImageGL->map(&pInteropFrame[active_field], &pFramePitch, active_field);
- pFramePitch = g_nWindowWidth * 4;
- }
- else
- {
- pInteropFrame[active_field] = g_pInteropFrame[active_field];
- //RMC modified next line
- pFramePitch = g_pVideoDecoder->targetWidth() * 4;
- }
- // perform post processing on the CUDA surface (performs colors space conversion and post processing)
- // comment this out if we inclue the line of code seen above
- cudaPostProcessFrame(&pDecodedFrame[active_field], nDecodedPitch, &pInteropFrame[active_field],
- pFramePitch, g_pCudaModule->getModule(), g_kernelNV12toARGB, g_KernelSID);
- //RMC ADD
- if ((g_FrameCapCount >= 0) && !g_bUseInterop)
- if (++g_FrameCapCount == g_FrameCapSelect){
- g_FrameCapPitch = pFramePitch;
- g_FrameCapHeight = nHeight;
- checkCudaErrors(result = cuMemAllocHost((void **)&g_bFrameCapData, (g_FrameCapPitch * g_FrameCapHeight)));
- checkCudaErrors(cuMemcpyDtoH(g_bFrameCapData, pInteropFrame[active_field], (g_FrameCapPitch * g_FrameCapHeight)));
- printf("frame %d captured\n", g_FrameCapSelect);
- saveFrameCap(g_bFrameCapData, g_FrameCapPitch, g_FrameCapHeight);
- }
- //END RMC ADD
- if (g_pImageGL)
- {
- // unmap the texture surface
- g_pImageGL->unmap(active_field);
- }
- // unmap video frame
- // unmapFrame() synchronizes with the VideoDecode API (ensures the frame has finished decoding)
- g_pVideoDecoder->unmapFrame(pDecodedFrame[active_field]);
- // release the frame, so it can be re-used in decoder
- g_pFrameQueue->releaseFrame(&oDisplayInfo);
- g_DecodeFrameCount++;
- }
- // Detach from the Current thread
- checkCudaErrors(cuCtxPopCurrent(NULL));
- }
- else
- {
- // Frame Queue has no frames, we don't compute FPS until we start
- return false;
- }
- // check if decoding has come to an end.
- // if yes, signal the app to shut down.
- if (!g_pVideoSource->isStarted() || g_pFrameQueue->isEndOfDecode())
- {
- // Let's free the Frame Data
- if (g_ReadbackSID && g_bFrameData)
- {
- checkCudaErrors(cuMemFreeHost((void *)g_bFrameData[0]));
- checkCudaErrors(cuMemFreeHost((void *)g_bFrameData[1]));
- g_bFrameData[0] = NULL;
- g_bFrameData[1] = NULL;
- }
- // Let's just stop, and allow the user to quit, so they can at least see the results
- g_pVideoSource->stop();
- // If we want to loop reload the video file and restart
- if (g_bLoop && !g_bAutoQuit)
- {
- reinitCudaResources();
- g_FrameCount = 0;
- g_DecodeFrameCount = 0;
- g_pVideoSource->start();
- }
- if (g_bAutoQuit)
- {
- g_bDone = true;
- }
- }
- return true;
- }
- // This is the CUDA stage for Video Post Processing. Last stage takes care of the NV12 to ARGB
- void
- cudaPostProcessFrame(CUdeviceptr *ppDecodedFrame, size_t nDecodedPitch,
- CUdeviceptr *ppInteropFrame, size_t pFramePitch,
- CUmodule cuModNV12toARGB,
- CUfunction fpCudaKernel, CUstream streamID)
- {
- uint32 nWidth = g_pVideoDecoder->targetWidth();
- uint32 nHeight = g_pVideoDecoder->targetHeight();
- // Upload the Color Space Conversion Matrices
- if (g_bUpdateCSC)
- {
- // CCIR 601/709
- float hueColorSpaceMat[9];
- setColorSpaceMatrix(g_eColorSpace, hueColorSpaceMat, g_nHue);
- updateConstantMemory_drvapi(cuModNV12toARGB, hueColorSpaceMat);
- if (!g_bUpdateAll)
- {
- g_bUpdateCSC = false;
- }
- }
- // TODO: Stage for handling video post processing
- // Final Stage: NV12toARGB color space conversion
- CUresult eResult;
- eResult = cudaLaunchNV12toARGBDrv(*ppDecodedFrame, nDecodedPitch,
- *ppInteropFrame, pFramePitch,
- nWidth, nHeight, fpCudaKernel, streamID);
- }
- // Draw the final result on the screen
- bool drawScene(int field_num)
- {
- bool hr = true;
- // Normal OpenGL rendering code
- // render image
- if (g_pImageGL)
- {
- g_pImageGL->render(field_num);
- }
- return hr;
- }
- //RMC ADD
- void SaveBMP ( BYTE* data, int w, int h )
- {
- FILE *f;
- unsigned char *img = NULL;
- int filesize = 54 + 3*w*h; //w is your image width, h is image height, both int
- if( img )
- free( img );
- img = (unsigned char *)malloc(3*w*h);
- memset(img,0,sizeof(img));
- for(int j=0; j<h; j++)
- {
- for(int i=0; i<w; i++)
- {
- unsigned char r,g,b;
- r = data[(((j*w)+i)*4)+2];
- g = data[(((j*w)+i)*4)+1];
- b = data[(((j*w)+i)*4)+0];
- img[(((j*w)+i)*3)+2] = (r);
- img[(((j*w)+i)*3)+1] = (g);
- img[(((j*w)+i)*3)+0] = (b);
- }
- }
- unsigned char bmpfileheader[14] = {'B','M', 0,0,0,0, 0,0, 0,0, 54,0,0,0};
- unsigned char bmpinfoheader[40] = {40,0,0,0, 0,0,0,0, 0,0,0,0, 1,0, 24,0};
- unsigned char bmppad[3] = {0,0,0};
- bmpfileheader[ 2] = (unsigned char)(filesize );
- bmpfileheader[ 3] = (unsigned char)(filesize>> 8);
- bmpfileheader[ 4] = (unsigned char)(filesize>>16);
- bmpfileheader[ 5] = (unsigned char)(filesize>>24);
- bmpinfoheader[ 4] = (unsigned char)( w );
- bmpinfoheader[ 5] = (unsigned char)( w>> 8);
- bmpinfoheader[ 6] = (unsigned char)( w>>16);
- bmpinfoheader[ 7] = (unsigned char)( w>>24);
- bmpinfoheader[ 8] = (unsigned char)( h );
- bmpinfoheader[ 9] = (unsigned char)( h>> 8);
- bmpinfoheader[10] = (unsigned char)( h>>16);
- bmpinfoheader[11] = (unsigned char)( h>>24);
- f = fopen(g_FrameCapFile,"wb");
- fwrite(bmpfileheader,1,14,f);
- fwrite(bmpinfoheader,1,40,f);
- for(int i=0; i<h; i++)
- {
- fwrite(img+(w*(h-i-1)*3),3,w,f);
- fwrite(bmppad,1,(4-(w*3)%4)%4,f);
- }
- fclose(f);
- printf("saved frame capture file width = %d, height = %d\n", w, h);
- }
- void saveFrameCap(BYTE *data, size_t pitch, size_t height){
- if (data && !g_bUseInterop)
- SaveBMP(data, pitch/4, height);
- }
- //END RMC ADD
- // Release all previously initd objects
- bool cleanup(bool bDestroyContext)
- {
- if (bDestroyContext)
- {
- // Attach the CUDA Context (so we may properly free memroy)
- checkCudaErrors(cuCtxPushCurrent(g_oContext));
- if (g_pInteropFrame[0])
- {
- checkCudaErrors(cuMemFree(g_pInteropFrame[0]));
- }
- if (g_pInteropFrame[1])
- {
- checkCudaErrors(cuMemFree(g_pInteropFrame[1]));
- }
- // Detach from the Current thread
- checkCudaErrors(cuCtxPopCurrent(NULL));
- }
- if (g_pImageGL)
- {
- delete g_pImageGL;
- g_pImageGL = NULL;
- }
- freeCudaResources(bDestroyContext);
- return true;
- }
- // Launches the CUDA kernels to fill in the texture data
- void renderVideoFrame(int bUseInterop)
- {
- static unsigned int nRepeatFrame = 0;
- int repeatFactor = g_iRepeatFactor;
- int bIsProgressive = 1, bFPSComputed = 0;
- bool bFramesDecoded = false;
- if (0 != g_pFrameQueue)
- {
- // if not running, we simply don't copy new frames from the decoder
- if (!g_bDeviceLost && g_bRunning)
- {
- bFramesDecoded = copyDecodedFrameToTexture(nRepeatFrame, bUseInterop, &bIsProgressive);
- }
- }
- else
- {
- return;
- }
- if (bFramesDecoded)
- {
- while (repeatFactor-- > 0)
- {
- if (g_bUseDisplay && bUseInterop)
- {
- // We will always draw field/frame 0
- drawScene(0);
- glutSwapBuffers();
- if (!repeatFactor)
- {
- computeFPS(bUseInterop);
- }
- // If interlaced mode, then we will need to draw field 1 separately
- if (!bIsProgressive)
- {
- drawScene(1);
- glutSwapBuffers();
- if (!repeatFactor)
- {
- computeFPS(bUseInterop);
- }
- }
- bFPSComputed = 1;
- }
- // Pass the Windows handle to show Frame Rate on the window title
- if (!bFPSComputed && !repeatFactor)
- {
- computeFPS(bUseInterop);
- }
- if (g_bUseDisplay && bUseInterop)
- {
- glutReportErrors();
- }
- }
- }
- if (bFramesDecoded && g_bFrameStep)
- {
- if (g_bRunning)
- {
- g_bRunning = false;
- }
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment