Guest User

Untitled

a guest
Aug 24th, 2013
772
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 44.42 KB | None | 0 0
  1. /*
  2. * Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
  3. *
  4. * Please refer to the NVIDIA end user license agreement (EULA) associated
  5. * with this source code for terms and conditions that govern your use of
  6. * this software. Any use, reproduction, disclosure, or distribution of
  7. * this software and related documentation outside the terms of the EULA
  8. * is strictly prohibited.
  9. *
  10. */
  11.  
  12. /* This example demonstrates how to use the Video Decode Library with CUDA
  13. * bindings to interop between NVCUVID(CUDA) and OpenGL (PBOs). Post-Processing
  14. * video (de-interlacing) is supported with this sample.
  15. */
  16.  
  17. // OpenGL Graphics includes
  18. #include <GL/glew.h>
  19. #if defined(__APPLE__) || defined(__MACOSX)
  20. #include <GLUT/glut.h>
  21. #else
  22. #include <GL/freeglut.h>
  23. #endif
  24.  
  25. // CUDA Header includes
  26. #include <cuda.h>
  27. #include <cudaGL.h>
  28.  
  29. // CUDA utilities and system includes
  30. #include <helper_functions.h>
  31. #include <helper_cuda.h>
  32. #include <helper_cuda_drvapi.h>
  33. #include <helper_cuda_gl.h>
  34.  
  35. // Includes
  36. #include <stdlib.h>
  37. #include <stdio.h>
  38. #include <string.h>
  39. #include <math.h>
  40. #include <memory>
  41. #include <iostream>
  42. #include <cassert>
  43.  
  44. // cudaDecodeGL related helper functions
  45. #include "FrameQueue.h"
  46. #include "VideoSource.h"
  47. #include "VideoParser.h"
  48. #include "VideoDecoder.h"
  49. #include "ImageGL.h"
  50.  
  51. #include "cudaProcessFrame.h"
  52. #include "cudaModuleMgr.h"
  53.  
  54. const char *sAppName = "CUDA/OpenGL Video Decode";
  55. const char *sAppFilename = "cudaDecodeGL";
  56. const char *sSDKname = "cudaDecodeGL";
  57.  
  58. #define VIDEO_SOURCE_FILE "plush1_720p_10s.m2v"
  59.  
  60. #ifdef _DEBUG
  61. #define ENABLE_DEBUG_OUT 0
  62. #else
  63. #define ENABLE_DEBUG_OUT 0
  64. #endif
  65.  
  66. StopWatchInterface *frame_timer = NULL,
  67. *global_timer = NULL;
  68.  
  69. // RMC ADD
  70. int g_FrameCapCount = 0; // added to capture a specific frame when readback is enabled
  71. // set g_FrameCount to some negative value to disable Frame capture to file
  72. // otherwise set to zero
  73. // g_bUseInterop must also be zero for FrameCap
  74. int g_FrameCapSelect = 37; // select which frame to capture to a file
  75. int g_FrameCapPitch = 0;
  76. int g_FrameCapHeight = 0;
  77. char g_FrameCapFile[] = "framecap.bmp";
  78. // END RMC ADD
  79.  
  80. int g_DeviceID = 0;
  81. bool g_bWindowed = true;
  82. bool g_bDeviceLost = false;
  83. bool g_bDone = false;
  84. bool g_bRunning = false;
  85. bool g_bAutoQuit = false;
  86. bool g_bUseVsync = false;
  87. bool g_bFrameRepeat= false;
  88. bool g_bFrameStep = false;
  89. bool g_bQAReadback = false;
  90. bool g_bGLVerify = false;
  91. bool g_bFirstFrame = true;
  92. bool g_bLoop = false;
  93. bool g_bUpdateCSC = true;
  94. bool g_bUpdateAll = false;
  95. bool g_bLinearFiltering = false;
  96. bool g_bUseDisplay = true; // this flag enables/disables video on the window
  97. bool g_bUseInterop = true;
  98. bool g_bReadback = false;
  99. bool g_bIsProgressive = true; // assume it is progressive, unless otherwise noted
  100. bool g_bException = false;
  101. bool g_bWaived = false;
  102.  
  103. int g_iRepeatFactor = 1; // 1:1 assumes no frame repeats
  104.  
  105. int *pArgc = NULL;
  106. char **pArgv = NULL;
  107.  
  108. cudaVideoCreateFlags g_eVideoCreateFlags = cudaVideoCreate_PreferCUVID;
  109. CUvideoctxlock g_CtxLock = NULL;
  110.  
  111. float present_fps, decoded_fps, total_time = 0.0f;
  112.  
  113. // These are CUDA function pointers to the CUDA kernels
  114. CUmoduleManager *g_pCudaModule;
  115.  
  116. CUmodule cuModNV12toARGB = 0;
  117. CUfunction g_kernelNV12toARGB = 0;
  118. CUfunction g_kernelPassThru = 0;
  119.  
  120. CUcontext g_oContext = 0;
  121. CUdevice g_oDevice = 0;
  122.  
  123. CUstream g_ReadbackSID = 0, g_KernelSID = 0;
  124.  
  125. eColorSpace g_eColorSpace = ITU601;
  126. float g_nHue = 0.0f;
  127.  
  128. // System Memory surface we want to readback to
  129. BYTE *g_bFrameData[2] = { 0, 0 };
  130. // RMC ADD
  131. BYTE *g_bFrameCapData = 0;
  132. // END RMC ADD
  133. FrameQueue *g_pFrameQueue = 0;
  134. VideoSource *g_pVideoSource = 0;
  135. VideoParser *g_pVideoParser = 0;
  136. VideoDecoder *g_pVideoDecoder = 0;
  137.  
  138. ImageGL *g_pImageGL = 0; // if we're using OpenGL
  139. CUdeviceptr g_pInteropFrame[2] = { 0, 0 }; // if we're using CUDA malloc
  140.  
  141. std::string sFileName;
  142.  
  143. char exec_path[256];
  144.  
  145. unsigned int g_nWindowWidth = 0;
  146. unsigned int g_nWindowHeight = 0;
  147.  
  148. unsigned int g_nClientAreaWidth = 0;
  149. unsigned int g_nClientAreaHeight = 0;
  150.  
  151. unsigned int g_nVideoWidth = 0;
  152. unsigned int g_nVideoHeight = 0;
  153.  
  154. unsigned int g_FrameCount = 0;
  155. unsigned int g_DecodeFrameCount = 0;
  156. unsigned int g_fpsCount = 0; // FPS count for averaging
  157. unsigned int g_fpsLimit = 16; // FPS limit for sampling timer;
  158.  
  159. // Forward declarations
  160. //RMC ADD
  161. void saveFrameCap(BYTE *data, size_t pitch, size_t height);
  162. //END RMC ADD
  163. bool initGL(int argc, char **argv, int *pbTCC);
  164. bool initGLTexture(unsigned int nWidth, unsigned int nHeight);
  165. bool loadVideoSource(const char *video_file,
  166. unsigned int &width, unsigned int &height,
  167. unsigned int &dispWidth, unsigned int &dispHeight);
  168. void initCudaVideo();
  169.  
  170. void freeCudaResources(bool bDestroyContext);
  171.  
  172. bool copyDecodedFrameToTexture(unsigned int &nRepeats, int bUseInterop, int *pbIsProgressive);
  173. void cudaPostProcessFrame(CUdeviceptr *ppDecodedFrame, size_t nDecodedPitch,
  174. CUdeviceptr *ppInteropFrame, size_t pFramePitch,
  175. CUmodule cuModNV12toARGB,
  176. CUfunction fpCudaKernel, CUstream streamID);
  177. bool drawScene(int field_num);
  178. bool cleanup(bool bDestroyContext);
  179. bool initCudaResources(int argc, char **argv, int *bTCC);
  180.  
  181. void renderVideoFrame(int bUseInterop);
  182.  
  183. #ifdef WIN32
  184. typedef bool (APIENTRY *PFNWGLSWAPINTERVALFARPROC)(int);
  185. PFNWGLSWAPINTERVALFARPROC wglSwapIntervalEXT = 0;
  186.  
  187. // This allows us to turn off vsync for OpenGL
  188. void setVSync(int interval)
  189. {
  190. const GLubyte *extensions = glGetString(GL_EXTENSIONS);
  191.  
  192. if (strstr((const char *)extensions, "WGL_EXT_swap_control") == 0)
  193. {
  194. return; // Error: WGL_EXT_swap_control extension not supported on your computer.\n");
  195. }
  196. else
  197. {
  198. wglSwapIntervalEXT = (PFNWGLSWAPINTERVALFARPROC)wglGetProcAddress("wglSwapIntervalEXT");
  199.  
  200. if (wglSwapIntervalEXT)
  201. {
  202. wglSwapIntervalEXT(interval);
  203. }
  204. }
  205. }
  206.  
  207. #ifndef STRCASECMP
  208. #define STRCASECMP _stricmp
  209. #endif
  210. #ifndef STRNCASECMP
  211. #define STRNCASECMP _strnicmp
  212. #endif
  213.  
  214. #else
  // Non-Windows builds: swap-interval control is not implemented here, so the
  // requested interval is accepted and ignored.
  void setVSync(int interval)
  {
      (void)interval;
  }
  218.  
  219. #ifndef STRCASECMP
  220. #define STRCASECMP strcasecmp
  221. #endif
  222. #ifndef STRNCASECMP
  223. #define STRNCASECMP strncasecmp
  224. #endif
  225.  
  226. #endif
  227.  
  228. void printStatistics()
  229. {
  230. int hh, mm, ss, msec;
  231.  
  232. present_fps = 1.f / (total_time / (g_FrameCount * 1000.f));
  233. decoded_fps = 1.f / (total_time / (g_DecodeFrameCount * 1000.f));
  234.  
  235. msec = ((int)total_time % 1000);
  236. ss = (int)(total_time/1000) % 60;
  237. mm = (int)(total_time/(1000*60)) % 60;
  238. hh = (int)(total_time/(1000*60*60)) % 60;
  239.  
  240. printf("\n[%s] statistics\n", sAppFilename);
  241. printf("\t Video Length (hh:mm:ss.msec) = %02d:%02d:%02d.%03d\n", hh, mm, ss, msec);
  242.  
  243. printf("\t Frames Presented (inc repeats) = %d\n", g_FrameCount);
  244. printf("\t Average Present Rate (fps) = %4.2f\n", present_fps);
  245.  
  246. printf("\t Frames Decoded (hardware) = %d\n", g_DecodeFrameCount);
  247. printf("\t Average Rate of Decoding (fps) = %4.2f\n", decoded_fps);
  248. }
  249.  
  250. void computeFPS(int bUseInterop)
  251. {
  252. sdkStopTimer(&frame_timer);
  253.  
  254. if (g_bRunning)
  255. {
  256. g_fpsCount++;
  257.  
  258. if (!g_pFrameQueue->isEndOfDecode())
  259. {
  260. g_FrameCount++;
  261. }
  262. }
  263.  
  264. char sFPS[256];
  265. std::string sDecodeStatus;
  266.  
  267. if (g_bDeviceLost)
  268. {
  269. sDecodeStatus = "DeviceLost!\0";
  270. sprintf(sFPS, "%s [%s] - [%s %d]",
  271. sSDKname, sDecodeStatus.c_str(),
  272. (g_bIsProgressive ? "Frame" : "Field"),
  273. g_DecodeFrameCount);
  274.  
  275. if (!g_bQAReadback || g_bGLVerify)
  276. {
  277. glutSetWindowTitle(sFPS);
  278. }
  279.  
  280. sdkResetTimer(&frame_timer);
  281. g_fpsCount = 0;
  282. return;
  283. }
  284.  
  285. if (g_pFrameQueue->isEndOfDecode())
  286. {
  287. sDecodeStatus = "STOP (End of File)\0";
  288.  
  289. // we only want to record this once
  290. if (total_time == 0.0f)
  291. {
  292. total_time = sdkGetTimerValue(&global_timer);
  293. }
  294.  
  295. sdkStopTimer(&global_timer);
  296.  
  297. if (g_bAutoQuit)
  298. {
  299. g_bRunning = false;
  300. g_bDone = true;
  301. }
  302.  
  303. }
  304. else
  305. {
  306. if (!g_bRunning)
  307. {
  308. sDecodeStatus = "PAUSE\0";
  309. sprintf(sFPS, "%s [%s] - [%s %d] - Video Display %s / Vsync %s",
  310. sAppFilename, sDecodeStatus.c_str(),
  311. (g_bIsProgressive ? "Frame" : "Field"),
  312. g_DecodeFrameCount,
  313. g_bUseDisplay ? "ON" : "OFF",
  314. g_bUseVsync ? "ON" : "OFF");
  315.  
  316. if (bUseInterop && (!g_bQAReadback || g_bGLVerify))
  317. {
  318. glutSetWindowTitle(sFPS);
  319. }
  320.  
  321. }
  322. else
  323. {
  324. if (g_bFrameStep)
  325. {
  326. sDecodeStatus = "STEP\0";
  327. }
  328. else
  329. {
  330. sDecodeStatus = "PLAY\0";
  331. }
  332. }
  333.  
  334. if (g_fpsCount == g_fpsLimit)
  335. {
  336. float ifps = 1.f / (sdkGetAverageTimerValue(&frame_timer) / 1000.f);
  337.  
  338. sprintf(sFPS, "%s [%s] - [%3.1f fps, %s %d] - Video Display %s / Vsync %s",
  339. sAppFilename, sDecodeStatus.c_str(), ifps,
  340. (g_bIsProgressive ? "Frame" : "Field"),
  341. g_DecodeFrameCount,
  342. g_bUseDisplay ? "ON" : "OFF",
  343. g_bUseVsync ? "ON" : "OFF");
  344.  
  345. if (bUseInterop)
  346. {
  347. glutSetWindowTitle(sFPS);
  348. }
  349.  
  350. printf("[%s] - [%s: %04d, %04.1f fps, frame time: %04.2f (ms) ]\n",
  351. sAppFilename,
  352. (g_bIsProgressive ? "Frame" : "Field"),
  353. g_FrameCount, ifps, 1000.f/ifps);
  354.  
  355. sdkResetTimer(&frame_timer);
  356. g_fpsCount = 0;
  357. }
  358. }
  359.  
  360. if (g_bDone && g_bAutoQuit && bUseInterop)
  361. {
  362. printStatistics();
  363.  
  364. cleanup(true);
  365. exit(EXIT_SUCCESS);
  366. }
  367.  
  368. sdkStartTimer(&frame_timer);
  369. }
  370.  
  371. bool initCudaResources(int argc, char **argv, int *bTCC)
  372. {
  373. printf("\n");
  374.  
  375. for (int i=0; i < argc; i++)
  376. {
  377. printf("argv[%d] = %s\n", i, argv[i]);
  378. }
  379.  
  380. printf("\n");
  381.  
  382. CUdevice cuda_device;
  383.  
  384. // Device is specified at the command line, we need to check if this it TCC or not, and then call the
  385. // appropriate TCC/WDDM findCudaDevice in order to initialize the CUDA device
  386. if (checkCmdLineFlag(argc, (const char **)argv, "device"))
  387. {
  388. cuda_device = getCmdLineArgumentInt(argc, (const char **) argv, "device");
  389.  
  390. cuda_device = findCudaDeviceDRV(argc, (const char **)argv);
  391. checkCudaErrors(cuDeviceGetAttribute(bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, cuda_device));
  392. if (*bTCC)
  393. {
  394. char name[100];
  395. cuDeviceGetName(name, 100, cuda_device);
  396. printf("CUDA device=%d [%s] is a TCC Compute device\n", cuda_device, name);
  397. g_bUseInterop = false;
  398. }
  399. else
  400. {
  401. if (g_bUseInterop)
  402. {
  403. initGL(argc, argv, bTCC);
  404. cuda_device = findCudaGLDeviceDRV(argc, (const char **)argv);
  405. }
  406. else
  407. {
  408. cuda_device = findCudaDeviceDRV(argc, (const char **)argv);
  409. }
  410. }
  411.  
  412. if (cuda_device < 0)
  413. {
  414. printf("No CUDA Capable devices found, exiting...\n");
  415. exit(EXIT_SUCCESS);
  416. }
  417.  
  418. checkCudaErrors(cuDeviceGet(&g_oDevice, cuda_device));
  419. }
  420. else
  421. {
  422. // If we want to use Graphics Interop, then choose the GPU that is capable
  423. if (g_bUseInterop && !(*bTCC))
  424. {
  425. initGL(argc, argv, bTCC);
  426. cuda_device = findCudaGLDeviceDRV(argc, (const char **)argv);
  427. checkCudaErrors(cuDeviceGet(&g_oDevice, cuda_device));
  428. }
  429. else
  430. {
  431. cuda_device = findCudaDeviceDRV(argc, (const char **)argv);
  432. checkCudaErrors(cuDeviceGet(&g_oDevice, cuda_device));
  433. }
  434. }
  435.  
  436. // get compute capabilities and the devicename
  437. int major, minor;
  438. size_t totalGlobalMem;
  439. char deviceName[256];
  440. checkCudaErrors(cuDeviceComputeCapability(&major, &minor, g_oDevice));
  441. checkCudaErrors(cuDeviceGetName(deviceName, 256, g_oDevice));
  442. printf("> Using GPU Device: %s has SM %d.%d compute capability\n", deviceName, major, minor);
  443.  
  444. checkCudaErrors(cuDeviceTotalMem(&totalGlobalMem, g_oDevice));
  445. printf(" Total amount of global memory: %4.4f MB\n", (float)totalGlobalMem/(1024*1024));
  446.  
  447. // Create CUDA Device w/ GL interop (if WDDM), otherwise CUDA w/o interop (if TCC)
  448. // (use CU_CTX_BLOCKING_SYNC for better CPU synchronization)
  449. if (g_bUseInterop && !(*bTCC))
  450. {
  451. checkCudaErrors(cuGLCtxCreate(&g_oContext, CU_CTX_BLOCKING_SYNC, g_oDevice));
  452. }
  453. else
  454. {
  455. checkCudaErrors(cuCtxCreate(&g_oContext, CU_CTX_BLOCKING_SYNC, g_oDevice));
  456. }
  457.  
  458. // Initialize CUDA releated Driver API
  459. // Determine if we are running on a 32-bit or 64-bit OS and choose the right PTX file
  460. try
  461. {
  462. if (sizeof(void *) == 4)
  463. {
  464. g_pCudaModule = new CUmoduleManager("NV12ToARGB_drvapi_Win32.ptx", exec_path, 2, 2, 2);
  465. }
  466. else
  467. {
  468. g_pCudaModule = new CUmoduleManager("NV12ToARGB_drvapi_x64.ptx", exec_path, 2, 2, 2);
  469. }
  470. }
  471. catch (char const *p_file)
  472. {
  473. // If the CUmoduleManager constructor fails to load the PTX file, it will throw an exception
  474. printf("\n>> CUmoduleManager::Exception! %s not found!\n", p_file);
  475. printf(">> Please rebuild NV12ToARGB_drvapi.cu or re-install this sample.\n");
  476. return false;
  477. }
  478.  
  479.  
  480. g_pCudaModule->GetCudaFunction("NV12ToARGB_drvapi", &g_kernelNV12toARGB);
  481. g_pCudaModule->GetCudaFunction("Passthru_drvapi", &g_kernelPassThru);
  482.  
  483. /////////////////Change///////////////////////////
  484. // Now we create the CUDA resources and the CUDA decoder context
  485. initCudaVideo();
  486.  
  487. if (g_bUseInterop)
  488. {
  489. initGLTexture(g_pVideoDecoder->targetWidth(),
  490. g_pVideoDecoder->targetHeight());
  491. }
  492. else
  493. { //RMC Modified next 2 lines
  494. checkCudaErrors(cuMemAlloc(&g_pInteropFrame[0], g_pVideoDecoder->targetWidth() * g_pVideoDecoder->targetHeight() * 4));
  495. checkCudaErrors(cuMemAlloc(&g_pInteropFrame[1], g_pVideoDecoder->targetWidth() * g_pVideoDecoder->targetHeight() * 4));
  496. }
  497.  
  498. CUcontext cuCurrent = NULL;
  499. CUresult result = cuCtxPopCurrent(&cuCurrent);
  500.  
  501. if (result != CUDA_SUCCESS)
  502. {
  503. printf("cuCtxPopCurrent: %d\n", result);
  504. assert(0);
  505. }
  506.  
  507. /////////////////////////////////////////
  508. return ((g_pCudaModule && g_pVideoDecoder) ? true : false);
  509. }
  510.  
  511. bool reinitCudaResources()
  512. {
  513. // Free resources
  514. cleanup(false);
  515.  
  516. // Reinit VideoSource and Frame Queue
  517. g_bIsProgressive = loadVideoSource(sFileName.c_str(),
  518. g_nVideoWidth, g_nVideoHeight,
  519. g_nWindowWidth, g_nWindowHeight);
  520.  
  521. /////////////////Change///////////////////////////
  522. initCudaVideo();
  523. initGLTexture(g_pVideoDecoder->targetWidth(),
  524. g_pVideoDecoder->targetHeight());
  525. /////////////////////////////////////////
  526.  
  527. return S_OK;
  528. }
  529.  
  530. void displayHelp()
  531. {
  532. printf("\n");
  533. printf("%s - Help\n\n", sAppName);
  534. printf(" %s [parameters] [video_file]\n\n", sAppFilename);
  535. printf("Program parameters:\n");
  536. printf("\t-decodecuda - Use CUDA for MPEG-2 (Available with 64+ CUDA cores)\n");
  537. printf("\t-decodedxva - Use VP for MPEG-2, VC-1, H.264 decode.\n");
  538. printf("\t-decodecuvid - Use VP for MPEG-2, VC-1, H.264 decode (optimized)\n");
  539. printf("\t-vsync - Enable vertical sync.\n");
  540. printf("\t-novsync - Disable vertical sync.\n");
  541. printf("\t-repeatframe - Enable frame repeats.\n");
  542. printf("\t-updateall - always update CSC matrices.\n");
  543. printf("\t-displayvideo - display video frames on the window\n");
  544. printf("\t-nointerop - create the CUDA context w/o using graphics interop\n");
  545. printf("\t-readback - enable readback of frames to system memory\n");
  546. printf("\t-device=n - choose a specific GPU device to decode video with\n");
  547. }
  548.  
  549. void parseCommandLineArguments(int argc, char *argv[])
  550. {
  551. char *video_file;
  552.  
  553. printf("Command Line Arguments:\n");
  554.  
  555. for (int n=0; n < argc; n++)
  556. {
  557. printf("argv[%d] = %s\n", n, argv[n]);
  558. }
  559.  
  560. if (checkCmdLineFlag(argc, (const char **)argv, "help"))
  561. {
  562. displayHelp();
  563. exit(EXIT_SUCCESS);
  564. }
  565.  
  566. if (checkCmdLineFlag(argc, (const char **)argv, "decodecuda"))
  567. {
  568. g_eVideoCreateFlags = cudaVideoCreate_PreferCUDA;
  569. }
  570.  
  571. if (checkCmdLineFlag(argc, (const char **)argv, "decodedxva"))
  572. {
  573. g_eVideoCreateFlags = cudaVideoCreate_PreferDXVA;
  574. }
  575.  
  576. if (checkCmdLineFlag(argc, (const char **)argv, "decodecuvid"))
  577. {
  578. g_eVideoCreateFlags = cudaVideoCreate_PreferCUVID;
  579. }
  580.  
  581. if (checkCmdLineFlag(argc, (const char **)argv, "vsync"))
  582. {
  583. g_bUseVsync = true;
  584. }
  585.  
  586. if (checkCmdLineFlag(argc, (const char **)argv, "novsync"))
  587. {
  588. g_bUseVsync = false;
  589. }
  590.  
  591. if (checkCmdLineFlag(argc, (const char **)argv, "repeatframe"))
  592. {
  593. g_bFrameRepeat = true;
  594. }
  595.  
  596. if (checkCmdLineFlag(argc, (const char **)argv, "framestep"))
  597. {
  598. g_bFrameStep = true;
  599. g_bUseDisplay = true;
  600. g_fpsLimit = 1;
  601. }
  602.  
  603. if (checkCmdLineFlag(argc, (const char **)argv, "updateall"))
  604. {
  605. g_bUpdateAll = true;
  606. }
  607.  
  608. if (checkCmdLineFlag(argc, (const char **)argv, "displayvideo"))
  609. {
  610. g_bUseDisplay = true;
  611. g_bUseInterop = true;
  612. }
  613.  
  614. if (checkCmdLineFlag(argc, (const char **)argv, "nointerop"))
  615. {
  616. g_bUseInterop = false;
  617. }
  618.  
  619. if (checkCmdLineFlag(argc, (const char **)argv, "readback"))
  620. {
  621. g_bReadback = true;
  622. }
  623.  
  624. if (checkCmdLineFlag(argc, (const char **)argv, "device"))
  625. {
  626. g_DeviceID = getCmdLineArgumentInt(argc, (const char **)argv, "device");
  627. // g_bUseDisplay = true;
  628. }
  629.  
  630. if (g_bUseDisplay == false)
  631. {
  632. g_bQAReadback = true;
  633. }
  634.  
  635. if (g_bLoop == false)
  636. {
  637. g_bAutoQuit = true;
  638. }
  639.  
  640. // Search all command file parameters for video files with extensions:
  641. // mp4, avc, mkv, 264, h264. vc1, wmv, mp2, mpeg2, mpg
  642. char *file_ext = NULL;
  643. for (int i=1; i < argc; i++)
  644. {
  645. if (getFileExtension(argv[i], &file_ext) > 0)
  646. {
  647. video_file = argv[i];
  648. break;
  649. }
  650. }
  651.  
  652. // We load the default video file for the SDK sample
  653. if (file_ext == NULL)
  654. {
  655. video_file = sdkFindFilePath(VIDEO_SOURCE_FILE, argv[0]);
  656. }
  657.  
  658. // Now verify the video file is legit
  659. FILE *fp = fopen(video_file, "r");
  660. if (video_file == NULL && fp == NULL)
  661. {
  662. printf("[%s]: unable to find file: <%s>\nExiting...\n", sAppFilename, VIDEO_SOURCE_FILE);
  663. exit(EXIT_FAILURE);
  664. }
  665. if (fp)
  666. {
  667. fclose(fp);
  668. }
  669.  
  670. // default video file loaded by this sample
  671. sFileName = video_file;
  672.  
  673. // store the current path so we can reinit the CUDA context
  674. strcpy(exec_path, argv[0]);
  675.  
  676. printf("[%s]: input file: <%s>\n", sAppFilename, sFileName.c_str());
  677. }
  678.  
  679.  
  680. int main(int argc, char *argv[])
  681. {
  682. printf("[%s]\n", sAppName);
  683.  
  684. sdkCreateTimer(&frame_timer);
  685. sdkResetTimer(&frame_timer);
  686.  
  687. sdkCreateTimer(&global_timer);
  688. sdkResetTimer(&global_timer);
  689.  
  690. // parse the command line arguments
  691. parseCommandLineArguments(argc, argv);
  692.  
  693. // Find out the video size
  694. g_bIsProgressive = loadVideoSource(sFileName.c_str(),
  695. g_nVideoWidth, g_nVideoHeight,
  696. g_nWindowWidth, g_nWindowHeight);
  697.  
  698. // Determine the proper window size needed to create the correct *client* area
  699. // that is of the size requested by m_dimensions.
  700. #ifdef WIN32
  701. RECT adjustedWindowSize;
  702. DWORD dwWindowStyle = WS_OVERLAPPEDWINDOW | WS_CLIPCHILDREN | WS_CLIPSIBLINGS;
  703. SetRect(&adjustedWindowSize, 0, 0, g_nVideoWidth , g_nVideoHeight);
  704. AdjustWindowRect(&adjustedWindowSize, dwWindowStyle, false);
  705. #endif
  706.  
  707. g_nVideoWidth = PAD_ALIGN(g_nVideoWidth , 0x3F);
  708. g_nVideoHeight = PAD_ALIGN(g_nVideoHeight , 0x0F);
  709.  
  710. // Initialize the CUDA Device
  711. cuInit(0);
  712.  
  713. // Initialize CUDA and try to connect with an OpenGL context
  714. // Other video memory resources will be available
  715. int bTCC = 0;
  716. if (initCudaResources(argc, argv, &bTCC) == false)
  717. {
  718. g_bAutoQuit = true;
  719. g_bException = true;
  720. g_bWaived = true;
  721. goto ExitApp;
  722. }
  723.  
  724. g_pVideoSource->start();
  725. g_bRunning = true;
  726.  
  727. sdkStartTimer(&global_timer);
  728. sdkResetTimer(&global_timer);
  729.  
  730. if (!g_bUseInterop)
  731. {
  732. // On this case we drive the display with a while loop (no openGL calls)
  733. while (g_pVideoSource->isStarted() && !g_pFrameQueue->isEndOfDecode())
  734. {
  735. renderVideoFrame(g_bUseInterop);
  736. }
  737. }
  738. else
  739. {
  740. glutMainLoop();
  741. }
  742.  
  743. g_pFrameQueue->endDecode();
  744. g_pVideoSource->stop();
  745.  
  746. printStatistics();
  747.  
  748. ExitApp:
  749. // clean up CUDA and OpenGL resources
  750. cleanup(g_bWaived ? false : true);
  751.  
  752. if (g_bWaived)
  753. {
  754. exit(EXIT_SUCCESS);
  755. }
  756. else
  757. {
  758. exit(g_bException ? EXIT_FAILURE : EXIT_SUCCESS);
  759. }
  760.  
  761. return 0;
  762. }
  763.  
  764.  
  765. // display results using OpenGL
  766. void display()
  767. {
  768. renderVideoFrame(true);
  769. }
  770.  
  771.  
  772. void keyboard(unsigned char key, int x, int y)
  773. {
  774. switch (key)
  775. {
  776. case 27:
  777. if (g_pFrameQueue)
  778. {
  779. g_pFrameQueue->endDecode();
  780. }
  781.  
  782. if (g_pVideoSource)
  783. {
  784. g_pVideoSource->stop();
  785. }
  786.  
  787. printStatistics();
  788. cleanup(true);
  789. exit(EXIT_FAILURE);
  790. break;
  791.  
  792. case 'F':
  793. case 'f':
  794. g_bLinearFiltering = !g_bLinearFiltering;
  795.  
  796. if (g_pImageGL)
  797. g_pImageGL->setTextureFilterMode(g_bLinearFiltering ? GL_LINEAR : GL_NEAREST,
  798. g_bLinearFiltering ? GL_LINEAR : GL_NEAREST);
  799.  
  800. break;
  801.  
  802. case ' ':
  803. g_bRunning = !g_bRunning;
  804. break;
  805.  
  806. default:
  807. break;
  808. }
  809.  
  810. glutPostRedisplay();
  811. }
  812.  
  813. void idle()
  814. {
  815. glutPostRedisplay();
  816. }
  817.  
  818. void reshape(int window_x, int window_y)
  819. {
  820. printf("reshape() glViewport(%d, %d, %d, %d)\n", 0, 0, window_x, window_y);
  821.  
  822. glViewport(0, 0, window_x, window_y);
  823.  
  824. glMatrixMode(GL_MODELVIEW);
  825. glLoadIdentity();
  826.  
  827. glMatrixMode(GL_PROJECTION);
  828. glLoadIdentity();
  829. glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
  830. }
  831.  
  832. // Initialize OpenGL Resources
  833. bool initGL(int argc, char **argv, int *pbTCC)
  834. {
  835. int dev, device_count = 0;
  836. bool bSpecifyDevice=false;
  837. char device_name[256];
  838.  
  839. // Check for a min spec of Compute 1.1 capability before running
  840. checkCudaErrors(cuDeviceGetCount(&device_count));
  841.  
  842. for (int i=0; i < argc; i++)
  843. {
  844. int string_start = 0;
  845.  
  846. while (argv[i][string_start++] != '-');
  847.  
  848. char *string_argv = &argv[i][string_start];
  849.  
  850. if (!STRNCASECMP(string_argv, "device=", 7))
  851. {
  852. bSpecifyDevice = true;
  853. }
  854. }
  855.  
  856. // If deviceID == 0, and there is more than 1 device, let's find the first available graphics GPU
  857. if (!bSpecifyDevice && device_count > 0)
  858. {
  859. for (int i=0; i < device_count; i++)
  860. {
  861. checkCudaErrors(cuDeviceGet(&dev, i));
  862. checkCudaErrors(cuDeviceGetName(device_name, 256, dev));
  863.  
  864. int bSupported = checkCudaCapabilitiesDRV(1, 1, i);
  865.  
  866. if (!bSupported)
  867. {
  868. printf(" -> GPU: \"%s\" does not meet the minimum spec of SM 1.1\n", device_name);
  869. printf(" -> A GPU with a minimum compute capability of SM 1.1 or higher is required.\n");
  870. return false;
  871. }
  872.  
  873. #if CUDA_VERSION >= 3020
  874. checkCudaErrors(cuDeviceGetAttribute(pbTCC , CU_DEVICE_ATTRIBUTE_TCC_DRIVER, dev));
  875. printf(" -> GPU %d: < %s > driver mode is: %s\n", dev, device_name, *pbTCC ? "TCC" : "WDDM");
  876.  
  877. if (*pbTCC)
  878. {
  879. continue;
  880. }
  881. else
  882. {
  883. g_DeviceID = i; // we choose an available WDDM display device
  884. }
  885.  
  886. #else
  887.  
  888. // We don't know for sure if this is a TCC device or not, if it is Tesla we will not run
  889. if (!STRNCASECMP(device_name, "Tesla", 5))
  890. {
  891. printf(" \"%s\" does not support %s\n", device_name, sSDKname);
  892. *pbTCC = 1;
  893. return false;
  894. }
  895. else
  896. {
  897. *pbTCC = 0;
  898. }
  899.  
  900. #endif
  901. printf("\n");
  902. }
  903. }
  904. else
  905. {
  906. if ((g_DeviceID > (device_count-1)) || (g_DeviceID < 0))
  907. {
  908. printf(" >>> Invalid GPU Device ID=%d specified, only %d GPU device(s) are available.<<<\n", g_DeviceID, device_count);
  909. printf(" >>> Valid GPU ID (n) range is between [%d,%d]... Exiting... <<<\n", 0, device_count-1);
  910. return false;
  911. }
  912.  
  913. // We are specifying a GPU device, check to see if it is TCC or not
  914. checkCudaErrors(cuDeviceGet(&dev, g_DeviceID));
  915. checkCudaErrors(cuDeviceGetName(device_name, 256, dev));
  916.  
  917. #if CUDA_VERSION >= 3020
  918. checkCudaErrors(cuDeviceGetAttribute(pbTCC , CU_DEVICE_ATTRIBUTE_TCC_DRIVER, dev));
  919. printf(" -> GPU %d: < %s > driver mode is: %s\n", dev, device_name, *pbTCC ? "TCC" : "WDDM");
  920. #else
  921.  
  922. // We don't know for sure if this is a TCC device or not, if it is Tesla we will not run
  923. if (!STRNCASECMP(device_name, "Tesla", 5))
  924. {
  925. printf(" \"%s\" does not support %s\n", device_name, sSDKname);
  926. *pbTCC = 1;
  927. return false;
  928. }
  929. else
  930. {
  931. *pbTCC = 0;
  932. }
  933.  
  934. #endif
  935. }
  936.  
  937. if (!(*pbTCC))
  938. {
  939. // initialize GLUT
  940. glutInit(&argc, argv);
  941. glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
  942. glutInitWindowSize(g_nWindowWidth, g_nWindowHeight);
  943. glutCreateWindow(sAppName);
  944.  
  945. printf(">> initGL() creating window [%d x %d]\n", g_nWindowWidth, g_nWindowHeight);
  946.  
  947. glutDisplayFunc(display);
  948. glutReshapeFunc(reshape);
  949. glutKeyboardFunc(keyboard);
  950. glutIdleFunc(idle);
  951.  
  952. glewInit();
  953.  
  954. if (!glewIsSupported("GL_VERSION_1_5 GL_ARB_vertex_buffer_object GL_ARB_pixel_buffer_object"))
  955. {
  956. fprintf(stderr, "Error: failed to get minimal extensions for demo\n");
  957. fprintf(stderr, "This sample requires:\n");
  958. fprintf(stderr, " OpenGL version 1.5\n");
  959. fprintf(stderr, " GL_ARB_vertex_buffer_object\n");
  960. fprintf(stderr, " GL_ARB_pixel_buffer_object\n");
  961. return true;
  962. }
  963.  
  964. if (!g_bUseVsync)
  965. {
  966. setVSync(0);
  967. }
  968. }
  969. else
  970. {
  971. fprintf(stderr, "> %s is decoding w/o visualization\n", sSDKname);
  972. }
  973.  
  974. return true;
  975. }
  976.  
  977.  
  978. // Initializes OpenGL Textures (allocation and initialization)
  979. bool
  980. initGLTexture(unsigned int nWidth, unsigned int nHeight)
  981. {
  982. g_pImageGL = new ImageGL(nWidth, nHeight,
  983. nWidth, nHeight,
  984. g_bIsProgressive,
  985. ImageGL::BGRA_PIXEL_FORMAT);
  986. g_pImageGL->clear(0x80);
  987.  
  988. g_pImageGL->setCUDAcontext(g_oContext);
  989. g_pImageGL->setCUDAdevice(g_oDevice);
  990. return true;
  991. }
  992.  
  993. bool
  994. loadVideoSource(const char *video_file,
  995. unsigned int &width , unsigned int &height,
  996. unsigned int &dispWidth, unsigned int &dispHeight)
  997. {
  998. std::auto_ptr<FrameQueue> apFrameQueue(new FrameQueue);
  999. std::auto_ptr<VideoSource> apVideoSource(new VideoSource(video_file, apFrameQueue.get()));
  1000.  
  1001. // retrieve the video source (width,height)
  1002. apVideoSource->getSourceDimensions(width, height);
  1003. apVideoSource->getSourceDimensions(dispWidth, dispHeight);
  1004.  
  1005. std::cout << apVideoSource->format() << std::endl;
  1006.  
  1007. if (g_bFrameRepeat)
  1008. {
  1009. g_iRepeatFactor = (int)(60.0f / ceil((float)apVideoSource->format().frame_rate.numerator / (float)apVideoSource->format().frame_rate.denominator));
  1010. printf("Frame Rate Playback Speed = %d fps\n", 60 / g_iRepeatFactor);
  1011. }
  1012.  
  1013. g_pFrameQueue = apFrameQueue.release();
  1014. g_pVideoSource = apVideoSource.release();
  1015.  
  1016. if (g_pVideoSource->format().codec == cudaVideoCodec_JPEG ||
  1017. g_pVideoSource->format().codec == cudaVideoCodec_MPEG2)
  1018. {
  1019. g_eVideoCreateFlags = cudaVideoCreate_PreferCUDA;
  1020. }
  1021.  
  1022. bool IsProgressive = 0;
  1023. g_pVideoSource->getProgressive(IsProgressive);
  1024. return IsProgressive;
  1025. }
  1026.  
  1027. void
  1028. initCudaVideo()
  1029. {
  1030. // bind the context lock to the CUDA context
  1031. CUresult result = cuvidCtxLockCreate(&g_CtxLock, g_oContext);
  1032.  
  1033. if (result != CUDA_SUCCESS)
  1034. {
  1035. printf("cuvidCtxLockCreate failed: %d\n", result);
  1036. assert(0);
  1037. }
  1038.  
  1039. std::auto_ptr<VideoDecoder> apVideoDecoder(new VideoDecoder(g_pVideoSource->format(), g_oContext, g_eVideoCreateFlags, g_CtxLock));
  1040. std::auto_ptr<VideoParser> apVideoParser(new VideoParser(apVideoDecoder.get(), g_pFrameQueue, &g_oContext));
  1041. g_pVideoSource->setParser(*apVideoParser.get());
  1042.  
  1043. g_pVideoParser = apVideoParser.release();
  1044. g_pVideoDecoder = apVideoDecoder.release();
  1045.  
  1046. // Create a Stream ID for handling Readback
  1047. if (g_bReadback)
  1048. {
  1049. checkCudaErrors(cuStreamCreate(&g_ReadbackSID, 0));
  1050. checkCudaErrors(cuStreamCreate(&g_KernelSID, 0));
  1051. printf(">> initCudaVideo()\n");
  1052. printf(" CUDA Streams (%s) <g_ReadbackSID = %p>\n", ((g_ReadbackSID == 0) ? "Disabled" : "Enabled"), g_ReadbackSID);
  1053. printf(" CUDA Streams (%s) <g_KernelSID = %p>\n", ((g_KernelSID == 0) ? "Disabled" : "Enabled"), g_KernelSID);
  1054. }
  1055. }
  1056.  
  1057.  
  1058. void
  1059. freeCudaResources(bool bDestroyContext)
  1060. {
  1061. if (g_pVideoParser)
  1062. {
  1063. delete g_pVideoParser;
  1064. }
  1065.  
  1066. if (g_pVideoDecoder)
  1067. {
  1068. delete g_pVideoDecoder;
  1069. }
  1070.  
  1071. if (g_pVideoSource)
  1072. {
  1073. delete g_pVideoSource;
  1074. }
  1075.  
  1076. if (g_pFrameQueue)
  1077. {
  1078. delete g_pFrameQueue;
  1079. }
  1080.  
  1081. if (g_CtxLock)
  1082. {
  1083. checkCudaErrors(cuvidCtxLockDestroy(g_CtxLock));
  1084. }
  1085.  
  1086. if (g_oContext && bDestroyContext)
  1087. {
  1088. checkCudaErrors(cuCtxDestroy(g_oContext));
  1089. g_oContext = NULL;
  1090. }
  1091.  
  1092. if (g_ReadbackSID)
  1093. {
  1094. checkCudaErrors(cuStreamDestroy(g_ReadbackSID));
  1095. }
  1096.  
  1097. if (g_KernelSID)
  1098. {
  1099. checkCudaErrors(cuStreamDestroy(g_KernelSID));
  1100. }
  1101. }
  1102.  
  1103. // Run the Cuda part of the computation (if g_pFrameQueue is empty, then return false)
  1104. bool copyDecodedFrameToTexture(unsigned int &nRepeats, int bUseInterop, int *pbIsProgressive)
  1105. {
  1106. CUVIDPARSERDISPINFO oDisplayInfo;
  1107.  
  1108. if (g_pFrameQueue->dequeue(&oDisplayInfo))
  1109. {
  1110. CCtxAutoLock lck(g_CtxLock);
  1111. // Push the current CUDA context (only if we are using CUDA decoding path)
  1112. CUresult result = cuCtxPushCurrent(g_oContext);
  1113.  
  1114. CUdeviceptr pDecodedFrame[2] = { 0, 0 };
  1115. CUdeviceptr pInteropFrame[2] = { 0, 0 };
  1116. int num_fields = (oDisplayInfo.progressive_frame ? (1) : (2+oDisplayInfo.repeat_first_field));
  1117. *pbIsProgressive = oDisplayInfo.progressive_frame;
  1118. g_bIsProgressive = oDisplayInfo.progressive_frame ? true : false;
  1119.  
  1120. for (int active_field=0; active_field<num_fields; active_field++)
  1121. {
  1122. nRepeats = oDisplayInfo.repeat_first_field;
  1123. CUVIDPROCPARAMS oVideoProcessingParameters;
  1124. memset(&oVideoProcessingParameters, 0, sizeof(CUVIDPROCPARAMS));
  1125.  
  1126. oVideoProcessingParameters.progressive_frame = oDisplayInfo.progressive_frame;
  1127. oVideoProcessingParameters.second_field = active_field;
  1128. oVideoProcessingParameters.top_field_first = oDisplayInfo.top_field_first;
  1129. oVideoProcessingParameters.unpaired_field = (num_fields == 1);
  1130.  
  1131. unsigned int nWidth = 0;
  1132. unsigned int nHeight = 0;
  1133. unsigned int nDecodedPitch = 0;
  1134.  
  1135. // map decoded video frame to CUDA surface
  1136. g_pVideoDecoder->mapFrame(oDisplayInfo.picture_index, &pDecodedFrame[active_field], &nDecodedPitch, &oVideoProcessingParameters);
  1137. nWidth = PAD_ALIGN(g_pVideoDecoder->targetWidth() , 0x3F);
  1138. nHeight = PAD_ALIGN(g_pVideoDecoder->targetHeight(), 0x0F);
  1139. // map OpenGL PBO or CUDA memory
  1140. size_t pFramePitch = 0;
  1141.  
  1142. // If we are Encoding and this is the 1st Frame, we make sure we allocate system memory for readbacks
  1143. if (g_bReadback && g_bFirstFrame && g_ReadbackSID)
  1144. {
  1145. CUresult result;
  1146. checkCudaErrors(result = cuMemAllocHost((void **)&g_bFrameData[0], (nDecodedPitch * nHeight * 3 / 2)));
  1147. checkCudaErrors(result = cuMemAllocHost((void **)&g_bFrameData[1], (nDecodedPitch * nHeight * 3 / 2)));
  1148.  
  1149.  
  1150. g_bFirstFrame = false;
  1151.  
  1152. if (result != CUDA_SUCCESS)
  1153. {
  1154. printf("cuMemAllocHost returned %d\n", (int)result);
  1155. checkCudaErrors(result);
  1156. }
  1157. }
  1158.  
  1159. // If streams are enabled, we can perform the readback to the host while the kernel is executing
  1160. if (g_bReadback && g_ReadbackSID)
  1161. {
  1162. CUresult result = cuMemcpyDtoHAsync(g_bFrameData[active_field], pDecodedFrame[active_field], (nDecodedPitch * nHeight * 3 / 2), g_ReadbackSID);
  1163.  
  1164. if (result != CUDA_SUCCESS)
  1165. {
  1166. printf("cuMemAllocHost returned %d\n", (int)result);
  1167. checkCudaErrors(result);
  1168. }
  1169. }
  1170.  
  1171. #if ENABLE_DEBUG_OUT
  1172. printf("%s = %02d, PicIndex = %02d, OutputPTS = %08d\n",
  1173. (oDisplayInfo.progressive_frame ? "Frame" : "Field"),
  1174. g_DecodeFrameCount, oDisplayInfo.picture_index, oDisplayInfo.timestamp);
  1175. #endif
  1176.  
  1177. if (g_pImageGL)
  1178. {
  1179. // map the texture surface
  1180. g_pImageGL->map(&pInteropFrame[active_field], &pFramePitch, active_field);
  1181. pFramePitch = g_nWindowWidth * 4;
  1182. }
  1183. else
  1184. {
  1185. pInteropFrame[active_field] = g_pInteropFrame[active_field];
  1186. //RMC modified next line
  1187. pFramePitch = g_pVideoDecoder->targetWidth() * 4;
  1188. }
  1189.  
  1190. // perform post processing on the CUDA surface (performs colors space conversion and post processing)
  1191. // comment this out if we inclue the line of code seen above
  1192.  
  1193.  
  1194. cudaPostProcessFrame(&pDecodedFrame[active_field], nDecodedPitch, &pInteropFrame[active_field],
  1195. pFramePitch, g_pCudaModule->getModule(), g_kernelNV12toARGB, g_KernelSID);
  1196. //RMC ADD
  1197. if ((g_FrameCapCount >= 0) && !g_bUseInterop)
  1198. if (++g_FrameCapCount == g_FrameCapSelect){
  1199. g_FrameCapPitch = pFramePitch;
  1200. g_FrameCapHeight = nHeight;
  1201. checkCudaErrors(result = cuMemAllocHost((void **)&g_bFrameCapData, (g_FrameCapPitch * g_FrameCapHeight)));
  1202. checkCudaErrors(cuMemcpyDtoH(g_bFrameCapData, pInteropFrame[active_field], (g_FrameCapPitch * g_FrameCapHeight)));
  1203. printf("frame %d captured\n", g_FrameCapSelect);
  1204. saveFrameCap(g_bFrameCapData, g_FrameCapPitch, g_FrameCapHeight);
  1205. }
  1206. //END RMC ADD
  1207.  
  1208. if (g_pImageGL)
  1209. {
  1210. // unmap the texture surface
  1211. g_pImageGL->unmap(active_field);
  1212. }
  1213.  
  1214. // unmap video frame
  1215. // unmapFrame() synchronizes with the VideoDecode API (ensures the frame has finished decoding)
  1216. g_pVideoDecoder->unmapFrame(pDecodedFrame[active_field]);
  1217. // release the frame, so it can be re-used in decoder
  1218. g_pFrameQueue->releaseFrame(&oDisplayInfo);
  1219.  
  1220. g_DecodeFrameCount++;
  1221. }
  1222.  
  1223. // Detach from the Current thread
  1224. checkCudaErrors(cuCtxPopCurrent(NULL));
  1225. }
  1226. else
  1227. {
  1228. // Frame Queue has no frames, we don't compute FPS until we start
  1229. return false;
  1230. }
  1231.  
  1232. // check if decoding has come to an end.
  1233. // if yes, signal the app to shut down.
  1234. if (!g_pVideoSource->isStarted() || g_pFrameQueue->isEndOfDecode())
  1235. {
  1236. // Let's free the Frame Data
  1237. if (g_ReadbackSID && g_bFrameData)
  1238. {
  1239. checkCudaErrors(cuMemFreeHost((void *)g_bFrameData[0]));
  1240. checkCudaErrors(cuMemFreeHost((void *)g_bFrameData[1]));
  1241. g_bFrameData[0] = NULL;
  1242. g_bFrameData[1] = NULL;
  1243. }
  1244.  
  1245. // Let's just stop, and allow the user to quit, so they can at least see the results
  1246. g_pVideoSource->stop();
  1247.  
  1248. // If we want to loop reload the video file and restart
  1249. if (g_bLoop && !g_bAutoQuit)
  1250. {
  1251. reinitCudaResources();
  1252. g_FrameCount = 0;
  1253. g_DecodeFrameCount = 0;
  1254. g_pVideoSource->start();
  1255. }
  1256.  
  1257. if (g_bAutoQuit)
  1258. {
  1259. g_bDone = true;
  1260. }
  1261. }
  1262.  
  1263. return true;
  1264. }
  1265.  
  1266. // This is the CUDA stage for Video Post Processing. Last stage takes care of the NV12 to ARGB
  1267. void
  1268. cudaPostProcessFrame(CUdeviceptr *ppDecodedFrame, size_t nDecodedPitch,
  1269. CUdeviceptr *ppInteropFrame, size_t pFramePitch,
  1270. CUmodule cuModNV12toARGB,
  1271. CUfunction fpCudaKernel, CUstream streamID)
  1272. {
  1273. uint32 nWidth = g_pVideoDecoder->targetWidth();
  1274. uint32 nHeight = g_pVideoDecoder->targetHeight();
  1275.  
  1276. // Upload the Color Space Conversion Matrices
  1277. if (g_bUpdateCSC)
  1278. {
  1279. // CCIR 601/709
  1280. float hueColorSpaceMat[9];
  1281. setColorSpaceMatrix(g_eColorSpace, hueColorSpaceMat, g_nHue);
  1282. updateConstantMemory_drvapi(cuModNV12toARGB, hueColorSpaceMat);
  1283.  
  1284. if (!g_bUpdateAll)
  1285. {
  1286. g_bUpdateCSC = false;
  1287. }
  1288. }
  1289.  
  1290. // TODO: Stage for handling video post processing
  1291.  
  1292. // Final Stage: NV12toARGB color space conversion
  1293. CUresult eResult;
  1294. eResult = cudaLaunchNV12toARGBDrv(*ppDecodedFrame, nDecodedPitch,
  1295. *ppInteropFrame, pFramePitch,
  1296. nWidth, nHeight, fpCudaKernel, streamID);
  1297. }
  1298.  
  1299. // Draw the final result on the screen
  1300. bool drawScene(int field_num)
  1301. {
  1302. bool hr = true;
  1303.  
  1304. // Normal OpenGL rendering code
  1305. // render image
  1306. if (g_pImageGL)
  1307. {
  1308. g_pImageGL->render(field_num);
  1309. }
  1310.  
  1311. return hr;
  1312. }
  1313.  
  1314. //RMC ADD
  1315. void SaveBMP ( BYTE* data, int w, int h )
  1316. {
  1317. FILE *f;
  1318. unsigned char *img = NULL;
  1319. int filesize = 54 + 3*w*h; //w is your image width, h is image height, both int
  1320. if( img )
  1321. free( img );
  1322. img = (unsigned char *)malloc(3*w*h);
  1323. memset(img,0,sizeof(img));
  1324.  
  1325. for(int j=0; j<h; j++)
  1326. {
  1327. for(int i=0; i<w; i++)
  1328. {
  1329. unsigned char r,g,b;
  1330. r = data[(((j*w)+i)*4)+2];
  1331. g = data[(((j*w)+i)*4)+1];
  1332. b = data[(((j*w)+i)*4)+0];
  1333. img[(((j*w)+i)*3)+2] = (r);
  1334. img[(((j*w)+i)*3)+1] = (g);
  1335. img[(((j*w)+i)*3)+0] = (b);
  1336. }
  1337. }
  1338.  
  1339. unsigned char bmpfileheader[14] = {'B','M', 0,0,0,0, 0,0, 0,0, 54,0,0,0};
  1340. unsigned char bmpinfoheader[40] = {40,0,0,0, 0,0,0,0, 0,0,0,0, 1,0, 24,0};
  1341. unsigned char bmppad[3] = {0,0,0};
  1342.  
  1343. bmpfileheader[ 2] = (unsigned char)(filesize );
  1344. bmpfileheader[ 3] = (unsigned char)(filesize>> 8);
  1345. bmpfileheader[ 4] = (unsigned char)(filesize>>16);
  1346. bmpfileheader[ 5] = (unsigned char)(filesize>>24);
  1347.  
  1348. bmpinfoheader[ 4] = (unsigned char)( w );
  1349. bmpinfoheader[ 5] = (unsigned char)( w>> 8);
  1350. bmpinfoheader[ 6] = (unsigned char)( w>>16);
  1351. bmpinfoheader[ 7] = (unsigned char)( w>>24);
  1352. bmpinfoheader[ 8] = (unsigned char)( h );
  1353. bmpinfoheader[ 9] = (unsigned char)( h>> 8);
  1354. bmpinfoheader[10] = (unsigned char)( h>>16);
  1355. bmpinfoheader[11] = (unsigned char)( h>>24);
  1356.  
  1357. f = fopen(g_FrameCapFile,"wb");
  1358. fwrite(bmpfileheader,1,14,f);
  1359. fwrite(bmpinfoheader,1,40,f);
  1360. for(int i=0; i<h; i++)
  1361. {
  1362. fwrite(img+(w*(h-i-1)*3),3,w,f);
  1363. fwrite(bmppad,1,(4-(w*3)%4)%4,f);
  1364. }
  1365. fclose(f);
  1366. printf("saved frame capture file width = %d, height = %d\n", w, h);
  1367. }
  1368.  
  1369. void saveFrameCap(BYTE *data, size_t pitch, size_t height){
  1370. if (data && !g_bUseInterop)
  1371. SaveBMP(data, pitch/4, height);
  1372.  
  1373. }
  1374. //END RMC ADD
  1375. // Release all previously initd objects
  1376. bool cleanup(bool bDestroyContext)
  1377. {
  1378. if (bDestroyContext)
  1379. {
  1380. // Attach the CUDA Context (so we may properly free memroy)
  1381. checkCudaErrors(cuCtxPushCurrent(g_oContext));
  1382.  
  1383. if (g_pInteropFrame[0])
  1384. {
  1385. checkCudaErrors(cuMemFree(g_pInteropFrame[0]));
  1386. }
  1387.  
  1388. if (g_pInteropFrame[1])
  1389. {
  1390. checkCudaErrors(cuMemFree(g_pInteropFrame[1]));
  1391. }
  1392.  
  1393. // Detach from the Current thread
  1394. checkCudaErrors(cuCtxPopCurrent(NULL));
  1395. }
  1396.  
  1397. if (g_pImageGL)
  1398. {
  1399. delete g_pImageGL;
  1400. g_pImageGL = NULL;
  1401. }
  1402.  
  1403. freeCudaResources(bDestroyContext);
  1404.  
  1405. return true;
  1406. }
  1407.  
  1408. // Launches the CUDA kernels to fill in the texture data
  1409. void renderVideoFrame(int bUseInterop)
  1410. {
  1411. static unsigned int nRepeatFrame = 0;
  1412. int repeatFactor = g_iRepeatFactor;
  1413. int bIsProgressive = 1, bFPSComputed = 0;
  1414. bool bFramesDecoded = false;
  1415.  
  1416. if (0 != g_pFrameQueue)
  1417. {
  1418. // if not running, we simply don't copy new frames from the decoder
  1419. if (!g_bDeviceLost && g_bRunning)
  1420. {
  1421. bFramesDecoded = copyDecodedFrameToTexture(nRepeatFrame, bUseInterop, &bIsProgressive);
  1422. }
  1423. }
  1424. else
  1425. {
  1426. return;
  1427. }
  1428.  
  1429. if (bFramesDecoded)
  1430. {
  1431. while (repeatFactor-- > 0)
  1432. {
  1433. if (g_bUseDisplay && bUseInterop)
  1434. {
  1435. // We will always draw field/frame 0
  1436. drawScene(0);
  1437. glutSwapBuffers();
  1438.  
  1439. if (!repeatFactor)
  1440. {
  1441. computeFPS(bUseInterop);
  1442. }
  1443.  
  1444. // If interlaced mode, then we will need to draw field 1 separately
  1445. if (!bIsProgressive)
  1446. {
  1447. drawScene(1);
  1448. glutSwapBuffers();
  1449.  
  1450. if (!repeatFactor)
  1451. {
  1452. computeFPS(bUseInterop);
  1453. }
  1454. }
  1455.  
  1456. bFPSComputed = 1;
  1457. }
  1458.  
  1459. // Pass the Windows handle to show Frame Rate on the window title
  1460. if (!bFPSComputed && !repeatFactor)
  1461. {
  1462. computeFPS(bUseInterop);
  1463. }
  1464.  
  1465. if (g_bUseDisplay && bUseInterop)
  1466. {
  1467. glutReportErrors();
  1468. }
  1469.  
  1470. }
  1471. }
  1472.  
  1473. if (bFramesDecoded && g_bFrameStep)
  1474. {
  1475. if (g_bRunning)
  1476. {
  1477. g_bRunning = false;
  1478. }
  1479. }
  1480. }
Advertisement
Add Comment
Please, Sign In to add comment