Advertisement
Guest User

Untitled

a guest
Jun 7th, 2019
172
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.98 KB | None | 0 0
  1. int inference(void* p_engine, void* p_context, float *input_img, float output_arr[NUM_OF_OUTPUTS])
  2. {
  3. /*
  4. * Get an image buffer ready for inference and run the NN on it.
  5. * The image is expected to be AFTER all preprocessing steps -
  6. * croping, resizing, rescale and normalization (unless this is done by batchnorm).
  7. */
  8. LOG("TRTLib: clearing output array\n");
  9. memset(output_arr, 0, (sizeof(float) * NUM_OF_OUTPUTS));
  10.  
  11. LOG("TRTLib: assigning from input pointers\n");
  12.  
  13. ICudaEngine &engine = *((ICudaEngine*)p_engine);
  14. IExecutionContext* context = (IExecutionContext*)p_context;
  15.  
  16.  
  17. LOG("TRTLib: getting bindings from engine\n");
  18. int batchSize = 1;
  19.  
  20. int nbBindings = engine.getNbBindings();
  21. assert(nbBindings == TOTAL_BINDINGS);
  22.  
  23. std::vector<void*> buffers(nbBindings);
  24. auto buffersSizes = calculateBindingBufferSizes(engine, nbBindings, batchSize);
  25.  
  26. int bindingIdxInput = 0;
  27. for (int i = 0; i < nbBindings; ++i)
  28. {
  29. if (engine.bindingIsInput(i))
  30. {
  31. bindingIdxInput = i;
  32. }
  33. else
  34. {
  35. auto bufferSizesOutput = buffersSizes[i];
  36. buffers[i] = safeCudaMalloc(bufferSizesOutput.first *
  37. elementSizeTrt(bufferSizesOutput.second));
  38. }
  39. }
  40.  
  41. auto bufferSizesInput = buffersSizes[bindingIdxInput];
  42.  
  43. LOG("TRTLib: creating buffer for input \n");
  44.  
  45. buffers[bindingIdxInput] = createImageCudaBuffer(bufferSizesInput.first,
  46. bufferSizesInput.second, input_img);
  47.  
  48. LOG("TRTLib: executing inference\n");
  49.  
  50. LOG("TRTLib: moving output from GPU to host\n");
  51.  
  52. int output_idx = 0;
  53. for (int bindingIdx = 0; bindingIdx < nbBindings; ++bindingIdx)
  54. {
  55. float output;
  56.  
  57. if (engine.bindingIsInput(bindingIdx))
  58. continue;
  59.  
  60. auto bufferSizesOutput = buffersSizes[bindingIdx];
  61. output = getOutputs(bufferSizesOutput.first, bufferSizesOutput.second,
  62. buffers[bindingIdx], bindingIdx);
  63.  
  64. LOG("assigning output %f in array slot %d\n", output, output_idx);
  65. output_arr[output_idx++] = output;
  66. }
  67.  
  68. LOG("TRTLib: clean GPU mem\n");
  69.  
  70. CHECK(cudaFree(buffers[bindingIdxInput]));
  71.  
  72. for (int bindingIdx = 0; bindingIdx < nbBindings; ++bindingIdx)
  73. if (!engine.bindingIsInput(bindingIdx))
  74. CHECK(cudaFree(buffers[bindingIdx]));
  75.  
  76.  
  77. LOG("TRTLib: DONE\n");
  78.  
  79. return 0;
  80. }
  81.  
  82.  
  83. int build_engine(std::string uff_path, uint8_t input_shape[2], void** out_engine, void** out_context)
  84. {
  85. /*
  86. * This function will prepare a tensorRT engine, ready for inference jobs.
  87. * It should be called only once per NN.
  88. *
  89. * @uff_path : Full path to .uff model file.
  90. * Note that this is not completely flexible, as input/output
  91. * size/names are hardcoded in the 'trtinference.h' file.
  92. * @input_shape : Integer array for input image size. should be [Height, Width].
  93. * Only grayscale images (single channel) are supported now.
  94. */
  95. *out_engine = NULL;
  96. *out_context = NULL;
  97.  
  98. LOG("TRTlib: %s\n", uff_path.c_str());
  99. LOG("TRTlib: %u,%u\n", input_shape[0], input_shape[1]);
  100.  
  101. int maxBatchSize = 1;
  102. auto parser = createUffParser();
  103.  
  104. INPUT_H = input_shape[0];
  105. INPUT_W = input_shape[1];
  106.  
  107. /* Register tensorflow input */
  108. parser->registerInput(INPUT_BINDING_NAME,
  109. Dims3(INPUT_C, INPUT_H, INPUT_W),
  110. UffInputOrder::kNCHW);
  111. parser->registerOutput(OUTPUT_1_BINDING_NAME);
  112. parser->registerOutput(OUTPUT_2_BINDING_NAME);
  113.  
  114. ICudaEngine* engine = loadModelAndCreateEngine(uff_path.c_str(), maxBatchSize, parser);
  115.  
  116. if (!engine) {
  117. std::cout << "Failed to create engine" << std::endl;
  118. return -1;
  119. }
  120.  
  121. /* we dont need to keep the memory created by the parser */
  122. parser->destroy();
  123.  
  124. IExecutionContext* context = engine->createExecutionContext();
  125.  
  126. *out_engine = (void*)engine;
  127. *out_context = (void*)context;
  128.  
  129. return 0;
  130. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement