Untitled

int inference(void* p_engine, void* p_context, float *input_img, float output_arr[NUM_OF_OUTPUTS])
{
  /*
   * Get an image buffer ready for inference and run the NN on it.
   * The image is expected to be AFTER all preprocessing steps -
   *  croping, resizing, rescale and normalization (unless this is done by batchnorm).
   */
  LOG("TRTLib: clearing output array\n");
  memset(output_arr, 0, (sizeof(float) * NUM_OF_OUTPUTS));

  LOG("TRTLib: assigning from input pointers\n");

  ICudaEngine &engine = *((ICudaEngine*)p_engine);
  IExecutionContext* context = (IExecutionContext*)p_context;


  LOG("TRTLib: getting bindings from engine\n");
  int batchSize = 1;

  int nbBindings = engine.getNbBindings();
  assert(nbBindings == TOTAL_BINDINGS);

  std::vector<void*> buffers(nbBindings);
  auto buffersSizes = calculateBindingBufferSizes(engine, nbBindings, batchSize);

  int bindingIdxInput = 0;
  for (int i = 0; i < nbBindings; ++i)
  {
    if (engine.bindingIsInput(i))
    {
      bindingIdxInput = i;
    }
    else
    {
      auto bufferSizesOutput = buffersSizes[i];
      buffers[i] = safeCudaMalloc(bufferSizesOutput.first *
                                  elementSizeTrt(bufferSizesOutput.second));
    }
  }

  auto bufferSizesInput = buffersSizes[bindingIdxInput];

  LOG("TRTLib: creating buffer for input \n");

  buffers[bindingIdxInput] = createImageCudaBuffer(bufferSizesInput.first,
                                                   bufferSizesInput.second, input_img);

  LOG("TRTLib: executing inference\n");

  LOG("TRTLib: moving output from GPU to host\n");

  int output_idx = 0;
  for (int bindingIdx = 0; bindingIdx < nbBindings; ++bindingIdx)
  {
    float output;

    if (engine.bindingIsInput(bindingIdx))
      continue;

    auto bufferSizesOutput = buffersSizes[bindingIdx];
    output = getOutputs(bufferSizesOutput.first, bufferSizesOutput.second,
                        buffers[bindingIdx], bindingIdx);

    LOG("assigning output %f in array slot %d\n", output, output_idx);
    output_arr[output_idx++] = output;
  }

  LOG("TRTLib: clean GPU mem\n");

  CHECK(cudaFree(buffers[bindingIdxInput]));

  for (int bindingIdx = 0; bindingIdx < nbBindings; ++bindingIdx)
    if (!engine.bindingIsInput(bindingIdx))
      CHECK(cudaFree(buffers[bindingIdx]));


  LOG("TRTLib: DONE\n");

  return 0;
}


int build_engine(std::string uff_path, uint8_t input_shape[2], void** out_engine, void** out_context)
{
  /*
   * This function will prepare a tensorRT engine, ready for inference jobs.
   * It should be called only once per NN.
   *
   * @uff_path    : Full path to .uff model file.
   *                Note that this is not completely flexible, as input/output
   *                   size/names are hardcoded in the 'trtinference.h' file.
   * @input_shape : Integer array for input image size. should be [Height, Width].
   *                Only grayscale images (single channel) are supported now.
   * @out_engine  : Receives the created ICudaEngine as an opaque pointer
   *                (NULL on failure).
   * @out_context : Receives the created IExecutionContext as an opaque pointer
   *                (NULL on failure).
   *
   * Returns 0 on success, -1 on failure.
   */
  *out_engine = NULL;
  *out_context = NULL;

  LOG("TRTlib: %s\n", uff_path.c_str());
  LOG("TRTlib: %u,%u\n", input_shape[0], input_shape[1]);

  int maxBatchSize = 1;
  auto parser = createUffParser();
  if (!parser) {
    std::cout << "Failed to create UFF parser" << std::endl;
    return -1;
  }

  INPUT_H = input_shape[0];
  INPUT_W = input_shape[1];

  /* Register tensorflow input */
  parser->registerInput(INPUT_BINDING_NAME,
                        Dims3(INPUT_C, INPUT_H, INPUT_W),
                        UffInputOrder::kNCHW);
  parser->registerOutput(OUTPUT_1_BINDING_NAME);
  parser->registerOutput(OUTPUT_2_BINDING_NAME);

  ICudaEngine* engine = loadModelAndCreateEngine(uff_path.c_str(), maxBatchSize, parser);

  /*
   * We don't need to keep the memory created by the parser. Release it
   * before checking the result so it is not leaked when the build fails.
   * NOTE(review): assumes loadModelAndCreateEngine() never destroys the
   * parser itself, as on the success path of the original code - confirm.
   */
  parser->destroy();

  if (!engine) {
    std::cout << "Failed to create engine" << std::endl;
    return -1;
  }

  IExecutionContext* context = engine->createExecutionContext();
  if (!context) {
    std::cout << "Failed to create execution context" << std::endl;
    /* Do not hand back an engine the caller cannot run. */
    engine->destroy();
    return -1;
  }

  *out_engine = (void*)engine;
  *out_context = (void*)context;

  return 0;
}