Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- int inference(void* p_engine, void* p_context, float *input_img, float output_arr[NUM_OF_OUTPUTS])
- {
- /*
- * Get an image buffer ready for inference and run the NN on it.
- * The image is expected to be AFTER all preprocessing steps -
- * croping, resizing, rescale and normalization (unless this is done by batchnorm).
- */
- LOG("TRTLib: clearing output array\n");
- memset(output_arr, 0, (sizeof(float) * NUM_OF_OUTPUTS));
- LOG("TRTLib: assigning from input pointers\n");
- ICudaEngine &engine = *((ICudaEngine*)p_engine);
- IExecutionContext* context = (IExecutionContext*)p_context;
- LOG("TRTLib: getting bindings from engine\n");
- int batchSize = 1;
- int nbBindings = engine.getNbBindings();
- assert(nbBindings == TOTAL_BINDINGS);
- std::vector<void*> buffers(nbBindings);
- auto buffersSizes = calculateBindingBufferSizes(engine, nbBindings, batchSize);
- int bindingIdxInput = 0;
- for (int i = 0; i < nbBindings; ++i)
- {
- if (engine.bindingIsInput(i))
- {
- bindingIdxInput = i;
- }
- else
- {
- auto bufferSizesOutput = buffersSizes[i];
- buffers[i] = safeCudaMalloc(bufferSizesOutput.first *
- elementSizeTrt(bufferSizesOutput.second));
- }
- }
- auto bufferSizesInput = buffersSizes[bindingIdxInput];
- LOG("TRTLib: creating buffer for input \n");
- buffers[bindingIdxInput] = createImageCudaBuffer(bufferSizesInput.first,
- bufferSizesInput.second, input_img);
- LOG("TRTLib: executing inference\n");
- LOG("TRTLib: moving output from GPU to host\n");
- int output_idx = 0;
- for (int bindingIdx = 0; bindingIdx < nbBindings; ++bindingIdx)
- {
- float output;
- if (engine.bindingIsInput(bindingIdx))
- continue;
- auto bufferSizesOutput = buffersSizes[bindingIdx];
- output = getOutputs(bufferSizesOutput.first, bufferSizesOutput.second,
- buffers[bindingIdx], bindingIdx);
- LOG("assigning output %f in array slot %d\n", output, output_idx);
- output_arr[output_idx++] = output;
- }
- LOG("TRTLib: clean GPU mem\n");
- CHECK(cudaFree(buffers[bindingIdxInput]));
- for (int bindingIdx = 0; bindingIdx < nbBindings; ++bindingIdx)
- if (!engine.bindingIsInput(bindingIdx))
- CHECK(cudaFree(buffers[bindingIdx]));
- LOG("TRTLib: DONE\n");
- return 0;
- }
- int build_engine(std::string uff_path, uint8_t input_shape[2], void** out_engine, void** out_context)
- {
- /*
- * This function will prepare a tensorRT engine, ready for inference jobs.
- * It should be called only once per NN.
- *
- * @uff_path : Full path to .uff model file.
- * Note that this is not completely flexible, as input/output
- * size/names are hardcoded in the 'trtinference.h' file.
- * @input_shape : Integer array for input image size. should be [Height, Width].
- * Only grayscale images (single channel) are supported now.
- */
- *out_engine = NULL;
- *out_context = NULL;
- LOG("TRTlib: %s\n", uff_path.c_str());
- LOG("TRTlib: %u,%u\n", input_shape[0], input_shape[1]);
- int maxBatchSize = 1;
- auto parser = createUffParser();
- INPUT_H = input_shape[0];
- INPUT_W = input_shape[1];
- /* Register tensorflow input */
- parser->registerInput(INPUT_BINDING_NAME,
- Dims3(INPUT_C, INPUT_H, INPUT_W),
- UffInputOrder::kNCHW);
- parser->registerOutput(OUTPUT_1_BINDING_NAME);
- parser->registerOutput(OUTPUT_2_BINDING_NAME);
- ICudaEngine* engine = loadModelAndCreateEngine(uff_path.c_str(), maxBatchSize, parser);
- if (!engine) {
- std::cout << "Failed to create engine" << std::endl;
- return -1;
- }
- /* we dont need to keep the memory created by the parser */
- parser->destroy();
- IExecutionContext* context = engine->createExecutionContext();
- *out_engine = (void*)engine;
- *out_context = (void*)context;
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement