NVIDIA DeepStream SDK API Reference

7.1 Release
nvdsinfer_model_builder.h
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
4  *
5  * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
6  * property and proprietary rights in and to this material, related
7  * documentation and any modifications thereto. Any use, reproduction,
8  * disclosure or distribution of this material and related documentation
9  * without an express license agreement from NVIDIA CORPORATION or
10  * its affiliates is strictly prohibited.
11  */
12 
13 #ifndef __NVDSINFER_MODEL_BUILDER_H__
14 #define __NVDSINFER_MODEL_BUILDER_H__
15 
16 #include <stdarg.h>
17 #include <algorithm>
18 #include <condition_variable>
19 #include <map>
20 #include <memory>
21 #include <mutex>
22 #include <queue>
23 #include <string>
24 #include <unordered_map>
25 
26 #include <NvInfer.h>
27 #include <NvInferRuntime.h>
28 #include <NvOnnxParser.h>
29 
30 #include <nvdsinfer_custom_impl.h>
31 #include "nvdsinfer_func_utils.h"
32 #include "nvdsinfer_tlt.h"
33 
34 /* This file provides APIs for building models from ONNX files. It
35  * also defines an interface where users can provide custom model parsers for
36  * custom networks. A helper class (TrtEngine) written on top of TensorRT's
37  * nvinfer1::ICudaEngine is also defined in this file.
38  *
39  * These interfaces/APIs are used by NvDsInferContextImpl class. */
40 
41 namespace nvdsinfer {
42 
44 
45 static const size_t kWorkSpaceSize = 450 * 1024 * 1024; // 450MB
46 
51 class BaseModelParser : public IModelParser
52 {
53 public:
55  const std::shared_ptr<DlLibHandle>& dllib)
56  : m_ModelParams(params), m_LibHandle(dllib) {}
57  virtual ~BaseModelParser() {}
58  virtual bool isValid() const = 0;
59 
60 private:
61  DISABLE_CLASS_COPY(BaseModelParser);
62 
63 protected:
65  std::shared_ptr<DlLibHandle> m_LibHandle;
66 };
67 
73 {
74 public:
76  const std::shared_ptr<DlLibHandle>& handle = nullptr)
77  : BaseModelParser(initParams, handle),
78  m_ModelName(initParams.onnxFilePath) {}
79  ~OnnxModelParser() override = default;
80  bool isValid() const override { return !m_ModelName.empty(); }
81  const char* getModelName() const override { return m_ModelName.c_str(); }
82  NvDsInferStatus parseModel(nvinfer1::INetworkDefinition& network) override;
83  bool hasFullDimsSupported() const override { return true; }
84 
85 private:
86  std::string m_ModelName;
87 
88 protected:
89  std::unique_ptr<nvonnxparser::IParser> m_OnnxParser;
90 };
91 
100 {
101 public:
103  const std::shared_ptr<DlLibHandle>& handle);
104 
106 
107  bool isValid() const override
108  {
109  return (bool)m_CustomParser;
110  }
111 
112  const char* getModelName() const override
113  {
114  return isValid() ? safeStr(m_CustomParser->getModelName()) : "";
115  }
116 
117  NvDsInferStatus parseModel(nvinfer1::INetworkDefinition& network) override;
118  bool hasFullDimsSupported() const override
119  {
120  return m_CustomParser->hasFullDimsSupported();
121  }
122 
123 private:
124  std::unique_ptr<IModelParser> m_CustomParser;
125 };
126 
128 class TrtModelBuilder;
129 
135 {
136  using TensorIOFormat =
137  std::tuple<nvinfer1::DataType, nvinfer1::TensorFormats>;
138  using LayerDevicePrecision =
139  std::tuple<nvinfer1::DataType, nvinfer1::DeviceType>;
140 
144  int dlaCore = -1;
145  std::unordered_map<std::string, TensorIOFormat> inputFormats;
146  std::unordered_map<std::string, TensorIOFormat> outputFormats;
147  std::unordered_map<std::string, LayerDevicePrecision> layerDevicePrecisions;
148 
149 public:
150  virtual ~BuildParams(){};
151  virtual NvDsInferStatus configBuilder(TrtModelBuilder& builder) = 0;
152  virtual bool sanityCheck() const;
153 };
154 
159 {
160  int maxBatchSize = 0;
161  std::vector<nvinfer1::Dims> inputDims;
162 
163 private:
164  NvDsInferStatus configBuilder(TrtModelBuilder& builder) override;
165 
166  bool sanityCheck() const override;
167 };
168 
169 using ProfileDims = std::array<nvinfer1::Dims,
170  nvinfer1::EnumMax<nvinfer1::OptProfileSelector>()>;
171 
176 {
177  // profileSelector, dims without batchSize
178  // each input must have 3 selectors (MIN/OPT/MAX) for profile0,
179  // doesn't support multiple profiles
180  std::vector<ProfileDims> inputProfileDims;
181  int minBatchSize = 1;
182  int optBatchSize = 1;
183  int maxBatchSize = 1;
185 
186 private:
187  NvDsInferStatus configBuilder(TrtModelBuilder& builder) override;
188  bool sanityCheck() const override;
189 };
190 
195 {
196 public:
197  TrtEngine(std::unique_ptr<nvinfer1::ICudaEngine>&& engine, int dlaCore = -1)
198  : m_Engine(std::move(engine)), m_DlaCore(dlaCore) {}
199 
200  TrtEngine(std::unique_ptr<nvinfer1::ICudaEngine>&& engine,
201  const std::shared_ptr<nvinfer1::IRuntime>& runtime, int dlaCore = -1,
202  const std::shared_ptr<DlLibHandle>& dlHandle = nullptr,
203  nvinfer1::IPluginFactory* pluginFactory = nullptr);
204 
205  ~TrtEngine();
206 
207  bool hasDla() const { return m_DlaCore >= 0; }
208  int getDlaCore() const { return m_DlaCore; }
209 
211  std::vector<NvDsInferBatchDimsLayerInfo>& layersInfo);
213  int profileIdx, std::vector<NvDsInferBatchDimsLayerInfo>& layersInfo);
215 
216  void printEngineInfo();
217 
218  nvinfer1::ICudaEngine& engine()
219  {
220  assert(m_Engine);
221  return *m_Engine;
222  }
223 
224  nvinfer1::ICudaEngine* operator->()
225  {
226  assert(m_Engine);
227  return m_Engine.get();
228  }
229 
230 private:
231  DISABLE_CLASS_COPY(TrtEngine);
232 
233  std::shared_ptr<nvinfer1::IRuntime> m_Runtime;
234  std::unique_ptr<nvinfer1::ICudaEngine> m_Engine;
235  std::shared_ptr<DlLibHandle> m_DlHandle;
236  nvinfer1::IPluginFactory* m_RuntimePluginFactory = nullptr;
237  int m_DlaCore = -1;
238 
239  friend bool ::NvDsInferCudaEngineGetFromTltModel( nvinfer1::IBuilder * const builder,
240  nvinfer1::IBuilderConfig * const builderConfig,
241  const NvDsInferContextInitParams * const initParams,
242  nvinfer1::DataType dataType,
243  nvinfer1::ICudaEngine *& cudaEngine);
244 };
245 
255 {
256 public:
257  TrtModelBuilder(int gpuId, nvinfer1::ILogger& logger,
258  const std::shared_ptr<DlLibHandle>& dlHandle = nullptr, bool isEngineFile = false);
259 
261  m_Parser.reset();
262  }
263 
264  void setInt8Calibrator(std::unique_ptr<nvinfer1::IInt8Calibrator>&& calibrator)
265  {
266  m_Int8Calibrator = std::move(calibrator);
267  }
268 
269  /* Populate INetworkDefinition by parsing the model, build the engine and
270  * return it as TrtEngine instance. Also, returns a suggested path for
271  * writing the serialized engine to.
272  *
273  * Suggested path has the following format:
274  * suggested path = [modelName]_b[#batchSize]_[#device]_[#dataType].engine
275  */
276  std::unique_ptr<TrtEngine> buildModel(
277  const NvDsInferContextInitParams& initParams,
278  std::string& suggestedPathName);
279 
280  /* Builds the engine from an already populated INetworkDefinition based on
281  * the BuildParams passed to it. Returns the engine in the form of TrtEngine
282  * instance.
283  */
284  std::unique_ptr<TrtEngine> buildEngine(
285  nvinfer1::INetworkDefinition& network, BuildParams& options);
286 
287  /* Serialize engine to file
288  */
290  const std::string& path, nvinfer1::ICudaEngine& engine);
291 
292  /* Deserialize engine from file
293  */
294  std::unique_ptr<TrtEngine> deserializeEngine(
295  const std::string& path, int dla = -1);
296 
297 private:
298  /* Parses a model file using an IModelParser implementation for
299  * ONNX formats or from custom IModelParser implementation.
300  */
301  NvDsInferStatus buildNetwork(const NvDsInferContextInitParams& initParams);
302 
303  /* build cudaEngine from Network, be careful with implicitBatch and
304  * explicitBatch.
305  */
306  std::unique_ptr<TrtEngine> buildEngine();
307 
308  /* Calls a custom library's implementation of NvDsInferCudaEngineGet function
309  * to get a built ICudaEngine. */
310  std::unique_ptr<TrtEngine> getCudaEngineFromCustomLib(
311  NvDsInferCudaEngineGetFcnDeprecated cudaEngineGetDeprecatedFcn,
312  NvDsInferEngineCreateCustomFunc cudaEngineGetFcn,
313  const NvDsInferContextInitParams& initParams,
314  NvDsInferNetworkMode &networkMode);
315 
316 
317  /* config builder options */
318  NvDsInferStatus configCommonOptions(BuildParams& params);
319  NvDsInferStatus configImplicitOptions(ImplicitBuildParams& params);
320  NvDsInferStatus configExplicitOptions(ExplicitBuildParams& params);
321 
322  std::unique_ptr<BuildParams> createImplicitParams(
323  const NvDsInferContextInitParams& initParams);
324  std::unique_ptr<BuildParams> createDynamicParams(
325  const NvDsInferContextInitParams& initParams);
326  void initCommonParams(
327  BuildParams& params, const NvDsInferContextInitParams& initParams);
328 
329  DISABLE_CLASS_COPY(TrtModelBuilder);
330 
331  int m_GpuId = 0;
332  nvinfer1::ILogger& m_Logger;
333  std::shared_ptr<DlLibHandle> m_DlLib;
334  std::shared_ptr<BaseModelParser> m_Parser;
335  std::unique_ptr<BuildParams> m_Options;
336  std::unique_ptr<nvinfer1::IBuilder> m_Builder;
337  std::unique_ptr<nvinfer1::IBuilderConfig> m_BuilderConfig;
338  std::unique_ptr<nvinfer1::INetworkDefinition> m_Network;
339  std::shared_ptr<nvinfer1::IInt8Calibrator> m_Int8Calibrator;
340 
341  friend class BuildParams;
342  friend class ImplicitBuildParams;
343  friend class ExplicitBuildParams;
344 
345  friend bool ::NvDsInferCudaEngineGetFromTltModel( nvinfer1::IBuilder * const builder,
346  nvinfer1::IBuilderConfig * const builderConfig,
347  const NvDsInferContextInitParams * const initParams,
348  nvinfer1::DataType dataType,
349  nvinfer1::ICudaEngine *& cudaEngine);
350 };
351 
352 } // end of namespace nvdsinfer
353 
354 #endif
nvdsinfer::OnnxModelParser::~OnnxModelParser
~OnnxModelParser() override=default
nvdsinfer::OnnxModelParser::hasFullDimsSupported
bool hasFullDimsSupported() const override
Definition: nvdsinfer_model_builder.h:83
nvdsinfer::BuildParams::inputFormats
std::unordered_map< std::string, TensorIOFormat > inputFormats
Definition: nvdsinfer_model_builder.h:145
nvdsinfer::CustomModelParser::isValid
bool isValid() const override
Definition: nvdsinfer_model_builder.h:107
nvdsinfer::TrtEngine::getLayerInfo
NvDsInferStatus getLayerInfo(int idx, NvDsInferLayerInfo &layer)
nvdsinfer::TrtEngine::getImplicitLayersInfo
NvDsInferStatus getImplicitLayersInfo(std::vector< NvDsInferBatchDimsLayerInfo > &layersInfo)
nvdsinfer::BuildParams::sanityCheck
virtual bool sanityCheck() const
NvDsInferTensorOrder
NvDsInferTensorOrder
Defines UFF input layer orders.
Definition: nvdsinfer_context.h:176
nvdsinfer::ImplicitBuildParams
Holds build parameters required for implicit batch dimension network.
Definition: nvdsinfer_model_builder.h:158
nvdsinfer::CustomModelParser
Implementation of ModelParser for custom models.
Definition: nvdsinfer_model_builder.h:99
nvdsinfer::TrtEngine
Helper class written on top of nvinfer1::ICudaEngine.
Definition: nvdsinfer_model_builder.h:194
nvdsinfer::BaseModelParser::m_LibHandle
std::shared_ptr< DlLibHandle > m_LibHandle
Definition: nvdsinfer_model_builder.h:65
nvdsinfer::CustomModelParser::parseModel
NvDsInferStatus parseModel(nvinfer1::INetworkDefinition &network) override
nvdsinfer::ExplicitBuildParams::inputProfileDims
std::vector< ProfileDims > inputProfileDims
Definition: nvdsinfer_model_builder.h:180
ds3d::DataType
DataType
Definition: idatatype.h:77
nvdsinfer::BuildParams::workspaceSize
size_t workspaceSize
Definition: nvdsinfer_model_builder.h:141
nvdsinfer::OnnxModelParser
Implementation of ModelParser for ONNX models derived from BaseModelParser.
Definition: nvdsinfer_model_builder.h:72
NvDsInferCudaEngineGetFromTltModel
bool NvDsInferCudaEngineGetFromTltModel(nvinfer1::IBuilder *const builder, nvinfer1::IBuilderConfig *const builderConfig, const NvDsInferContextInitParams *const initParams, nvinfer1::DataType dataType, nvinfer1::ICudaEngine *&cudaEngine)
Decodes and creates a CUDA engine file from a TLT encoded model.
nvdsinfer::ImplicitBuildParams::maxBatchSize
int maxBatchSize
Definition: nvdsinfer_model_builder.h:160
nvdsinfer::TrtModelBuilder
Helper class to build models and generate the TensorRT ICudaEngine required for inference.
Definition: nvdsinfer_model_builder.h:254
nvdsinfer::TrtEngine::printEngineInfo
void printEngineInfo()
nvdsinfer::BuildParams::outputFormats
std::unordered_map< std::string, TensorIOFormat > outputFormats
Definition: nvdsinfer_model_builder.h:146
nvdsinfer::BaseModelParser::BaseModelParser
BaseModelParser(const NvDsInferContextInitParams &params, const std::shared_ptr< DlLibHandle > &dllib)
Definition: nvdsinfer_model_builder.h:54
nvdsinfer::BuildParams::int8CalibrationFilePath
std::string int8CalibrationFilePath
Definition: nvdsinfer_model_builder.h:143
nvdsinfer::CustomModelParser::~CustomModelParser
~CustomModelParser()
Definition: nvdsinfer_model_builder.h:105
nvdsinfer::BuildParams::dlaCore
int dlaCore
Definition: nvdsinfer_model_builder.h:144
nvdsinfer::TrtModelBuilder::serializeEngine
NvDsInferStatus serializeEngine(const std::string &path, nvinfer1::ICudaEngine &engine)
nvdsinfer::BuildParams::networkMode
NvDsInferNetworkMode networkMode
Definition: nvdsinfer_model_builder.h:142
NvDsInferCudaEngineGet
bool NvDsInferCudaEngineGet(nvinfer1::IBuilder *builder, NvDsInferContextInitParams *initParams, nvinfer1::DataType dataType, nvinfer1::ICudaEngine *&cudaEngine) __attribute__((deprecated("Use 'engine-create-func-name' config parameter instead")))
The NvDsInferCudaEngineGet interface has been deprecated and has been replaced by NvDsInferEngineCreateCustomFunc.
NvDsInferNetworkMode
NvDsInferNetworkMode
Defines internal data formats used by the inference engine.
Definition: nvdsinfer_context.h:120
nvdsinfer
Definition: nvdsinfer_model_builder.h:41
nvdsinfer::OnnxModelParser::m_OnnxParser
std::unique_ptr< nvonnxparser::IParser > m_OnnxParser
Definition: nvdsinfer_model_builder.h:89
nvdsinfer::BuildParams::TensorIOFormat
std::tuple< nvinfer1::DataType, nvinfer1::TensorFormats > TensorIOFormat
Definition: nvdsinfer_model_builder.h:137
nvdsinfer::TrtModelBuilder::~TrtModelBuilder
~TrtModelBuilder()
Definition: nvdsinfer_model_builder.h:260
NvDsInferNetworkMode_FP32
@ NvDsInferNetworkMode_FP32
Definition: nvdsinfer_context.h:122
nvdsinfer::TrtEngine::engine
nvinfer1::ICudaEngine & engine()
Definition: nvdsinfer_model_builder.h:218
nvdsinfer::ExplicitBuildParams::optBatchSize
int optBatchSize
Definition: nvdsinfer_model_builder.h:182
NvDsInferEngineCreateCustomFunc
bool(* NvDsInferEngineCreateCustomFunc)(nvinfer1::IBuilder *const builder, nvinfer1::IBuilderConfig *const builderConfig, const NvDsInferContextInitParams *const initParams, nvinfer1::DataType dataType, nvinfer1::ICudaEngine *&cudaEngine)
Type definition for functions that build and return a CudaEngine for custom models.
Definition: nvdsinfer_custom_impl.h:362
nvdsinfer::TrtEngine::hasDla
bool hasDla() const
Definition: nvdsinfer_model_builder.h:207
nvdsinfer::BaseModelParser::isValid
virtual bool isValid() const =0
nvdsinfer::TrtModelBuilder::TrtModelBuilder
TrtModelBuilder(int gpuId, nvinfer1::ILogger &logger, const std::shared_ptr< DlLibHandle > &dlHandle=nullptr, bool isEngineFile=false)
NvDsInferLayerInfo
Holds information about one layer in the model.
Definition: nvdsinfer.h:89
nvdsinfer_custom_impl.h
nvdsinfer::TrtModelBuilder::deserializeEngine
std::unique_ptr< TrtEngine > deserializeEngine(const std::string &path, int dla=-1)
nvdsinfer::ProfileDims
std::array< nvinfer1::Dims, nvinfer1::EnumMax< nvinfer1::OptProfileSelector >()> ProfileDims
Definition: nvdsinfer_model_builder.h:170
nvdsinfer::BaseModelParser::~BaseModelParser
virtual ~BaseModelParser()
Definition: nvdsinfer_model_builder.h:57
nvdsinfer::BaseModelParser::m_ModelParams
NvDsInferContextInitParams m_ModelParams
Definition: nvdsinfer_model_builder.h:64
nvdsinfer::ExplicitBuildParams::maxBatchSize
int maxBatchSize
Definition: nvdsinfer_model_builder.h:183
nvdsinfer::BuildParams::LayerDevicePrecision
std::tuple< nvinfer1::DataType, nvinfer1::DeviceType > LayerDevicePrecision
Definition: nvdsinfer_model_builder.h:139
nvdsinfer::OnnxModelParser::isValid
bool isValid() const override
Definition: nvdsinfer_model_builder.h:80
nvdsinfer::OnnxModelParser::getModelName
const char * getModelName() const override
Definition: nvdsinfer_model_builder.h:81
NvDsInferTensorOrder_kNCHW
@ NvDsInferTensorOrder_kNCHW
Definition: nvdsinfer_context.h:177
nvdsinfer::OnnxModelParser::OnnxModelParser
OnnxModelParser(const NvDsInferContextInitParams &initParams, const std::shared_ptr< DlLibHandle > &handle=nullptr)
Definition: nvdsinfer_model_builder.h:75
nvdsinfer::TrtModelBuilder::buildModel
std::unique_ptr< TrtEngine > buildModel(const NvDsInferContextInitParams &initParams, std::string &suggestedPathName)
nvdsinfer::TrtEngine::getDlaCore
int getDlaCore() const
Definition: nvdsinfer_model_builder.h:208
nvdsinfer::BaseModelParser
ModelParser base.
Definition: nvdsinfer_model_builder.h:51
_NvDsInferContextInitParams
Holds the initialization parameters required for the NvDsInferContext interface.
Definition: nvdsinfer_context.h:239
nvdsinfer::BuildParams::~BuildParams
virtual ~BuildParams()
Definition: nvdsinfer_model_builder.h:150
nvdsinfer::BuildParams::configBuilder
virtual NvDsInferStatus configBuilder(TrtModelBuilder &builder)=0
nvdsinfer::NvDsInferCudaEngineGetFcnDeprecated
decltype(&NvDsInferCudaEngineGet) NvDsInferCudaEngineGetFcnDeprecated
Definition: nvdsinfer_model_builder.h:43
nvdsinfer::TrtEngine::getFullDimsLayersInfo
NvDsInferStatus getFullDimsLayersInfo(int profileIdx, std::vector< NvDsInferBatchDimsLayerInfo > &layersInfo)
nvdsinfer::CustomModelParser::getModelName
const char * getModelName() const override
Definition: nvdsinfer_model_builder.h:112
nvdsinfer::TrtEngine::operator->
nvinfer1::ICudaEngine * operator->()
Definition: nvdsinfer_model_builder.h:224
nvdsinfer::safeStr
const char * safeStr(const char *str)
Definition: nvdsinfer_func_utils.h:81
nvdsinfer::ImplicitBuildParams::inputDims
std::vector< nvinfer1::Dims > inputDims
Definition: nvdsinfer_model_builder.h:161
nvdsinfer_tlt.h
nvdsinfer::BuildParams::layerDevicePrecisions
std::unordered_map< std::string, LayerDevicePrecision > layerDevicePrecisions
Definition: nvdsinfer_model_builder.h:147
nvdsinfer::ExplicitBuildParams
Holds build parameters required for full dimensions network.
Definition: nvdsinfer_model_builder.h:175
nvdsinfer::BuildParams
Holds build parameters common to implicit batch dimension/full dimension networks.
Definition: nvdsinfer_model_builder.h:134
nvdsinfer::ExplicitBuildParams::inputOrder
NvDsInferTensorOrder inputOrder
Definition: nvdsinfer_model_builder.h:184
nvdsinfer::kWorkSpaceSize
static const size_t kWorkSpaceSize
Definition: nvdsinfer_model_builder.h:45
nvdsinfer::CustomModelParser::CustomModelParser
CustomModelParser(const NvDsInferContextInitParams &initParams, const std::shared_ptr< DlLibHandle > &handle)
nvdsinfer::TrtEngine::~TrtEngine
~TrtEngine()
nvdsinfer::OnnxModelParser::parseModel
NvDsInferStatus parseModel(nvinfer1::INetworkDefinition &network) override
nvdsinfer::ExplicitBuildParams::minBatchSize
int minBatchSize
Definition: nvdsinfer_model_builder.h:181
nvdsinfer::CustomModelParser::hasFullDimsSupported
bool hasFullDimsSupported() const override
Definition: nvdsinfer_model_builder.h:118
nvdsinfer::TrtEngine::TrtEngine
TrtEngine(std::unique_ptr< nvinfer1::ICudaEngine > &&engine, int dlaCore=-1)
Definition: nvdsinfer_model_builder.h:197
nvdsinfer::TrtModelBuilder::setInt8Calibrator
void setInt8Calibrator(std::unique_ptr< nvinfer1::IInt8Calibrator > &&calibrator)
Definition: nvdsinfer_model_builder.h:264
NvDsInferStatus
NvDsInferStatus
Enum for the status codes returned by NvDsInferContext.
Definition: nvdsinfer.h:220
nvdsinfer_func_utils.h