NVIDIA DeepStream SDK API Reference, 7.1 Release
#ifndef __NVDSINFER_TRTIS_BACKEND_H__
#define __NVDSINFER_TRTIS_BACKEND_H__

// Forward declarations of the Triton server wrapper classes.
class TrtServerAllocator;
class TrtServerRequest;
class TrtServerResponse;

// Inline accessors.
void addClassifyParams(const TritonClassParams& c) { m_ClassifyParams.emplace_back(c); }
std::vector<TritonClassParams> getClassifyParams() { return m_ClassifyParams; }
const std::string& model() const { return m_Model; }
int64_t version() const { return m_ModelVersion; }

// setTensorMaxBytes(): keep the larger of the existing and the newly requested size.
size_t& bytes = m_TensorMaxBytes[name];
bytes = std::max<size_t>(maxBytes, bytes);

// setAllocator(): take ownership of the output tensor allocator.
m_ResponseAllocator = std::move(allocator);

// allocateResponseBuf() parameter list.
const std::string& tensor, size_t bytes, InferMemType memType, int64_t devId);

// Key of the response buffer pool map: tensor name, GPU ID, memory type.
using PoolKey = std::tuple<std::string, int64_t, InferMemType>;

// serverInferCompleted() parameters (excerpt).
std::shared_ptr<TrtServerRequest> request,
std::unique_ptr<TrtServerResponse> uniqResponse,

// Member variables (excerpt).
int64_t m_ModelVersion = -1;
bool m_NeedUnload = false;
std::vector<TritonClassParams> m_ClassifyParams;
int64_t m_OutputDevId = -1;
int m_PerPoolSize = 2;
std::map<PoolKey, PoolValue> m_ResponsePool;
using SharedMutex = std::shared_timed_mutex;
SharedMutex m_ResponseMutex;
std::unordered_map<std::string, size_t> m_TensorMaxBytes;
using ReorderThread = QueueThread<std::vector<ReorderItemPtr>>;
std::unique_ptr<ReorderThread> m_ReorderThread;
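For orientation, here is a minimal usage sketch of the class built only from the members documented on this page. It is not code from the SDK: the model name, pool size, and the InferMemType::kGpuCuda enumerator are illustrative assumptions, and includes are omitted.

// Minimal sketch; model name, sizes and the memory-type enumerator are assumptions.
NvDsInferStatus runOnce(SharedBatchArray inputs, SharedCuStream stream)
{
    TrtISBackend backend("my_model", /*version*/ -1);   // model name is illustrative
    backend.setOutputMemType(InferMemType::kGpuCuda);   // enumerator name assumed
    backend.setOutputDevId(0);
    backend.setOutputPoolSize(4);

    NvDsInferStatus status = backend.initialize();
    if (status != NVDSINFER_SUCCESS)
        return status;

    return backend.enqueue(
        std::move(inputs), std::move(stream),
        [](SharedBatchArray consumed) { /* recycle input buffers */ },
        [](NvDsInferStatus s, SharedBatchArray outputs) { /* consume output batch buffers */ });
}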
This is a header file for pre-processing CUDA kernels with normalization and mean subtraction required by nvdsinfer.
SharedBufPool< UniqSysMem > PoolValue
The buffer pool for the specified tensor, GPU and memory type combination.
PoolValue findResponsePool(PoolKey &key)
Find the buffer pool for the given key.
NvDsInferStatus initialize() override
Check that the server and the model are ready, get the layer information, and set up the reorder thread and the output tensor allocator.
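Read together with the virtual helpers documented below (ensureServerReady, ensureModelReady, setupLayersInfo, setupReorderThread), the brief suggests a flow roughly like the following sketch; this is inferred from the briefs, not the SDK's actual implementation.

// Rough flow implied by the briefs; not the actual SDK implementation.
NvDsInferStatus TrtISBackend::initialize()
{
    NvDsInferStatus status = ensureServerReady();   // the Triton server is live
    if (status == NVDSINFER_SUCCESS)
        status = ensureModelReady();                // model is loaded (loads it if needed)
    if (status == NVDSINFER_SUCCESS)
        status = setupLayersInfo();                 // fetch model config, populate layer info
    if (status == NVDSINFER_SUCCESS)
        status = setupReorderThread();              // thread that keeps outputs in request order
    // an output tensor allocator (TrtServerAllocator) is also installed via setAllocator()
    return status;
}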
const std::string & model() const
#define INFER_ROUND_UP(value, align)
virtual NvDsInferStatus ensureModelReady()
Check that the model is ready; load it if it is not.
std::vector< InputShapeTuple > InputShapes
std::shared_ptr< SysMem > SharedSysMem
std::function< void(SharedBatchArray)> InputsConsumed
Function wrapper called after the input buffer is consumed.
SharedBatchArray outputs
Array of output batch buffers.
NvDsInferStatus specifyInputDims(const InputShapes &shapes) override
Specify the input layers for the backend.
Triton backend processing class.
#define INFER_MEM_ALIGNMENT
NVDSINFER_SUCCESS
NvDsInferContext operation succeeded.
std::future< void > future
void setAllocator(UniqTritonAllocator allocator)
Set the output tensor allocator.
InferMemType outputMemType() const
std::unique_ptr< TrtServerAllocator > UniqTritonAllocator
std::shared_ptr< TrtServerAllocator > ShrTritonAllocator
PoolValue createResponsePool(PoolKey &key, size_t bytes)
Create a new buffer pool for the key.
std::shared_ptr< TrtISServer > TrtServerPtr
Header file containing utility functions and classes used by the nvinferserver low-level library.
std::function< void(NvDsInferStatus, SharedBatchArray)> AsyncDone
Asynchronous inference done function: AsyncDone(Status, outputs).
bool debatchingOutput(SharedBatchArray &outputs, SharedBatchArray &inputs)
Separate the batch dimension from the output buffer descriptors.
Header file of the common declarations for the nvinferserver library.
std::function< void(NvDsInferStatus, SharedBatchArray)> InferenceDone
Function wrapper for post inference processing.
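Since all three aliases are plain std::function wrappers, ordinary lambdas with matching signatures can be bound to them; the bodies below are placeholders, not SDK code.

// Placeholder lambdas matching the documented callback signatures.
InputsConsumed onConsumed = [](SharedBatchArray inputs) {
    // input batch buffers may be recycled here
};
AsyncDone onAsyncDone = [](NvDsInferStatus status, SharedBatchArray outputs) {
    // per-request Triton outputs, before reordering/debatching
};
InferenceDone onDone = [](NvDsInferStatus status, SharedBatchArray outputs) {
    // final outputs handed back to the caller of enqueue()
};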
NvDsInferStatus status
Status of processing.
int64_t outputDevId() const
NvDsInferStatus ensureInputs(SharedBatchArray &inputs)
Ensure that the input buffers in the array are those expected by the model, and reshape them if required.
NvDsInferStatus enqueue(SharedBatchArray inputs, SharedCuStream stream, InputsConsumed bufConsumed, InferenceDone inferenceDone) override
Enqueue an input for an inference request by calling Run() and adding the corresponding task to the reorder thread queue.
void setTensorMaxBytes(const std::string &name, size_t maxBytes)
Set the maximum size for the tensor; the larger of the existing size and the new input size is used.
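As a hypothetical example (the tensor name and byte counts are made up, and the backend instance is assumed to come from the earlier sketch), repeated calls keep the running maximum, which is presumably also rounded up to INFER_MEM_ALIGNMENT with INFER_ROUND_UP:

// Hypothetical usage; the tensor name and byte counts are illustrative.
backend.setTensorMaxBytes("prob", 4096);  // stored maximum for "prob" becomes at least 4096
backend.setTensorMaxBytes("prob", 1024);  // smaller request: the stored 4096 is kept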
void setOutputPoolSize(int size)
Helper function to access the member variables.
InferenceDone inferenceDone
Inference done callback function.
virtual NvDsInferStatus ensureServerReady()
Check that the Triton inference server is live.
virtual void requestTritonOutputNames(std::set< std::string > &outNames)
Get the list of output tensor names.
virtual NvDsInferStatus Run(SharedBatchArray inputs, InputsConsumed bufConsumed, AsyncDone asyncDone)
Create an inference request and trigger asynchronous inference.
std::shared_ptr< CudaStream > SharedCuStream
CUDA-based pointers.
SharedSysMem allocateResponseBuf(const std::string &tensor, size_t bytes, InferMemType memType, int64_t devId)
Acquire a buffer from the output buffer pool associated with the device ID and memory type.
std::vector< TritonClassParams > getClassifyParams()
InferMemType
The memory types of inference buffers.
std::tuple< std::string, int64_t, InferMemType > PoolKey
Tuple holding tensor name, GPU ID, memory type.
void setOutputMemType(InferMemType memType)
void releaseResponseBuf(const std::string &tensor, SharedSysMem mem)
Release the output tensor buffer.
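These two calls are normally driven internally by the output tensor allocator (see setAllocator above), together with the PoolKey, findResponsePool and createResponsePool entries documented earlier, but the pairing is easiest to see spelled out. The tensor name, size, and the assumption that the methods are reachable from the calling context are illustrative.

// Not SDK code: how a response buffer for one output tensor would be obtained and
// returned, given a TrtISBackend instance `backend` (accessibility assumed here).
SharedSysMem mem = backend.allocateResponseBuf("prob", 4096,
                                               backend.outputMemType(),
                                               backend.outputDevId());
// ... Triton writes the output tensor into `mem` ...
backend.releaseResponseBuf("prob", std::move(mem));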
void setOutputDevId(int64_t devId)
bool inferenceDoneReorderLoop(ReorderItemPtr item)
Add input buffers to the output buffer list if required.
~TrtISBackend() override
Destructor.
int outputPoolSize() const
Base class of inference backend processing.
Header file for inference processing backend base class.
void serverInferCompleted(std::shared_ptr< TrtServerRequest > request, std::unique_ptr< TrtServerResponse > uniqResponse, InputsConsumed inputsConsumed, AsyncDone asyncDone)
Call the inputs consumed function and parse the inference response to form the array of output batch buffers.
std::promise< void > promise
Synchronization objects.
NvDsInferStatus fixateDims(const SharedBatchArray &bufs)
Extend the dimensions to include the batch size for the buffers in the input array.
NvDsInferStatus setupReorderThread()
Create a loop thread that calls inferenceDoneReorderLoop on the queued items.
SharedBatchArray inputs
Array of input batch buffers.
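The reorder queue item type (ReorderItemPtr) ties together several of the fields documented on this page; assembled from those briefs, it presumably looks roughly like this (field order and any additional members are unknown):

// Presumed shape of a reorder-queue item, assembled from the documented fields only.
struct ReorderItem {
    std::promise<void> promise;   // synchronization objects, presumably signalled when the
    std::future<void> future;     // asynchronous inference for this item has completed
    SharedBatchArray inputs;      // array of input batch buffers
    SharedBatchArray outputs;     // array of output batch buffers
    NvDsInferStatus status;       // status of processing
    InferenceDone inferenceDone;  // inference done callback function
};
using ReorderItemPtr = std::shared_ptr<ReorderItem>;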
TrtServerPtr & server()
Get the Triton server handle.
TrtISBackend(const std::string &name, int64_t version, TrtServerPtr ptr=nullptr)
Constructor.
virtual NvDsInferStatus setupLayersInfo()
Get the model configuration from the server and populate layer information.
std::shared_ptr< BaseBatchArray > SharedBatchArray
std::shared_ptr< ReorderItem > ReorderItemPtr
void addClassifyParams(const TritonClassParams &c)
Add Triton Classification parameters to the list.
NvDsInferStatus
Enum for the status codes returned by NvDsInferContext.