NVIDIA DeepStream SDK API Reference (7.1 Release)

Source listing (excerpt):
#ifndef __INFER_GRPC_CLIENT_H__
#define __INFER_GRPC_CLIENT_H__

#include <condition_variable>
// ... (other includes elided)
#include "grpc_client.h"

namespace tc = triton::client;

typedef std::map<std::string, std::string> Headers;

// Triton gRPC inference request class holding data associated with one
// inference request (excerpt: accessors and data members).
class TritonGrpcRequest {
public:
    // ...
    // Helper functions to access the member variables.
    std::vector<std::shared_ptr<tc::InferInput>> inputs() { return m_InferInputs; }
    std::vector<std::shared_ptr<tc::InferRequestedOutput>> outputs() { return m_RequestOutputs; }
    std::shared_ptr<tc::InferOptions> getOption() { return m_InferOptions; }
    std::vector<std::string> getOutNames() { return m_OutputNames; }
    void setOutNames(std::vector<std::string> outnames) { m_OutputNames = outnames; }

    // Append the array of host memory allocations.
    void attachData(void *data) { m_CpuData.push_back(data); }

    // Append the list of shared CUDA input buffers.
    void attachInputCudaBuffer(std::string bufName) { m_InputCudaBufNames.push_back(bufName); }

private:
    std::vector<std::shared_ptr<tc::InferInput>> m_InferInputs;
    std::vector<std::shared_ptr<tc::InferRequestedOutput>> m_RequestOutputs;
    std::shared_ptr<tc::InferOptions> m_InferOptions;
    std::vector<std::string> m_OutputNames;
    std::vector<void*> m_CpuData;
    std::vector<std::string> m_InputCudaBufNames;
};

// Wrapper class for the gRPC client of the Triton Inference Server (excerpt:
// model queries, request creation, and private state).
class InferGrpcClient {
public:
    // ...
    NvDsInferStatus getModelMetadata(inference::ModelMetadataResponse *model_metadata,
        std::string &model_name, std::string &model_version);
    NvDsInferStatus getModelConfig(inference::ModelConfigResponse *config,
        const std::string &name, const std::string &version = "",
        const Headers &headers = Headers());
    bool isModelReady(const std::string &model, const std::string version = "");
    SharedGrpcRequest createRequest(const std::string &model, const std::string &version,
        SharedIBatchArray input, const std::vector<std::string> &outputs,
        const std::vector<TritonClassParams> &classList = std::vector<TritonClassParams>());

private:
    tc::Error SetInputCudaSharedMemory(tc::InferInput *inferInput, /* ... */);
    bool m_EnableCudaBufferSharing;
    std::unique_ptr<tc::InferenceServerGrpcClient> m_GrpcClient;
    std::atomic<uint64_t> m_LastRequestId{UINT64_C(0)};
};
// ... (remainder of file elided)
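Read together with the member summaries below, the wrapper implies a straightforward call sequence: construct the client, call Initialize(), check server and model readiness, build a request from an input batch with createRequest(), and submit it with inferAsync(). The sketch that follows is not part of the header; it only illustrates that sequence. The server URL "localhost:8001", the model name "my_model", the output tensor name "output_tensor", and the helper runInferenceOnce are placeholder assumptions, the SharedIBatchArray input is assumed to come from elsewhere in the nvinferserver pipeline, and failures are mapped to the generic NVDSINFER_UNKNOWN_ERROR status for brevity.

// Sketch only: assumes this header and its dependencies are included and that
// the surrounding code can resolve the library's symbols.
NvDsInferStatus runInferenceOnce(SharedIBatchArray batchArray)
{
    // Placeholder endpoint; CUDA shared-memory buffer sharing disabled here.
    InferGrpcClient client("localhost:8001", /* enableCudaBufferSharing = */ false);
    if (client.Initialize() != NVDSINFER_SUCCESS)
        return NVDSINFER_UNKNOWN_ERROR;

    // Liveness/readiness checks exposed by the wrapper.
    if (!client.isServerLive() || !client.isServerReady() ||
        !client.isModelReady("my_model"))
        return NVDSINFER_UNKNOWN_ERROR;

    // Build a request against the default model version, asking for one output.
    SharedGrpcRequest request =
        client.createRequest("my_model", "", batchArray, {"output_tensor"});
    if (!request)
        return NVDSINFER_UNKNOWN_ERROR;

    // Submit asynchronously; the TritonGrpcAsyncDone callback receives the
    // completion status and the output batch array.
    return client.inferAsync(request,
        [](NvDsInferStatus status, SharedBatchArray outputs) {
            // Consume outputs here when status == NVDSINFER_SUCCESS.
        });
}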
Summary of the symbols declared in this header, with their brief descriptions:

Related types and aliases
    std::map<std::string, std::string> Headers
    std::shared_ptr<BaseBatchBuffer> SharedBatchBuf
    std::shared_ptr<IBatchArray> SharedIBatchArray
    std::shared_ptr<BaseBatchArray> SharedBatchArray
    std::shared_ptr<TritonGrpcRequest> SharedGrpcRequest
    std::function<void(NvDsInferStatus, SharedBatchArray)> TritonGrpcAsyncDone
    NvDsInferStatus
        Enum for the status codes returned by NvDsInferContext.

class TritonGrpcRequest
    Triton gRPC inference request class holding data associated with one inference request.

    ~TritonGrpcRequest()
        Destructor, free the host memory allocated for the request.
    NvDsInferStatus appendInput(const std::shared_ptr<tc::InferInput> &input)
        Append the array of Triton client library inference input objects.
    Helper functions to access the member variables:
        NvDsInferStatus setOutput(const std::vector<std::shared_ptr<tc::InferRequestedOutput>> &output)
        NvDsInferStatus setOption(std::shared_ptr<tc::InferOptions> &option)
        std::vector<std::shared_ptr<tc::InferInput>> inputs()
        std::vector<std::shared_ptr<tc::InferRequestedOutput>> outputs()
        std::shared_ptr<tc::InferOptions> getOption()
        std::vector<std::string> getOutNames()
        void setOutNames(std::vector<std::string> outnames)
    void attachData(void *data)
        Append the array of host memory allocations.
    void attachInputCudaBuffer(std::string bufName)
        Append the list of shared CUDA input buffers.
    void setInputBatchArray(SharedIBatchArray inputBatch)
    SharedIBatchArray inputBatchArray()
    std::vector<std::string> getInputCudaBufNames()

class InferGrpcClient
    Wrapper class for the gRPC client of the Triton Inference Server; interfaces with the Triton client library.

    InferGrpcClient(std::string url, bool enableCudaBufferSharing)
        Constructor, save the server URL and the CUDA buffer sharing flag.
    ~InferGrpcClient()
        Destructor, default.
    NvDsInferStatus Initialize()
        Create the gRPC client instance of the Triton client library.
    NvDsInferStatus getModelMetadata(inference::ModelMetadataResponse *model_metadata, std::string &model_name, std::string &model_version)
        Get the model metadata from the Triton Inference Server.
    NvDsInferStatus getModelConfig(inference::ModelConfigResponse *config, const std::string &name, const std::string &version="", const Headers &headers=Headers())
        Get the model configuration from the Triton Inference Server.
    bool isServerLive()
        Check if the Triton Inference Server is live.
    bool isServerReady()
        Check if the Triton Inference Server is ready.
    bool isModelReady(const std::string &model, const std::string version="")
        Check if the specified model is ready for inference.
    NvDsInferStatus LoadModel(const std::string &model_name, const Headers &headers=Headers())
        Request to load the given model using the Triton client library.
    NvDsInferStatus UnloadModel(const std::string &model_name, const Headers &headers=Headers())
        Request to unload the given model using the Triton client library.
    SharedGrpcRequest createRequest(const std::string &model, const std::string &version, SharedIBatchArray input, const std::vector<std::string> &outputs, const std::vector<TritonClassParams> &classList=std::vector<TritonClassParams>())
        Create a new gRPC inference request.
    NvDsInferStatus inferAsync(SharedGrpcRequest request, TritonGrpcAsyncDone done)
        Get the inference input and output list from the request and trigger the asynchronous inference request.

Briefs of other headers referenced by this file
    This is a header file for pre-processing cuda kernels with normalization and mean subtraction require...
    Common buffer interfaces (internal).
    Header file of the common declarations for the nvinferserver library.
    Inference context library interface header file.
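For reference, the TritonGrpcRequest accessors map directly onto the Triton client primitives they wrap. The sketch below assembles a request by hand, roughly the kind of setup createRequest() presumably performs internally; it is illustrative only. The tensor names, shape, datatype, buffer size, and helper buildRequestManually are placeholder assumptions, it assumes TritonGrpcRequest is default-constructible, and the tc::InferInput::Create, tc::InferRequestedOutput::Create, and tc::InferOptions calls follow the public triton::client API.

// Sketch only: hand-building a request; createRequest() is the supported path.
SharedGrpcRequest buildRequestManually()
{
    auto request = std::make_shared<TritonGrpcRequest>();

    // Hypothetical tensor layout; real values come from the model metadata.
    tc::InferInput *rawInput = nullptr;
    if (!tc::InferInput::Create(&rawInput, "input_tensor", {1, 3, 224, 224}, "FP32").IsOk())
        return nullptr;
    request->appendInput(std::shared_ptr<tc::InferInput>(rawInput));

    // Request one output tensor by name.
    tc::InferRequestedOutput *rawOutput = nullptr;
    if (!tc::InferRequestedOutput::Create(&rawOutput, "output_tensor").IsOk())
        return nullptr;
    request->setOutput({std::shared_ptr<tc::InferRequestedOutput>(rawOutput)});
    request->setOutNames({"output_tensor"});

    // Per-request options carry the model name/version for the gRPC call.
    auto options = std::make_shared<tc::InferOptions>("my_model");
    request->setOption(options);

    // Illustrative host staging buffer; per the documented destructor behavior,
    // host memory attached here is released when the request is destroyed.
    void *hostStaging = malloc(1 * 3 * 224 * 224 * sizeof(float));
    request->attachData(hostStaging);

    return request;
}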