Go to the source code of this file.
Macros | |
#define | TRITONSERVER_DECLSPEC |
#define | TRITONSERVER_API_VERSION_MAJOR 1 |
TRITONSERVER API Version. More... | |
#define | TRITONSERVER_API_VERSION_MINOR 34 |
Typedefs | |
typedef enum TRITONSERVER_datatype_enum | TRITONSERVER_DataType |
TRITONSERVER_DataType. More... | |
typedef enum TRITONSERVER_memorytype_enum | TRITONSERVER_MemoryType |
TRITONSERVER_MemoryType. More... | |
typedef enum TRITONSERVER_parametertype_enum | TRITONSERVER_ParameterType |
TRITONSERVER_ParameterType. More... | |
typedef enum TRITONSERVER_instancegroupkind_enum | TRITONSERVER_InstanceGroupKind |
TRITONSERVER_InstanceGroupKind. More... | |
typedef enum TRITONSERVER_loglevel_enum | TRITONSERVER_LogLevel |
TRITONSERVER_Logging. More... | |
typedef enum TRITONSERVER_logformat_enum | TRITONSERVER_LogFormat |
Logging Formats. More... | |
typedef enum TRITONSERVER_errorcode_enum | TRITONSERVER_Error_Code |
TRITONSERVER_Error. More... | |
typedef struct TRITONSERVER_Error *(* | TRITONSERVER_ResponseAllocatorAllocFn_t) (struct TRITONSERVER_ResponseAllocator *allocator, const char *tensor_name, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, void *userp, void **buffer, void **buffer_userp, TRITONSERVER_MemoryType *actual_memory_type, int64_t *actual_memory_type_id) |
TRITONSERVER_ResponseAllocator. More... | |
typedef struct TRITONSERVER_Error *(* | TRITONSERVER_ResponseAllocatorBufferAttributesFn_t) (struct TRITONSERVER_ResponseAllocator *allocator, const char *tensor_name, struct TRITONSERVER_BufferAttributes *buffer_attributes, void *userp, void *buffer_userp) |
Type for allocation function that allocates a buffer to hold an output tensor with buffer attributes. More... | |
typedef struct TRITONSERVER_Error *(* | TRITONSERVER_ResponseAllocatorQueryFn_t) (struct TRITONSERVER_ResponseAllocator *allocator, void *userp, const char *tensor_name, size_t *byte_size, TRITONSERVER_MemoryType *memory_type, int64_t *memory_type_id) |
Type for function that is called to query the allocator's preferred memory type and memory type ID. More... | |
typedef struct TRITONSERVER_Error *(* | TRITONSERVER_ResponseAllocatorReleaseFn_t) (struct TRITONSERVER_ResponseAllocator *allocator, void *buffer, void *buffer_userp, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id) |
Type for function that is called when the server no longer holds any reference to a buffer allocated by TRITONSERVER_ResponseAllocatorAllocFn_t. More... | |
typedef struct TRITONSERVER_Error *(* | TRITONSERVER_ResponseAllocatorStartFn_t) (struct TRITONSERVER_ResponseAllocator *allocator, void *userp) |
Type for function that is called to indicate that subsequent allocation requests will refer to a new response. More... | |
typedef enum tritonserver_metricformat_enum | TRITONSERVER_MetricFormat |
TRITONSERVER_Metrics. More... | |
typedef enum tritonserver_tracelevel_enum | TRITONSERVER_InferenceTraceLevel |
TRITONSERVER_InferenceTrace. More... | |
typedef enum tritonserver_traceactivity_enum | TRITONSERVER_InferenceTraceActivity |
Trace activities. More... | |
typedef void(* | TRITONSERVER_InferenceTraceActivityFn_t) (struct TRITONSERVER_InferenceTrace *trace, TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns, void *userp) |
Type for trace timeline activity callback function. More... | |
typedef void(* | TRITONSERVER_InferenceTraceTensorActivityFn_t) (struct TRITONSERVER_InferenceTrace *trace, TRITONSERVER_InferenceTraceActivity activity, const char *name, TRITONSERVER_DataType datatype, const void *base, size_t byte_size, const int64_t *shape, uint64_t dim_count, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, void *userp) |
Type for trace tensor activity callback function. More... | |
typedef void(* | TRITONSERVER_InferenceTraceReleaseFn_t) (struct TRITONSERVER_InferenceTrace *trace, void *userp) |
Type for trace release callback function. More... | |
typedef enum tritonserver_requestflag_enum | TRITONSERVER_RequestFlag |
TRITONSERVER_InferenceRequest. More... | |
typedef enum tritonserver_requestreleaseflag_enum | TRITONSERVER_RequestReleaseFlag |
Inference request release flags. More... | |
typedef enum tritonserver_responsecompleteflag_enum | TRITONSERVER_ResponseCompleteFlag |
Inference response complete flags. More... | |
typedef void(* | TRITONSERVER_InferenceRequestReleaseFn_t) (struct TRITONSERVER_InferenceRequest *request, const uint32_t flags, void *userp) |
Type for inference request release callback function. More... | |
typedef void(* | TRITONSERVER_InferenceResponseCompleteFn_t) (struct TRITONSERVER_InferenceResponse *response, const uint32_t flags, void *userp) |
Type for callback function indicating that an inference response has completed. More... | |
typedef enum tritonserver_modelcontrolmode_enum | TRITONSERVER_ModelControlMode |
TRITONSERVER_ServerOptions. More... | |
typedef enum tritonserver_ratelimitmode_enum | TRITONSERVER_RateLimitMode |
Rate limit modes. More... | |
typedef enum tritonserver_batchflag_enum | TRITONSERVER_ModelBatchFlag |
TRITONSERVER_Server. More... | |
typedef enum tritonserver_modelindexflag_enum | TRITONSERVER_ModelIndexFlag |
Model index flags. The enum values must be power-of-2 values. More... | |
typedef enum tritonserver_txn_property_flag_enum | TRITONSERVER_ModelTxnPropertyFlag |
Model transaction policy flags. More... | |
typedef enum TRITONSERVER_metrickind_enum | TRITONSERVER_MetricKind |
TRITONSERVER_MetricKind. More... | |
Functions | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ApiVersion (uint32_t *major, uint32_t *minor) |
Get the TRITONSERVER API version supported by the Triton shared library. More... | |
const TRITONSERVER_DECLSPEC char * | TRITONSERVER_DataTypeString (TRITONSERVER_DataType datatype) |
Get the string representation of a data type. More... | |
TRITONSERVER_DECLSPEC TRITONSERVER_DataType | TRITONSERVER_StringToDataType (const char *dtype) |
Get the Triton datatype corresponding to a string representation of a datatype. More... | |
TRITONSERVER_DECLSPEC uint32_t | TRITONSERVER_DataTypeByteSize (TRITONSERVER_DataType datatype) |
Get the size of a Triton datatype in bytes. More... | |
const TRITONSERVER_DECLSPEC char * | TRITONSERVER_MemoryTypeString (TRITONSERVER_MemoryType memtype) |
Get the string representation of a memory type. More... | |
const TRITONSERVER_DECLSPEC char * | TRITONSERVER_ParameterTypeString (TRITONSERVER_ParameterType paramtype) |
Get the string representation of a parameter type. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Parameter * | TRITONSERVER_ParameterNew (const char *name, const TRITONSERVER_ParameterType type, const void *value) |
Create a new parameter object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Parameter * | TRITONSERVER_ParameterBytesNew (const char *name, const void *byte_ptr, const uint64_t size) |
Create a new parameter object with type TRITONSERVER_PARAMETER_BYTES. More... | |
TRITONSERVER_DECLSPEC void | TRITONSERVER_ParameterDelete (struct TRITONSERVER_Parameter *parameter) |
Delete a parameter object. More... | |
const TRITONSERVER_DECLSPEC char * | TRITONSERVER_InstanceGroupKindString (TRITONSERVER_InstanceGroupKind kind) |
Get the string representation of an instance-group kind. More... | |
TRITONSERVER_DECLSPEC bool | TRITONSERVER_LogIsEnabled (TRITONSERVER_LogLevel level) |
Is a log level enabled? More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_LogMessage (TRITONSERVER_LogLevel level, const char *filename, const int line, const char *msg) |
Log a message at a given log level if that level is enabled. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ErrorNew (TRITONSERVER_Error_Code code, const char *msg) |
Create a new error object. More... | |
TRITONSERVER_DECLSPEC void | TRITONSERVER_ErrorDelete (struct TRITONSERVER_Error *error) |
Delete an error object. More... | |
TRITONSERVER_DECLSPEC TRITONSERVER_Error_Code | TRITONSERVER_ErrorCode (struct TRITONSERVER_Error *error) |
Get the error code. More... | |
const TRITONSERVER_DECLSPEC char * | TRITONSERVER_ErrorCodeString (struct TRITONSERVER_Error *error) |
Get the string representation of an error code. More... | |
const TRITONSERVER_DECLSPEC char * | TRITONSERVER_ErrorMessage (struct TRITONSERVER_Error *error) |
Get the error message. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ResponseAllocatorNew (struct TRITONSERVER_ResponseAllocator **allocator, TRITONSERVER_ResponseAllocatorAllocFn_t alloc_fn, TRITONSERVER_ResponseAllocatorReleaseFn_t release_fn, TRITONSERVER_ResponseAllocatorStartFn_t start_fn) |
Create a new response allocator object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ResponseAllocatorSetBufferAttributesFunction (struct TRITONSERVER_ResponseAllocator *allocator, TRITONSERVER_ResponseAllocatorBufferAttributesFn_t buffer_attributes_fn) |
Set the buffer attributes function for a response allocator object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ResponseAllocatorSetQueryFunction (struct TRITONSERVER_ResponseAllocator *allocator, TRITONSERVER_ResponseAllocatorQueryFn_t query_fn) |
Set the query function to a response allocator object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ResponseAllocatorDelete (struct TRITONSERVER_ResponseAllocator *allocator) |
Delete a response allocator. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_MessageNewFromSerializedJson (struct TRITONSERVER_Message **message, const char *base, size_t byte_size) |
TRITONSERVER_Message. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_MessageDelete (struct TRITONSERVER_Message *message) |
Delete a message object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_MessageSerializeToJson (struct TRITONSERVER_Message *message, const char **base, size_t *byte_size) |
Get the base and size of the buffer containing the serialized message in JSON format. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_MetricsDelete (struct TRITONSERVER_Metrics *metrics) |
Delete a metrics object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_MetricsFormatted (struct TRITONSERVER_Metrics *metrics, TRITONSERVER_MetricFormat format, const char **base, size_t *byte_size) |
Get a buffer containing the metrics in the specified format. More... | |
const TRITONSERVER_DECLSPEC char * | TRITONSERVER_InferenceTraceLevelString (TRITONSERVER_InferenceTraceLevel level) |
Get the string representation of a trace level. More... | |
const TRITONSERVER_DECLSPEC char * | TRITONSERVER_InferenceTraceActivityString (TRITONSERVER_InferenceTraceActivity activity) |
Get the string representation of a trace activity. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceTraceNew (struct TRITONSERVER_InferenceTrace **trace, TRITONSERVER_InferenceTraceLevel level, uint64_t parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn, TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void *trace_userp) |
Create a new inference trace object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceTraceTensorNew (struct TRITONSERVER_InferenceTrace **trace, TRITONSERVER_InferenceTraceLevel level, uint64_t parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn, TRITONSERVER_InferenceTraceTensorActivityFn_t tensor_activity_fn, TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void *trace_userp) |
Create a new inference trace object. More... | |
TRITONSERVER_DECLSPEC TRITONSERVER_Error * | TRITONSERVER_InferenceTraceReportActivity (TRITONSERVER_InferenceTrace *trace, uint64_t timestamp, const char *activity_name) |
Report a trace activity. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceTraceDelete (struct TRITONSERVER_InferenceTrace *trace) |
Delete a trace object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceTraceId (struct TRITONSERVER_InferenceTrace *trace, uint64_t *id) |
Get the id associated with a trace. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceTraceParentId (struct TRITONSERVER_InferenceTrace *trace, uint64_t *parent_id) |
Get the parent id associated with a trace. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceTraceModelName (struct TRITONSERVER_InferenceTrace *trace, const char **model_name) |
Get the name of the model associated with a trace. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceTraceModelVersion (struct TRITONSERVER_InferenceTrace *trace, int64_t *model_version) |
Get the version of the model associated with a trace. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceTraceRequestId (struct TRITONSERVER_InferenceTrace *trace, const char **request_id) |
Get the request id associated with a trace. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceTraceSpawnChildTrace (struct TRITONSERVER_InferenceTrace *trace, struct TRITONSERVER_InferenceTrace **child_trace) |
Get the child trace, spawned from the parent trace. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceTraceSetContext (struct TRITONSERVER_InferenceTrace *trace, const char *trace_context) |
Set TRITONSERVER_InferenceTrace context. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceTraceContext (struct TRITONSERVER_InferenceTrace *trace, const char **trace_context) |
Get TRITONSERVER_InferenceTrace context. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestNew (struct TRITONSERVER_InferenceRequest **inference_request, struct TRITONSERVER_Server *server, const char *model_name, const int64_t model_version) |
Create a new inference request object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestDelete (struct TRITONSERVER_InferenceRequest *inference_request) |
Delete an inference request object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestId (struct TRITONSERVER_InferenceRequest *inference_request, const char **id) |
Get the ID for a request. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestSetId (struct TRITONSERVER_InferenceRequest *inference_request, const char *id) |
Set the ID for a request. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestFlags (struct TRITONSERVER_InferenceRequest *inference_request, uint32_t *flags) |
Get the flag(s) associated with a request. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestSetFlags (struct TRITONSERVER_InferenceRequest *inference_request, uint32_t flags) |
Set the flag(s) associated with a request. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestCorrelationId (struct TRITONSERVER_InferenceRequest *inference_request, uint64_t *correlation_id) |
Get the correlation ID of the inference request as an unsigned integer. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestCorrelationIdString (struct TRITONSERVER_InferenceRequest *inference_request, const char **correlation_id) |
Get the correlation ID of the inference request as a string. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestSetCorrelationId (struct TRITONSERVER_InferenceRequest *inference_request, uint64_t correlation_id) |
Set the correlation ID of the inference request to be an unsigned integer. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestSetCorrelationIdString (struct TRITONSERVER_InferenceRequest *inference_request, const char *correlation_id) |
Set the correlation ID of the inference request to be a string. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestCancel (struct TRITONSERVER_InferenceRequest *inference_request) |
Cancel an inference request. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestIsCancelled (struct TRITONSERVER_InferenceRequest *inference_request, bool *is_cancelled) |
Query whether the request is cancelled or not. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestPriority (struct TRITONSERVER_InferenceRequest *inference_request, uint32_t *priority) |
Deprecated. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestPriorityUInt64 (struct TRITONSERVER_InferenceRequest *inference_request, uint64_t *priority) |
Get the priority for a request. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestSetPriority (struct TRITONSERVER_InferenceRequest *inference_request, uint32_t priority) |
Deprecated. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestSetPriorityUInt64 (struct TRITONSERVER_InferenceRequest *inference_request, uint64_t priority) |
Set the priority for a request. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestTimeoutMicroseconds (struct TRITONSERVER_InferenceRequest *inference_request, uint64_t *timeout_us) |
Get the timeout for a request, in microseconds. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestSetTimeoutMicroseconds (struct TRITONSERVER_InferenceRequest *inference_request, uint64_t timeout_us) |
Set the timeout for a request, in microseconds. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestAddInput (struct TRITONSERVER_InferenceRequest *inference_request, const char *name, const TRITONSERVER_DataType datatype, const int64_t *shape, uint64_t dim_count) |
Add an input to a request. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestAddRawInput (struct TRITONSERVER_InferenceRequest *inference_request, const char *name) |
Add a raw input to a request. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestRemoveInput (struct TRITONSERVER_InferenceRequest *inference_request, const char *name) |
Remove an input from a request. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestRemoveAllInputs (struct TRITONSERVER_InferenceRequest *inference_request) |
Remove all inputs from a request. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestAppendInputData (struct TRITONSERVER_InferenceRequest *inference_request, const char *name, const void *base, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id) |
Assign a buffer of data to an input. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy (struct TRITONSERVER_InferenceRequest *inference_request, const char *name, const void *base, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, const char *host_policy_name) |
Assign a buffer of data to an input for execution on all model instances with the specified host policy. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestAppendInputDataWithBufferAttributes (struct TRITONSERVER_InferenceRequest *inference_request, const char *name, const void *base, struct TRITONSERVER_BufferAttributes *buffer_attributes) |
Assign a buffer of data to an input. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestRemoveAllInputData (struct TRITONSERVER_InferenceRequest *inference_request, const char *name) |
Clear all input data from an input, releasing ownership of the buffer(s) that were appended to the input with TRITONSERVER_InferenceRequestAppendInputData or TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestAddRequestedOutput (struct TRITONSERVER_InferenceRequest *inference_request, const char *name) |
Add an output request to an inference request. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestRemoveRequestedOutput (struct TRITONSERVER_InferenceRequest *inference_request, const char *name) |
Remove an output request from an inference request. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestRemoveAllRequestedOutputs (struct TRITONSERVER_InferenceRequest *inference_request) |
Remove all output requests from an inference request. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestSetReleaseCallback (struct TRITONSERVER_InferenceRequest *inference_request, TRITONSERVER_InferenceRequestReleaseFn_t request_release_fn, void *request_release_userp) |
Set the release callback for an inference request. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestSetResponseCallback (struct TRITONSERVER_InferenceRequest *inference_request, struct TRITONSERVER_ResponseAllocator *response_allocator, void *response_allocator_userp, TRITONSERVER_InferenceResponseCompleteFn_t response_fn, void *response_userp) |
Set the allocator and response callback for an inference request. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestSetStringParameter (struct TRITONSERVER_InferenceRequest *request, const char *key, const char *value) |
Set a string parameter in the request. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestSetIntParameter (struct TRITONSERVER_InferenceRequest *request, const char *key, const int64_t value) |
Set an integer parameter in the request. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestSetBoolParameter (struct TRITONSERVER_InferenceRequest *request, const char *key, const bool value) |
Set a boolean parameter in the request. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceRequestSetDoubleParameter (struct TRITONSERVER_InferenceRequest *request, const char *key, const double value) |
Set a double parameter in the request. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceResponseDelete (struct TRITONSERVER_InferenceResponse *inference_response) |
TRITONSERVER_InferenceResponse. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceResponseError (struct TRITONSERVER_InferenceResponse *inference_response) |
Return the error status of an inference response. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceResponseModel (struct TRITONSERVER_InferenceResponse *inference_response, const char **model_name, int64_t *model_version) |
Get model used to produce a response. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceResponseId (struct TRITONSERVER_InferenceResponse *inference_response, const char **request_id) |
Get the ID of the request corresponding to a response. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceResponseParameterCount (struct TRITONSERVER_InferenceResponse *inference_response, uint32_t *count) |
Get the number of parameters available in the response. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceResponseParameter (struct TRITONSERVER_InferenceResponse *inference_response, const uint32_t index, const char **name, TRITONSERVER_ParameterType *type, const void **vvalue) |
Get all information about a parameter. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceResponseOutputCount (struct TRITONSERVER_InferenceResponse *inference_response, uint32_t *count) |
Get the number of outputs available in the response. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceResponseOutput (struct TRITONSERVER_InferenceResponse *inference_response, const uint32_t index, const char **name, TRITONSERVER_DataType *datatype, const int64_t **shape, uint64_t *dim_count, const void **base, size_t *byte_size, TRITONSERVER_MemoryType *memory_type, int64_t *memory_type_id, void **userp) |
Get all information about an output tensor. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_InferenceResponseOutputClassificationLabel (struct TRITONSERVER_InferenceResponse *inference_response, const uint32_t index, const size_t class_index, const char **label) |
Get a classification label associated with an output for a given index. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_BufferAttributesNew (struct TRITONSERVER_BufferAttributes **buffer_attributes) |
TRITONSERVER_BufferAttributes. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_BufferAttributesDelete (struct TRITONSERVER_BufferAttributes *buffer_attributes) |
Delete a buffer attributes object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_BufferAttributesSetMemoryTypeId (struct TRITONSERVER_BufferAttributes *buffer_attributes, int64_t memory_type_id) |
Set the memory type id field of the buffer attributes. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_BufferAttributesSetMemoryType (struct TRITONSERVER_BufferAttributes *buffer_attributes, TRITONSERVER_MemoryType memory_type) |
Set the memory type field of the buffer attributes. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_BufferAttributesSetCudaIpcHandle (struct TRITONSERVER_BufferAttributes *buffer_attributes, void *cuda_ipc_handle) |
Set the CudaIpcHandle field of the buffer attributes. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_BufferAttributesSetByteSize (struct TRITONSERVER_BufferAttributes *buffer_attributes, size_t byte_size) |
Set the byte size field of the buffer attributes. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_BufferAttributesMemoryTypeId (struct TRITONSERVER_BufferAttributes *buffer_attributes, int64_t *memory_type_id) |
Get the memory type id field of the buffer attributes. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_BufferAttributesMemoryType (struct TRITONSERVER_BufferAttributes *buffer_attributes, TRITONSERVER_MemoryType *memory_type) |
Get the memory type field of the buffer attributes. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_BufferAttributesCudaIpcHandle (struct TRITONSERVER_BufferAttributes *buffer_attributes, void **cuda_ipc_handle) |
Get the CudaIpcHandle field of the buffer attributes object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_BufferAttributesByteSize (struct TRITONSERVER_BufferAttributes *buffer_attributes, size_t *byte_size) |
Get the byte size field of the buffer attributes. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsNew (struct TRITONSERVER_ServerOptions **options) |
Create a new server options object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsDelete (struct TRITONSERVER_ServerOptions *options) |
Delete a server options object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetServerId (struct TRITONSERVER_ServerOptions *options, const char *server_id) |
Set the textual ID for the server in a server options. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetModelRepositoryPath (struct TRITONSERVER_ServerOptions *options, const char *model_repository_path) |
Set the model repository path in a server options. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetModelControlMode (struct TRITONSERVER_ServerOptions *options, TRITONSERVER_ModelControlMode mode) |
Set the model control mode in a server options. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetStartupModel (struct TRITONSERVER_ServerOptions *options, const char *model_name) |
Set the model to be loaded at startup in a server options. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetStrictModelConfig (struct TRITONSERVER_ServerOptions *options, bool strict) |
Enable or disable strict model configuration handling in a server options. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetModelConfigName (struct TRITONSERVER_ServerOptions *options, const char *model_config_name) |
Set the custom model configuration name to load for all models. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetRateLimiterMode (struct TRITONSERVER_ServerOptions *options, TRITONSERVER_RateLimitMode mode) |
Set the rate limit mode in a server options. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsAddRateLimiterResource (struct TRITONSERVER_ServerOptions *options, const char *resource_name, const size_t resource_count, const int device) |
Add resource count for rate limiting. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetPinnedMemoryPoolByteSize (struct TRITONSERVER_ServerOptions *options, uint64_t size) |
Set the total pinned memory byte size that the server can allocate in a server options. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetCudaMemoryPoolByteSize (struct TRITONSERVER_ServerOptions *options, int gpu_device, uint64_t size) |
Set the total CUDA memory byte size that the server can allocate on given GPU device in a server options. More... | |
TRITONSERVER_DECLSPEC TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetCudaVirtualAddressSize (TRITONSERVER_ServerOptions *options, int gpu_device, size_t cuda_virtual_address_size) |
Set the size of the virtual address space that will be used for growable memory in implicit state. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetResponseCacheByteSize (struct TRITONSERVER_ServerOptions *options, uint64_t size) |
Deprecated. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetCacheConfig (struct TRITONSERVER_ServerOptions *options, const char *cache_name, const char *config_json) |
Set the cache config that will be used to initialize the cache implementation for "cache_name". More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetCacheDirectory (struct TRITONSERVER_ServerOptions *options, const char *cache_dir) |
Set the directory containing cache shared libraries. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability (struct TRITONSERVER_ServerOptions *options, double cc) |
Set the minimum supported CUDA compute capability in a server options. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetExitOnError (struct TRITONSERVER_ServerOptions *options, bool exit) |
Enable or disable exit-on-error in a server options. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetStrictReadiness (struct TRITONSERVER_ServerOptions *options, bool strict) |
Enable or disable strict readiness handling in a server options. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetExitTimeout (struct TRITONSERVER_ServerOptions *options, unsigned int timeout) |
Set the exit timeout, in seconds, for the server in a server options. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetBufferManagerThreadCount (struct TRITONSERVER_ServerOptions *options, unsigned int thread_count) |
Set the number of threads used in buffer manager in a server options. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetModelLoadThreadCount (struct TRITONSERVER_ServerOptions *options, unsigned int thread_count) |
Set the number of threads to concurrently load models in a server options. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetModelLoadRetryCount (struct TRITONSERVER_ServerOptions *options, unsigned int retry_count) |
Set the number of retries to load a model in a server options. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetModelNamespacing (struct TRITONSERVER_ServerOptions *options, bool enable_namespace) |
Enable model namespacing to allow serving models with the same name if they are in different namespaces. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetEnablePeerAccess (struct TRITONSERVER_ServerOptions *options, bool enable_peer_access) |
Enable peer access to allow GPU device to directly access the memory of another GPU device. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetLogFile (struct TRITONSERVER_ServerOptions *options, const char *file) |
Provide a log output file. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetLogInfo (struct TRITONSERVER_ServerOptions *options, bool log) |
Enable or disable info level logging. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetLogWarn (struct TRITONSERVER_ServerOptions *options, bool log) |
Enable or disable warning level logging. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetLogError (struct TRITONSERVER_ServerOptions *options, bool log) |
Enable or disable error level logging. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetLogFormat (struct TRITONSERVER_ServerOptions *options, const TRITONSERVER_LogFormat format) |
Set the logging format. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetLogVerbose (struct TRITONSERVER_ServerOptions *options, int level) |
Set verbose logging level. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetMetrics (struct TRITONSERVER_ServerOptions *options, bool metrics) |
Enable or disable metrics collection in a server options. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetGpuMetrics (struct TRITONSERVER_ServerOptions *options, bool gpu_metrics) |
Enable or disable GPU metrics collection in a server options. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetCpuMetrics (struct TRITONSERVER_ServerOptions *options, bool cpu_metrics) |
Enable or disable CPU metrics collection in a server options. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetMetricsInterval (struct TRITONSERVER_ServerOptions *options, uint64_t metrics_interval_ms) |
Set the interval for metrics collection in a server options. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetBackendDirectory (struct TRITONSERVER_ServerOptions *options, const char *backend_dir) |
Set the directory containing backend shared libraries. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetRepoAgentDirectory (struct TRITONSERVER_ServerOptions *options, const char *repoagent_dir) |
Set the directory containing repository agent shared libraries. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetModelLoadDeviceLimit (struct TRITONSERVER_ServerOptions *options, const TRITONSERVER_InstanceGroupKind kind, const int device_id, const double fraction) |
Specify the limit on memory usage as a fraction on the device identified by 'kind' and 'device_id'. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetBackendConfig (struct TRITONSERVER_ServerOptions *options, const char *backend_name, const char *setting, const char *value) |
Set a configuration setting for a named backend in a server options. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetHostPolicy (struct TRITONSERVER_ServerOptions *options, const char *policy_name, const char *setting, const char *value) |
Set a host policy setting for a given policy name in a server options. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerOptionsSetMetricsConfig (struct TRITONSERVER_ServerOptions *options, const char *name, const char *setting, const char *value) |
Set a configuration setting for metrics in server options. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerNew (struct TRITONSERVER_Server **server, struct TRITONSERVER_ServerOptions *options) |
Create a new server object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerDelete (struct TRITONSERVER_Server *server) |
Delete a server object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerStop (struct TRITONSERVER_Server *server) |
Stop a server object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerSetExitTimeout (struct TRITONSERVER_Server *server, unsigned int timeout) |
Set the exit timeout on the server object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerRegisterModelRepository (struct TRITONSERVER_Server *server, const char *repository_path, const struct TRITONSERVER_Parameter **name_mapping, const uint32_t mapping_count) |
Register a new model repository. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerUnregisterModelRepository (struct TRITONSERVER_Server *server, const char *repository_path) |
Unregister a model repository. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerPollModelRepository (struct TRITONSERVER_Server *server) |
Check the model repository for changes and update server state based on those changes. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerIsLive (struct TRITONSERVER_Server *server, bool *live) |
Is the server live? More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerIsReady (struct TRITONSERVER_Server *server, bool *ready) |
Is the server ready? More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerModelIsReady (struct TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, bool *ready) |
Is the model ready? More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerModelBatchProperties (struct TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, uint32_t *flags, void **voidp) |
Get the batch properties of the model. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerModelTransactionProperties (struct TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, uint32_t *txn_flags, void **voidp) |
Get the transaction policy of the model. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerMetadata (struct TRITONSERVER_Server *server, struct TRITONSERVER_Message **server_metadata) |
Get the metadata of the server as a TRITONSERVER_Message object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerModelMetadata (struct TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, struct TRITONSERVER_Message **model_metadata) |
Get the metadata of a model as a TRITONSERVER_Message object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerModelStatistics (struct TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, struct TRITONSERVER_Message **model_stats) |
Get the statistics of a model as a TRITONSERVER_Message object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerModelConfig (struct TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, const uint32_t config_version, struct TRITONSERVER_Message **model_config) |
Get the configuration of a model as a TRITONSERVER_Message object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerModelIndex (struct TRITONSERVER_Server *server, uint32_t flags, struct TRITONSERVER_Message **model_index) |
Get the index of all unique models in the model repositories as a TRITONSERVER_Message object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerLoadModel (struct TRITONSERVER_Server *server, const char *model_name) |
Load the requested model or reload the model if it is already loaded. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerLoadModelWithParameters (struct TRITONSERVER_Server *server, const char *model_name, const struct TRITONSERVER_Parameter **parameters, const uint64_t parameter_count) |
Load the requested model or reload the model if it is already loaded, with load parameters provided. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerUnloadModel (struct TRITONSERVER_Server *server, const char *model_name) |
Unload the requested model. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerUnloadModelAndDependents (struct TRITONSERVER_Server *server, const char *model_name) |
Unload the requested model, and also unload any dependent model that was loaded along with the requested model (for example, the models composing an ensemble). More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerMetrics (struct TRITONSERVER_Server *server, struct TRITONSERVER_Metrics **metrics) |
Get the current metrics for the server. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_ServerInferAsync (struct TRITONSERVER_Server *server, struct TRITONSERVER_InferenceRequest *inference_request, struct TRITONSERVER_InferenceTrace *trace) |
Perform inference using the meta-data and inputs supplied by the 'inference_request'. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_MetricFamilyNew (struct TRITONSERVER_MetricFamily **family, const TRITONSERVER_MetricKind kind, const char *name, const char *description) |
Create a new metric family object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_MetricFamilyDelete (struct TRITONSERVER_MetricFamily *family) |
Delete a metric family object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_GetMetricFamilyKind (struct TRITONSERVER_MetricFamily *family, TRITONSERVER_MetricKind *kind) |
Get the TRITONSERVER_MetricKind of the metric family. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_MetricArgsNew (struct TRITONSERVER_MetricArgs **args) |
Create a new metric args object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_MetricArgsSetHistogram (struct TRITONSERVER_MetricArgs *args, const double *buckets, const uint64_t buckets_count) |
Set metric args with histogram metric parameter. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_MetricArgsDelete (struct TRITONSERVER_MetricArgs *args) |
Delete a metric args object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_MetricNew (struct TRITONSERVER_Metric **metric, struct TRITONSERVER_MetricFamily *family, const struct TRITONSERVER_Parameter **labels, const uint64_t label_count) |
Create a new metric object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_MetricNewWithArgs (struct TRITONSERVER_Metric **metric, struct TRITONSERVER_MetricFamily *family, const struct TRITONSERVER_Parameter **labels, const uint64_t label_count, const struct TRITONSERVER_MetricArgs *args) |
Create a new metric object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_MetricDelete (struct TRITONSERVER_Metric *metric) |
Delete a metric object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_MetricValue (struct TRITONSERVER_Metric *metric, double *value) |
Get the current value of a metric object. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_MetricIncrement (struct TRITONSERVER_Metric *metric, double value) |
Increment the current value of metric by value. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_MetricSet (struct TRITONSERVER_Metric *metric, double value) |
Set the current value of metric to value. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_MetricObserve (struct TRITONSERVER_Metric *metric, double value) |
Sample an observation and count it to the appropriate bucket of a metric. More... | |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * | TRITONSERVER_GetMetricKind (struct TRITONSERVER_Metric *metric, TRITONSERVER_MetricKind *kind) |
Get the TRITONSERVER_MetricKind of metric of its corresponding family. More... | |
#define TRITONSERVER_API_VERSION_MAJOR 1 |
TRITONSERVER API Version.
The TRITONSERVER API is versioned with major and minor version numbers. Any change to the API that does not impact backwards compatibility (for example, adding a non-required function) increases the minor version number. Any change that breaks backwards compatibility (for example, deleting or changing the behavior of a function) increases the major version number. A client should check that the API version used to compile the client is compatible with the API version of the Triton shared library that it is linking against. This is typically done by code similar to the following which makes sure that the major versions are equal and that the minor version of the Triton shared library is >= the minor version used to build the client.
uint32_t api_version_major, api_version_minor; TRITONSERVER_ApiVersion(&api_version_major, &api_version_minor); if ((api_version_major != TRITONSERVER_API_VERSION_MAJOR) || (api_version_minor < TRITONSERVER_API_VERSION_MINOR)) { return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_UNSUPPORTED, "triton server API version does not support this client"); }
Definition at line 94 of file tritonserver.h.
#define TRITONSERVER_API_VERSION_MINOR 34 |
Definition at line 95 of file tritonserver.h.
#define TRITONSERVER_DECLSPEC |
Definition at line 50 of file tritonserver.h.
typedef enum TRITONSERVER_datatype_enum TRITONSERVER_DataType |
TRITONSERVER_DataType.
Tensor data types recognized by TRITONSERVER.
typedef enum TRITONSERVER_errorcode_enum TRITONSERVER_Error_Code |
TRITONSERVER_Error.
Errors are reported by a TRITONSERVER_Error object. A NULL TRITONSERVER_Error indicates no error, a non-NULL TRITONSERVER_Error indicates error and the code and message for the error can be retrieved from the object.
The caller takes ownership of a TRITONSERVER_Error object returned by the API and must call TRITONSERVER_ErrorDelete to release the object. The TRITONSERVER_Error error codes
typedef void(* TRITONSERVER_InferenceRequestReleaseFn_t) (struct TRITONSERVER_InferenceRequest *request, const uint32_t flags, void *userp) |
Type for inference request release callback function.
The callback indicates what type of release is being performed on the request and for some of these the callback function takes ownership of the TRITONSERVER_InferenceRequest object. The 'userp' data is the data provided as 'request_release_userp' in the call to TRITONSERVER_InferenceRequestSetReleaseCallback.
One or more flags will be specified when the callback is invoked, and the callback must take the following actions:
Note that currently TRITONSERVER_REQUEST_RELEASE_ALL should always be set when the callback is invoked but in the future that may change, so the callback should explicitly check for the flag before taking ownership of the request object.
Definition at line 1004 of file tritonserver.h.
typedef void(* TRITONSERVER_InferenceResponseCompleteFn_t) (struct TRITONSERVER_InferenceResponse *response, const uint32_t flags, void *userp) |
Type for callback function indicating that an inference response has completed.
The callback function takes ownership of the TRITONSERVER_InferenceResponse object. The 'userp' data is the data provided as 'response_userp' in the call to TRITONSERVER_InferenceRequestSetResponseCallback.
One or more flags may be specified when the callback is invoked:
Definition at line 1025 of file tritonserver.h.
Trace activities.
typedef void(* TRITONSERVER_InferenceTraceActivityFn_t) (struct TRITONSERVER_InferenceTrace *trace, TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns, void *userp) |
Type for trace timeline activity callback function.
This callback function is used to report activity occurring for a trace. This function does not take ownership of 'trace' and so any information needed from that object must be copied before returning. The 'userp' data is the same as what is supplied in the call to TRITONSERVER_InferenceTraceNew.
Definition at line 755 of file tritonserver.h.
TRITONSERVER_InferenceTrace.
Object that represents tracing for an inference request. Trace levels. The trace level controls the type of trace activities that are reported for an inference request.
Trace level values are power-of-2 and can be combined to trace multiple types of activities. For example, use (TRITONSERVER_TRACE_LEVEL_TIMESTAMPS | TRITONSERVER_TRACE_LEVEL_TENSORS) to trace both timestamps and tensors for an inference request.
TRITONSERVER_TRACE_LEVEL_MIN and TRITONSERVER_TRACE_LEVEL_MAX are deprecated and should not be used.
typedef void(* TRITONSERVER_InferenceTraceReleaseFn_t) (struct TRITONSERVER_InferenceTrace *trace, void *userp) |
Type for trace release callback function.
This callback function is called when all activity for the trace has completed. The callback function takes ownership of the TRITONSERVER_InferenceTrace object. The 'userp' data is the same as what is supplied in the call to TRITONSERVER_InferenceTraceNew.
Definition at line 778 of file tritonserver.h.
typedef void(* TRITONSERVER_InferenceTraceTensorActivityFn_t) (struct TRITONSERVER_InferenceTrace *trace, TRITONSERVER_InferenceTraceActivity activity, const char *name, TRITONSERVER_DataType datatype, const void *base, size_t byte_size, const int64_t *shape, uint64_t dim_count, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, void *userp) |
Type for trace tensor activity callback function.
This callback function is used to report tensor activity occurring for a trace. This function does not take ownership of 'trace' and so any information needed from that object must be copied before returning. The 'userp' data is the same as what is supplied in the call to TRITONSERVER_InferenceTraceTensorNew.
Definition at line 766 of file tritonserver.h.
TRITONSERVER_InstanceGroupKind.
Kinds of instance groups recognized by TRITONSERVER.
typedef enum TRITONSERVER_logformat_enum TRITONSERVER_LogFormat |
Logging Formats.
The TRITONSERVER API offers two logging formats. The formats have a common set of fields but differ in how the timestamp for a log entry is represented. Messages are serialized according to JSON encoding rules by default. This behavior can be disabled by setting the environment variable TRITON_SERVER_ESCAPE_LOG_MESSAGES to "0".
<level><month><day><hour>:<min>:<sec>.<usec> <pid> <file>:<line>] <msg>
Example:
I0520 20:03:25.829575 3355 model_lifecycle.cc:441] "AsyncLoad() 'simple'"
<year>-<month>-<day>T<hour>:<min>:<sec>Z <level> <pid> <file>:<line>] <msg>
Example:
2024-05-20T20:03:26Z I 3415 model_lifecycle.cc:441] "AsyncLoad() 'simple'"
typedef enum TRITONSERVER_loglevel_enum TRITONSERVER_LogLevel |
TRITONSERVER_Logging.
Types/levels of logging.
typedef enum TRITONSERVER_memorytype_enum TRITONSERVER_MemoryType |
TRITONSERVER_MemoryType.
Types of memory recognized by TRITONSERVER.
typedef enum tritonserver_metricformat_enum TRITONSERVER_MetricFormat |
TRITONSERVER_Metrics.
Object representing metrics. Metric format types
typedef enum TRITONSERVER_metrickind_enum TRITONSERVER_MetricKind |
TRITONSERVER_MetricKind.
Types of metrics recognized by TRITONSERVER.
typedef enum tritonserver_batchflag_enum TRITONSERVER_ModelBatchFlag |
TRITONSERVER_Server.
An inference server. Model batch flags. The enum values must be power-of-2 values.
TRITONSERVER_ServerOptions.
Options to use when creating an inference server. Model control modes
Model index flags. The enum values must be power-of-2 values.
Model transaction policy flags.
The enum values must be power-of-2 values.
TRITONSERVER_ParameterType.
Types of parameters recognized by TRITONSERVER.
Rate limit modes.
typedef enum tritonserver_requestflag_enum TRITONSERVER_RequestFlag |
TRITONSERVER_InferenceRequest.
Object representing an inference request. The inference request provides the meta-data and input tensor values needed for an inference and returns the inference result meta-data and output tensors. An inference request object can be modified and reused multiple times. Inference request flags. The enum values must be power-of-2 values.
Inference request release flags.
The enum values must be power-of-2 values.
typedef struct TRITONSERVER_Error*(* TRITONSERVER_ResponseAllocatorAllocFn_t) (struct TRITONSERVER_ResponseAllocator *allocator, const char *tensor_name, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, void *userp, void **buffer, void **buffer_userp, TRITONSERVER_MemoryType *actual_memory_type, int64_t *actual_memory_type_id) |
TRITONSERVER_ResponseAllocator.
Object representing a memory allocator for output tensors in an inference response. Type for allocation function that allocates a buffer to hold an output tensor.
allocator | The allocator that is provided in the call to TRITONSERVER_InferenceRequestSetResponseCallback. |
tensor_name | The name of the output tensor to allocate for. |
byte_size | The size of the buffer to allocate. |
memory_type | The type of memory that the caller prefers for the buffer allocation. |
memory_type_id | The ID of the memory that the caller prefers for the buffer allocation. |
userp | The user data pointer that is provided as 'response_allocator_userp' in the call to TRITONSERVER_InferenceRequestSetResponseCallback. |
buffer | Returns a pointer to the allocated memory. |
buffer_userp | Returns a user-specified value to associate with the buffer, or nullptr if no user-specified value should be associated with the buffer. This value will be provided in the call to TRITONSERVER_ResponseAllocatorReleaseFn_t when the buffer is released and will also be returned by TRITONSERVER_InferenceResponseOutput. |
actual_memory_type | Returns the type of memory where the allocation resides. May be different than the type of memory requested by 'memory_type'. |
actual_memory_type_id | Returns the ID of the memory where the allocation resides. May be different than the ID of the memory requested by 'memory_type_id'. |
Definition at line 414 of file tritonserver.h.
typedef struct TRITONSERVER_Error*(* TRITONSERVER_ResponseAllocatorBufferAttributesFn_t) (struct TRITONSERVER_ResponseAllocator *allocator, const char *tensor_name, struct TRITONSERVER_BufferAttributes *buffer_attributes, void *userp, void *buffer_userp) |
Type for allocation function that allocates a buffer to hold an output tensor with buffer attributes.
The callback function must fill in the appropriate buffer attributes information related to this buffer. If set, this function is always called after TRITONSERVER_ResponseAllocatorAllocFn_t function.
allocator | The allocator that is provided in the call to TRITONSERVER_InferenceRequestSetResponseCallback. |
tensor_name | The name of the output tensor to allocate for. |
buffer_attributes | The buffer attributes associated with the buffer. |
userp | The user data pointer that is provided as 'response_allocator_userp' in the call to TRITONSERVER_InferenceRequestSetResponseCallback. |
buffer_userp | Returns a user-specified value to associate with the buffer, or nullptr if no user-specified value should be associated with the buffer. This value will be provided in the call to TRITONSERVER_ResponseAllocatorReleaseFn_t when the buffer is released and will also be returned by TRITONSERVER_InferenceResponseOutput. |
Definition at line 444 of file tritonserver.h.
typedef struct TRITONSERVER_Error*(* TRITONSERVER_ResponseAllocatorQueryFn_t) (struct TRITONSERVER_ResponseAllocator *allocator, void *userp, const char *tensor_name, size_t *byte_size, TRITONSERVER_MemoryType *memory_type, int64_t *memory_type_id) |
Type for function that is called to query the allocator's preferred memory type and memory type ID.
As much as possible, the allocator should attempt to return the same memory_type and memory_type_id values that will be returned by the subsequent call to TRITONSERVER_ResponseAllocatorAllocFn_t. But the allocator is not required to do so.
allocator | The allocator that is provided in the call to TRITONSERVER_InferenceRequestSetResponseCallback. |
userp | The user data pointer that is provided as 'response_allocator_userp' in the call to TRITONSERVER_InferenceRequestSetResponseCallback. |
tensor_name | The name of the output tensor. This is optional and it should be set to nullptr to indicate that the tensor name has not been determined. |
byte_size | The expected size of the buffer. This is optional and it should be set to nullptr to indicate that the byte size has not been determined. |
memory_type | Acts as both input and output. On input gives the memory type preferred by the caller. Returns the memory type preferred by the allocator, taking into account the caller's preferred type. |
memory_type_id | Acts as both input and output. On input gives the memory type ID preferred by the caller. Returns the memory type ID preferred by the allocator, taking into account the caller's preferred type ID. |
Definition at line 473 of file tritonserver.h.
typedef struct TRITONSERVER_Error*(* TRITONSERVER_ResponseAllocatorReleaseFn_t) (struct TRITONSERVER_ResponseAllocator *allocator, void *buffer, void *buffer_userp, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id) |
Type for function that is called when the server no longer holds any reference to a buffer allocated by TRITONSERVER_ResponseAllocatorAllocFn_t.
In practice this function is typically called when the response object associated with the buffer is deleted by TRITONSERVER_InferenceResponseDelete.
allocator | The allocator that is provided in the call to TRITONSERVER_InferenceRequestSetResponseCallback. |
buffer | Pointer to the buffer to be freed. |
buffer_userp | The user-specified value associated with the buffer in TRITONSERVER_ResponseAllocatorAllocFn_t. |
byte_size | The size of the buffer. |
memory_type | The type of memory holding the buffer. |
memory_type_id | The ID of the memory holding the buffer. |
Definition at line 495 of file tritonserver.h.
typedef struct TRITONSERVER_Error*(* TRITONSERVER_ResponseAllocatorStartFn_t) (struct TRITONSERVER_ResponseAllocator *allocator, void *userp) |
Type for function that is called to indicate that subsequent allocation requests will refer to a new response.
allocator | The allocator that is provided in the call to TRITONSERVER_InferenceRequestSetResponseCallback. |
userp | The user data pointer that is provided as 'response_allocator_userp' in the call to TRITONSERVER_InferenceRequestSetResponseCallback. |
Definition at line 509 of file tritonserver.h.
Inference response complete flags.
The enum values must be power-of-2 values.
TRITONSERVER_Server.
An inference server. Model batch flags. The enum values must be power-of-2 values.
Enumerator | |
---|---|
TRITONSERVER_BATCH_UNKNOWN | |
TRITONSERVER_BATCH_FIRST_DIM |
Definition at line 2270 of file tritonserver.h.
TRITONSERVER_DataType.
Tensor data types recognized by TRITONSERVER.
Definition at line 115 of file tritonserver.h.
TRITONSERVER_Error.
Errors are reported by a TRITONSERVER_Error object. A NULL TRITONSERVER_Error indicates no error, a non-NULL TRITONSERVER_Error indicates error and the code and message for the error can be retrieved from the object.
The caller takes ownership of a TRITONSERVER_Error object returned by the API and must call TRITONSERVER_ErrorDelete to release the object. The TRITONSERVER_Error error codes
Definition at line 324 of file tritonserver.h.
TRITONSERVER_InstanceGroupKind.
Kinds of instance groups recognized by TRITONSERVER.
Enumerator | |
---|---|
TRITONSERVER_INSTANCEGROUPKIND_AUTO | |
TRITONSERVER_INSTANCEGROUPKIND_CPU | |
TRITONSERVER_INSTANCEGROUPKIND_GPU | |
TRITONSERVER_INSTANCEGROUPKIND_MODEL |
Definition at line 236 of file tritonserver.h.
Logging Formats.
The TRITONSERVER API offers two logging formats. The formats have a common set of fields but differ in how the timestamp for a log entry is represented. Messages are serialized according to JSON encoding rules by default. This behavior can be disabled by setting the environment variable TRITON_SERVER_ESCAPE_LOG_MESSAGES to "0".
<level><month><day><hour>:<min>:<sec>.<usec> <pid> <file>:<line>] <msg>
Example:
I0520 20:03:25.829575 3355 model_lifecycle.cc:441] "AsyncLoad() 'simple'"
<year>-<month>-<day>T<hour>:<min>:<sec>Z <level> <pid> <file>:<line>] <msg>
Example:
2024-05-20T20:03:26Z I 3415 model_lifecycle.cc:441] "AsyncLoad() 'simple'"
Enumerator | |
---|---|
TRITONSERVER_LOG_DEFAULT | |
TRITONSERVER_LOG_ISO8601 |
Definition at line 289 of file tritonserver.h.
TRITONSERVER_Logging.
Types/levels of logging.
Enumerator | |
---|---|
TRITONSERVER_LOG_INFO | |
TRITONSERVER_LOG_WARN | |
TRITONSERVER_LOG_ERROR | |
TRITONSERVER_LOG_VERBOSE |
Definition at line 256 of file tritonserver.h.
TRITONSERVER_MemoryType.
Types of memory recognized by TRITONSERVER.
Enumerator | |
---|---|
TRITONSERVER_MEMORY_CPU | |
TRITONSERVER_MEMORY_CPU_PINNED | |
TRITONSERVER_MEMORY_GPU |
Definition at line 163 of file tritonserver.h.
TRITONSERVER_Metrics.
Object representing metrics. Metric format types
Enumerator | |
---|---|
TRITONSERVER_METRIC_PROMETHEUS |
Definition at line 652 of file tritonserver.h.
TRITONSERVER_MetricKind.
Types of metrics recognized by TRITONSERVER.
Enumerator | |
---|---|
TRITONSERVER_METRIC_KIND_COUNTER | |
TRITONSERVER_METRIC_KIND_GAUGE | |
TRITONSERVER_METRIC_KIND_HISTOGRAM |
Definition at line 2617 of file tritonserver.h.
TRITONSERVER_ServerOptions.
Options to use when creating an inference server. Model control modes
Enumerator | |
---|---|
TRITONSERVER_MODEL_CONTROL_NONE | |
TRITONSERVER_MODEL_CONTROL_POLL | |
TRITONSERVER_MODEL_CONTROL_EXPLICIT |
Definition at line 1761 of file tritonserver.h.
Model index flags. The enum values must be power-of-2 values.
Enumerator | |
---|---|
TRITONSERVER_INDEX_FLAG_READY |
Definition at line 2276 of file tritonserver.h.
TRITONSERVER_ParameterType.
Types of parameters recognized by TRITONSERVER.
Enumerator | |
---|---|
TRITONSERVER_PARAMETER_STRING | |
TRITONSERVER_PARAMETER_INT | |
TRITONSERVER_PARAMETER_BOOL | |
TRITONSERVER_PARAMETER_DOUBLE | |
TRITONSERVER_PARAMETER_BYTES |
Definition at line 182 of file tritonserver.h.
Rate limit modes.
Enumerator | |
---|---|
TRITONSERVER_RATE_LIMIT_OFF | |
TRITONSERVER_RATE_LIMIT_EXEC_COUNT |
Definition at line 1768 of file tritonserver.h.
TRITONSERVER_InferenceRequest.
Object representing an inference request. The inference request provides the meta-data and input tensor values needed for an inference and returns the inference result meta-data and output tensors. An inference request object can be modified and reused multiple times. Inference request flags. The enum values must be power-of-2 values.
Enumerator | |
---|---|
TRITONSERVER_REQUEST_FLAG_SEQUENCE_START | |
TRITONSERVER_REQUEST_FLAG_SEQUENCE_END |
Definition at line 957 of file tritonserver.h.
Inference request release flags.
The enum values must be power-of-2 values.
Enumerator | |
---|---|
TRITONSERVER_REQUEST_RELEASE_ALL | |
TRITONSERVER_REQUEST_RELEASE_RESCHEDULE |
Definition at line 964 of file tritonserver.h.
Inference response complete flags.
The enum values must be power-of-2 values.
Enumerator | |
---|---|
TRITONSERVER_RESPONSE_COMPLETE_FINAL |
Definition at line 971 of file tritonserver.h.
Trace activities.
Definition at line 726 of file tritonserver.h.
TRITONSERVER_InferenceTrace.
Object that represents tracing for an inference request. Trace levels. The trace level controls the type of trace activities that are reported for an inference request.
Trace level values are power-of-2 and can be combined to trace multiple types of activities. For example, use (TRITONSERVER_TRACE_LEVEL_TIMESTAMPS | TRITONSERVER_TRACE_LEVEL_TENSORS) to trace both timestamps and tensors for an inference request.
TRITONSERVER_TRACE_LEVEL_MIN and TRITONSERVER_TRACE_LEVEL_MAX are deprecated and should not be used.
Definition at line 703 of file tritonserver.h.
Model transaction policy flags.
The enum values must be power-of-2 values.
Enumerator | |
---|---|
TRITONSERVER_TXN_ONE_TO_ONE | |
TRITONSERVER_TXN_DECOUPLED |
Definition at line 2282 of file tritonserver.h.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ApiVersion | ( | uint32_t * | major, |
uint32_t * | minor | ||
) |
Get the TRITONSERVER API version supported by the Triton shared library.
This value can be compared against the TRITONSERVER_API_VERSION_MAJOR and TRITONSERVER_API_VERSION_MINOR used to build the client to ensure that Triton shared library is compatible with the client.
major | Returns the TRITONSERVER API major version supported by Triton. |
minor | Returns the TRITONSERVER API minor version supported by Triton. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_BufferAttributesByteSize | ( | struct TRITONSERVER_BufferAttributes * | buffer_attributes, |
size_t * | byte_size | ||
) |
Get the byte size field of the buffer attributes.
buffer_attributes | The buffer attributes object. |
byte_size | Returns the byte size associated with the buffer attributes object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_BufferAttributesCudaIpcHandle | ( | struct TRITONSERVER_BufferAttributes * | buffer_attributes, |
void ** | cuda_ipc_handle | ||
) |
Get the CudaIpcHandle field of the buffer attributes object.
buffer_attributes | The buffer attributes object. |
cuda_ipc_handle | Returns the CUDA IPC handle associated with the buffer attributes object. If the cudaIpcHandle does not exist for the buffer, nullptr will be returned. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_BufferAttributesDelete | ( | struct TRITONSERVER_BufferAttributes * | buffer_attributes | ) |
Delete a buffer attributes object.
buffer_attributes | The buffer_attributes object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_BufferAttributesMemoryType | ( | struct TRITONSERVER_BufferAttributes * | buffer_attributes, |
TRITONSERVER_MemoryType * | memory_type | ||
) |
Get the memory type field of the buffer attributes.
buffer_attributes | The buffer attributes object. |
memory_type | Returns the memory type associated with the buffer attributes object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_BufferAttributesMemoryTypeId | ( | struct TRITONSERVER_BufferAttributes * | buffer_attributes, |
int64_t * | memory_type_id | ||
) |
Get the memory type id field of the buffer attributes.
buffer_attributes | The buffer attributes object. |
memory_type_id | Returns the memory type id associated with the buffer attributes object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_BufferAttributesNew | ( | struct TRITONSERVER_BufferAttributes ** | buffer_attributes | ) |
TRITONSERVER_BufferAttributes.
API to create, modify, or retrieve attributes associated with a buffer. Create a new buffer attributes object. The caller takes ownership of the TRITONSERVER_BufferAttributes object and must call TRITONSERVER_BufferAttributesDelete to release the object.
buffer_attributes | Returns the new buffer attributes object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_BufferAttributesSetByteSize | ( | struct TRITONSERVER_BufferAttributes * | buffer_attributes, |
size_t | byte_size | ||
) |
Set the byte size field of the buffer attributes.
buffer_attributes | The buffer attributes object. |
byte_size | Byte size to assign to the buffer attributes object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_BufferAttributesSetCudaIpcHandle | ( | struct TRITONSERVER_BufferAttributes * | buffer_attributes, |
void * | cuda_ipc_handle | ||
) |
Set the CudaIpcHandle field of the buffer attributes.
buffer_attributes | The buffer attributes object. |
cuda_ipc_handle | The CudaIpcHandle to assign to the buffer attributes object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_BufferAttributesSetMemoryType | ( | struct TRITONSERVER_BufferAttributes * | buffer_attributes, |
TRITONSERVER_MemoryType | memory_type | ||
) |
Set the memory type field of the buffer attributes.
buffer_attributes | The buffer attributes object. |
memory_type | Memory type to assign to the buffer attributes object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_BufferAttributesSetMemoryTypeId | ( | struct TRITONSERVER_BufferAttributes * | buffer_attributes, |
int64_t | memory_type_id | ||
) |
Set the memory type id field of the buffer attributes.
buffer_attributes | The buffer attributes object. |
memory_type_id | Memory type id to assign to the buffer attributes object. |
TRITONSERVER_DECLSPEC uint32_t TRITONSERVER_DataTypeByteSize | ( | TRITONSERVER_DataType | datatype | ) |
Get the size of a Triton datatype in bytes.
Zero is returned for TRITONSERVER_TYPE_BYTES because it has a variable size. Zero is returned for TRITONSERVER_TYPE_INVALID.
datatype | The datatype. |
const TRITONSERVER_DECLSPEC char* TRITONSERVER_DataTypeString | ( | TRITONSERVER_DataType | datatype | ) |
Get the string representation of a data type.
The returned string is not owned by the caller and so should not be modified or freed.
datatype | The data type. |
TRITONSERVER_DECLSPEC TRITONSERVER_Error_Code TRITONSERVER_ErrorCode | ( | struct TRITONSERVER_Error * | error | ) |
Get the error code.
error | The error object. |
const TRITONSERVER_DECLSPEC char* TRITONSERVER_ErrorCodeString | ( | struct TRITONSERVER_Error * | error | ) |
Get the string representation of an error code.
The returned string is not owned by the caller and so should not be modified or freed. The lifetime of the returned string extends only as long as 'error' and must not be accessed once 'error' is deleted.
error | The error object. |
TRITONSERVER_DECLSPEC void TRITONSERVER_ErrorDelete | ( | struct TRITONSERVER_Error * | error | ) |
Delete an error object.
error | The error object. |
const TRITONSERVER_DECLSPEC char* TRITONSERVER_ErrorMessage | ( | struct TRITONSERVER_Error * | error | ) |
Get the error message.
The returned string is not owned by the caller and so should not be modified or freed. The lifetime of the returned string extends only as long as 'error' and must not be accessed once 'error' is deleted.
error | The error object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ErrorNew | ( | TRITONSERVER_Error_Code | code, |
const char * | msg | ||
) |
Create a new error object.
The caller takes ownership of the TRITONSERVER_Error object and must call TRITONSERVER_ErrorDelete to release the object.
code | The error code. |
msg | The error message. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_GetMetricFamilyKind | ( | struct TRITONSERVER_MetricFamily * | family, |
TRITONSERVER_MetricKind * | kind | ||
) |
Get the TRITONSERVER_MetricKind of the metric family.
family | The metric family object to query. |
kind | Returns the TRITONSERVER_MetricKind of metric. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_GetMetricKind | ( | struct TRITONSERVER_Metric * | metric, |
TRITONSERVER_MetricKind * | kind | ||
) |
Get the TRITONSERVER_MetricKind of metric of its corresponding family.
metric | The metric object to query. |
kind | Returns the TRITONSERVER_MetricKind of metric. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestAddInput | ( | struct TRITONSERVER_InferenceRequest * | inference_request, |
const char * | name, | ||
const TRITONSERVER_DataType | datatype, | ||
const int64_t * | shape, | ||
uint64_t | dim_count | ||
) |
Add an input to a request.
inference_request | The request object. |
name | The name of the input. |
datatype | The type of the input. Valid type names are BOOL, UINT8, UINT16, UINT32, UINT64, INT8, INT16, INT32, INT64, FP16, FP32, FP64, and BYTES. |
shape | The shape of the input. |
dim_count | The number of dimensions of 'shape'. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestAddRawInput | ( | struct TRITONSERVER_InferenceRequest * | inference_request, |
const char * | name | ||
) |
Add a raw input to a request.
The name recognized by the model, data type and shape of the input will be deduced from model configuration. This function must be called at most once on request with no other input to ensure the deduction is accurate.
inference_request | The request object. |
name | The name of the input. This name is only used as a reference of the raw input in other Tritonserver APIs. It doesn't associate with the name used in the model. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestAddRequestedOutput | ( | struct TRITONSERVER_InferenceRequest * | inference_request, |
const char * | name | ||
) |
Add an output request to an inference request.
inference_request | The request object. |
name | The name of the output. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestAppendInputData | ( | struct TRITONSERVER_InferenceRequest * | inference_request, |
const char * | name, | ||
const void * | base, | ||
size_t | byte_size, | ||
TRITONSERVER_MemoryType | memory_type, | ||
int64_t | memory_type_id | ||
) |
Assign a buffer of data to an input.
The buffer will be appended to any existing buffers for that input. The 'inference_request' object takes ownership of the buffer and so the caller should not modify or free the buffer until that ownership is released by 'inference_request' being deleted or by the input being removed from 'inference_request'.
inference_request | The request object. |
name | The name of the input. |
base | The base address of the input data. |
byte_size | The size, in bytes, of the input data. |
memory_type | The memory type of the input data. |
memory_type_id | The memory type id of the input data. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestAppendInputDataWithBufferAttributes | ( | struct TRITONSERVER_InferenceRequest * | inference_request, |
const char * | name, | ||
const void * | base, | ||
struct TRITONSERVER_BufferAttributes * | buffer_attributes | ||
) |
Assign a buffer of data to an input.
The buffer will be appended to any existing buffers for that input. The 'inference_request' object takes ownership of the buffer and so the caller should not modify or free the buffer until that ownership is released by 'inference_request' being deleted or by the input being removed from 'inference_request'.
inference_request | The request object. |
name | The name of the input. |
base | The base address of the input data. |
buffer_attributes | The buffer attributes of the input. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy | ( | struct TRITONSERVER_InferenceRequest * | inference_request, |
const char * | name, | ||
const void * | base, | ||
size_t | byte_size, | ||
TRITONSERVER_MemoryType | memory_type, | ||
int64_t | memory_type_id, | ||
const char * | host_policy_name | ||
) |
Assign a buffer of data to an input for execution on all model instances with the specified host policy.
The buffer will be appended to any existing buffers for that input on all devices with this host policy. The 'inference_request' object takes ownership of the buffer and so the caller should not modify or free the buffer until that ownership is released by 'inference_request' being deleted or by the input being removed from 'inference_request'. If the execution is scheduled on a device that does not have an input buffer specified using this function, then the input buffer specified with TRITONSERVER_InferenceRequestAppendInputData will be used, so a non-host-policy-specific version of data must be added using that API.
inference_request | The request object. |
name | The name of the input. |
base | The base address of the input data. |
byte_size | The size, in bytes, of the input data. |
memory_type | The memory type of the input data. |
memory_type_id | The memory type id of the input data. |
host_policy_name | All model instances executing with this host_policy will use this input buffer for execution. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestCancel | ( | struct TRITONSERVER_InferenceRequest * | inference_request | ) |
Cancel an inference request.
Requests are canceled on a best effort basis and no guarantee is provided that cancelling a request will result in early termination. Note that the inference request cancellation status will be reset after TRITONSERVER_InferAsync is run. This means that if you cancel the request before calling TRITONSERVER_InferAsync the request will not be cancelled.
inference_request | The request object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestCorrelationId | ( | struct TRITONSERVER_InferenceRequest * | inference_request, |
uint64_t * | correlation_id | ||
) |
Get the correlation ID of the inference request as an unsigned integer.
Default is 0, which indicates that the request has no correlation ID. If the correlation id associated with the inference request is a string, this function will return a failure. The correlation ID is used to indicate that two or more inference requests are related to each other. How this relationship is handled by the inference server is determined by the model's scheduling policy.
inference_request | The request object. |
correlation_id | Returns the correlation ID. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestCorrelationIdString | ( | struct TRITONSERVER_InferenceRequest * | inference_request, |
const char ** | correlation_id | ||
) |
Get the correlation ID of the inference request as a string.
Default is empty "", which indicates that the request has no correlation ID. If the correlation id associated with the inference request is an unsigned integer, then this function will return a failure. The correlation ID is used to indicate that two or more inference requests are related to each other. How this relationship is handled by the inference server is determined by the model's scheduling policy.
inference_request | The request object. |
correlation_id | Returns the correlation ID. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestDelete | ( | struct TRITONSERVER_InferenceRequest * | inference_request | ) |
Delete an inference request object.
inference_request | The request object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestFlags | ( | struct TRITONSERVER_InferenceRequest * | inference_request, |
uint32_t * | flags | ||
) |
Get the flag(s) associated with a request.
On return 'flags' holds a bitwise-or of all flag values, see TRITONSERVER_RequestFlag for available flags.
inference_request | The request object. |
flags | Returns the flags. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestId | ( | struct TRITONSERVER_InferenceRequest * | inference_request, |
const char ** | id | ||
) |
Get the ID for a request.
The returned ID is owned by 'inference_request' and must not be modified or freed by the caller.
inference_request | The request object. |
id | Returns the ID. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestIsCancelled | ( | struct TRITONSERVER_InferenceRequest * | inference_request, |
bool * | is_cancelled | ||
) |
Query whether the request is cancelled or not.
If possible the backend should terminate any processing and send an error response with cancelled status.
inference_request | The request object. |
is_cancelled | Returns whether the inference request is cancelled or not. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestNew | ( | struct TRITONSERVER_InferenceRequest ** | inference_request, |
struct TRITONSERVER_Server * | server, | ||
const char * | model_name, | ||
const int64_t | model_version | ||
) |
Create a new inference request object.
inference_request | Returns the new request object. |
server | the inference server object. |
model_name | The name of the model to use for the request. |
model_version | The version of the model to use for the request. If -1 then the server will choose a version based on the model's policy. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestPriority | ( | struct TRITONSERVER_InferenceRequest * | inference_request, |
uint32_t * | priority | ||
) |
Deprecated.
See TRITONSERVER_InferenceRequestPriorityUInt64 instead.
Get the priority for a request. The default is 0 indicating that the request does not specify a priority and so will use the model's default priority.
inference_request | The request object. |
priority | Returns the priority level. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestPriorityUInt64 | ( | struct TRITONSERVER_InferenceRequest * | inference_request, |
uint64_t * | priority | ||
) |
Get the priority for a request.
The default is 0 indicating that the request does not specify a priority and so will use the model's default priority.
inference_request | The request object. |
priority | Returns the priority level. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestRemoveAllInputData | ( | struct TRITONSERVER_InferenceRequest * | inference_request, |
const char * | name | ||
) |
Clear all input data from an input, releasing ownership of the buffer(s) that were appended to the input with TRITONSERVER_InferenceRequestAppendInputData or TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy.
inference_request | The request object. |
name | The name of the input. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestRemoveAllInputs | ( | struct TRITONSERVER_InferenceRequest * | inference_request | ) |
Remove all inputs from a request.
inference_request | The request object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestRemoveAllRequestedOutputs | ( | struct TRITONSERVER_InferenceRequest * | inference_request | ) |
Remove all output requests from an inference request.
inference_request | The request object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestRemoveInput | ( | struct TRITONSERVER_InferenceRequest * | inference_request, |
const char * | name | ||
) |
Remove an input from a request.
inference_request | The request object. |
name | The name of the input. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestRemoveRequestedOutput | ( | struct TRITONSERVER_InferenceRequest * | inference_request, |
const char * | name | ||
) |
Remove an output request from an inference request.
inference_request | The request object. |
name | The name of the output. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetBoolParameter | ( | struct TRITONSERVER_InferenceRequest * | request, |
const char * | key, | ||
const bool | value | ||
) |
Set a boolean parameter in the request.
request | The request. |
key | The name of the parameter. |
value | The value of the parameter. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetCorrelationId | ( | struct TRITONSERVER_InferenceRequest * | inference_request, |
uint64_t | correlation_id | ||
) |
Set the correlation ID of the inference request to be an unsigned integer.
Default is 0, which indicates that the request has no correlation ID. The correlation ID is used to indicate that two or more inference requests are related to each other. How this relationship is handled by the inference server is determined by the model's scheduling policy.
inference_request | The request object. |
correlation_id | The correlation ID. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetCorrelationIdString | ( | struct TRITONSERVER_InferenceRequest * | inference_request, |
const char * | correlation_id | ||
) |
Set the correlation ID of the inference request to be a string.
The correlation ID is used to indicate that two or more inference requests are related to each other. How this relationship is handled by the inference server is determined by the model's scheduling policy.
inference_request | The request object. |
correlation_id | The correlation ID. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetDoubleParameter | ( | struct TRITONSERVER_InferenceRequest * | request, |
const char * | key, | ||
const double | value | ||
) |
Set a double parameter in the request.
request | The request. |
key | The name of the parameter. |
value | The value of the parameter. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetFlags | ( | struct TRITONSERVER_InferenceRequest * | inference_request, |
uint32_t | flags | ||
) |
Set the flag(s) associated with a request.
'flags' should hold a bitwise-or of all flag values, see TRITONSERVER_RequestFlag for available flags.
inference_request | The request object. |
flags | The flags. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetId | ( | struct TRITONSERVER_InferenceRequest * | inference_request, |
const char * | id | ||
) |
Set the ID for a request.
inference_request | The request object. |
id | The ID. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetIntParameter | ( | struct TRITONSERVER_InferenceRequest * | request, |
const char * | key, | ||
const int64_t | value | ||
) |
Set an integer parameter in the request.
request | The request. |
key | The name of the parameter. |
value | The value of the parameter. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetPriority | ( | struct TRITONSERVER_InferenceRequest * | inference_request, |
uint32_t | priority | ||
) |
Deprecated.
See TRITONSERVER_InferenceRequestSetPriorityUInt64 instead.
Set the priority for a request. The default is 0 indicating that the request does not specify a priority and so will use the model's default priority.
inference_request | The request object. |
priority | The priority level. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetPriorityUInt64 | ( | struct TRITONSERVER_InferenceRequest * | inference_request, |
uint64_t | priority | ||
) |
Set the priority for a request.
The default is 0 indicating that the request does not specify a priority and so will use the model's default priority.
inference_request | The request object. |
priority | The priority level. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetReleaseCallback | ( | struct TRITONSERVER_InferenceRequest * | inference_request, |
TRITONSERVER_InferenceRequestReleaseFn_t | request_release_fn, | ||
void * | request_release_userp | ||
) |
Set the release callback for an inference request.
The release callback is called by Triton to return ownership of the request object.
inference_request | The request object. |
request_release_fn | The function called to return ownership of the 'inference_request' object. |
request_release_userp | User-provided pointer that is delivered to the 'request_release_fn' callback. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetResponseCallback | ( | struct TRITONSERVER_InferenceRequest * | inference_request, |
struct TRITONSERVER_ResponseAllocator * | response_allocator, | ||
void * | response_allocator_userp, | ||
TRITONSERVER_InferenceResponseCompleteFn_t | response_fn, | ||
void * | response_userp | ||
) |
Set the allocator and response callback for an inference request.
The allocator is used to allocate buffers for any output tensors included in responses that are produced for this request. The response callback is called to return response objects representing responses produced for this request. Typically 'response_allocator_userp' and 'response_userp' will no longer be referenced after 'response_fn' is invoked with 'TRITONSERVER_RESPONSE_COMPLETE_FINAL' flag, therefore the user may release 'response_allocator_userp' and 'response_userp' at that point.
inference_request | The request object. |
response_allocator | The TRITONSERVER_ResponseAllocator to use to allocate buffers to hold inference results. |
response_allocator_userp | User-provided pointer that is delivered to the response allocator's start and allocation functions. |
response_fn | The function called to deliver an inference response for this request. |
response_userp | User-provided pointer that is delivered to the 'response_fn' callback. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetStringParameter | ( | struct TRITONSERVER_InferenceRequest * | request, |
const char * | key, | ||
const char * | value | ||
) |
Set a string parameter in the request.
request | The request. |
key | The name of the parameter. |
value | The value of the parameter. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetTimeoutMicroseconds | ( | struct TRITONSERVER_InferenceRequest * | inference_request, |
uint64_t | timeout_us | ||
) |
Set the timeout for a request, in microseconds.
The default is 0 which indicates that the request has no timeout.
inference_request | The request object. |
timeout_us | The timeout, in microseconds. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestTimeoutMicroseconds | ( | struct TRITONSERVER_InferenceRequest * | inference_request, |
uint64_t * | timeout_us | ||
) |
Get the timeout for a request, in microseconds.
The default is 0 which indicates that the request has no timeout.
inference_request | The request object. |
timeout_us | Returns the timeout, in microseconds. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceResponseDelete | ( | struct TRITONSERVER_InferenceResponse * | inference_response | ) |
TRITONSERVER_InferenceResponse.
Object representing an inference response. The inference response provides the meta-data and output tensor values calculated by the inference. Delete an inference response object.
inference_response | The response object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceResponseError | ( | struct TRITONSERVER_InferenceResponse * | inference_response | ) |
Return the error status of an inference response.
Return a TRITONSERVER_Error object on failure, return nullptr on success. The returned error object is owned by 'inference_response' and so should not be deleted by the caller.
inference_response | The response object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceResponseId | ( | struct TRITONSERVER_InferenceResponse * | inference_response, |
const char ** | request_id | ||
) |
Get the ID of the request corresponding to a response.
The caller does not own the returned ID and must not modify or delete it. The lifetime of all returned values extends until 'inference_response' is deleted.
inference_response | The response object. |
request_id | Returns the ID of the request corresponding to this response. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceResponseModel | ( | struct TRITONSERVER_InferenceResponse * | inference_response, |
const char ** | model_name, | ||
int64_t * | model_version | ||
) |
Get model used to produce a response.
The caller does not own the returned model name value and must not modify or delete it. The lifetime of all returned values extends until 'inference_response' is deleted.
inference_response | The response object. |
model_name | Returns the name of the model. |
model_version | Returns the version of the model used to produce this response. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceResponseOutput | ( | struct TRITONSERVER_InferenceResponse * | inference_response, |
const uint32_t | index, | ||
const char ** | name, | ||
TRITONSERVER_DataType * | datatype, | ||
const int64_t ** | shape, | ||
uint64_t * | dim_count, | ||
const void ** | base, | ||
size_t * | byte_size, | ||
TRITONSERVER_MemoryType * | memory_type, | ||
int64_t * | memory_type_id, | ||
void ** | userp | ||
) |
Get all information about an output tensor.
The tensor data is returned as the base pointer to the data and the size, in bytes, of the data. The caller does not own any of the returned values and must not modify or delete them. The lifetime of all returned values extends until 'inference_response' is deleted.
inference_response | The response object. |
index | The index of the output tensor, must be 0 <= index < count, where 'count' is the value returned by TRITONSERVER_InferenceResponseOutputCount. |
name | Returns the name of the output. |
datatype | Returns the type of the output. |
shape | Returns the shape of the output. |
dim_count | Returns the number of dimensions of the returned shape. |
base | Returns the tensor data for the output. |
byte_size | Returns the size, in bytes, of the data. |
memory_type | Returns the memory type of the data. |
memory_type_id | Returns the memory type id of the data. |
userp | The user-specified value associated with the buffer in TRITONSERVER_ResponseAllocatorAllocFn_t. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceResponseOutputClassificationLabel | ( | struct TRITONSERVER_InferenceResponse * | inference_response, |
const uint32_t | index, | ||
const size_t | class_index, | ||
const char ** | label | ||
) |
Get a classification label associated with an output for a given index.
The caller does not own the returned label and must not modify or delete it. The lifetime of all returned label extends until 'inference_response' is deleted.
inference_response | The response object. |
index | The index of the output tensor, must be 0 <= index < count, where 'count' is the value returned by TRITONSERVER_InferenceResponseOutputCount. |
class_index | The index of the class. |
label | Returns the label corresponding to 'class_index' or nullptr if no label. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceResponseOutputCount | ( | struct TRITONSERVER_InferenceResponse * | inference_response, |
uint32_t * | count | ||
) |
Get the number of outputs available in the response.
inference_response | The response object. |
count | Returns the number of output tensors. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceResponseParameter | ( | struct TRITONSERVER_InferenceResponse * | inference_response, |
const uint32_t | index, | ||
const char ** | name, | ||
TRITONSERVER_ParameterType * | type, | ||
const void ** | vvalue | ||
) |
Get all information about a parameter.
The caller does not own any of the returned values and must not modify or delete them. The lifetime of all returned values extends until 'inference_response' is deleted.
The 'vvalue' returns a void* pointer that must be cast appropriately based on 'type'. For example:
void* vvalue; TRITONSERVER_ParameterType type; TRITONSERVER_InferenceResponseParameter( response, index, &name, &type, &vvalue); switch (type) { case TRITONSERVER_PARAMETER_BOOL: bool value = *(reinterpret_cast<bool*>(vvalue)); ... case TRITONSERVER_PARAMETER_INT: int64_t value = *(reinterpret_cast<int64_t*>(vvalue)); ... case TRITONSERVER_PARAMETER_STRING: const char* value = reinterpret_cast<const char*>(vvalue); ...
inference_response | The response object. |
index | The index of the parameter, must be 0 <= index < count, where 'count' is the value returned by TRITONSERVER_InferenceResponseParameterCount. |
name | Returns the name of the parameter. |
type | Returns the type of the parameter. |
vvalue | Returns a pointer to the parameter value. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceResponseParameterCount | ( | struct TRITONSERVER_InferenceResponse * | inference_response, |
uint32_t * | count | ||
) |
Get the number of parameters available in the response.
inference_response | The response object. |
count | Returns the number of parameters. |
const TRITONSERVER_DECLSPEC char* TRITONSERVER_InferenceTraceActivityString | ( | TRITONSERVER_InferenceTraceActivity | activity | ) |
Get the string representation of a trace activity.
The returned string is not owned by the caller and so should not be modified or freed.
activity | The trace activity. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceTraceContext | ( | struct TRITONSERVER_InferenceTrace * | trace, |
const char ** | trace_context | ||
) |
Get TRITONSERVER_InferenceTrace context.
trace | The trace. |
trace_context | Returns the context associated with the trace. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceTraceDelete | ( | struct TRITONSERVER_InferenceTrace * | trace | ) |
Delete a trace object.
trace | The trace object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceTraceId | ( | struct TRITONSERVER_InferenceTrace * | trace, |
uint64_t * | id | ||
) |
Get the id associated with a trace.
Every trace is assigned an id that is unique across all traces created for a Triton server.
trace | The trace. |
id | Returns the id associated with the trace. |
const TRITONSERVER_DECLSPEC char* TRITONSERVER_InferenceTraceLevelString | ( | TRITONSERVER_InferenceTraceLevel | level | ) |
Get the string representation of a trace level.
The returned string is not owned by the caller and so should not be modified or freed.
level | The trace level. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceTraceModelName | ( | struct TRITONSERVER_InferenceTrace * | trace, |
const char ** | model_name | ||
) |
Get the name of the model associated with a trace.
The caller does not own the returned string and must not modify or delete it. The lifetime of the returned string extends only as long as 'trace'.
trace | The trace. |
model_name | Returns the name of the model associated with the trace. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceTraceModelVersion | ( | struct TRITONSERVER_InferenceTrace * | trace, |
int64_t * | model_version | ||
) |
Get the version of the model associated with a trace.
trace | The trace. |
model_version | Returns the version of the model associated with the trace. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceTraceNew | ( | struct TRITONSERVER_InferenceTrace ** | trace, |
TRITONSERVER_InferenceTraceLevel | level, | ||
uint64_t | parent_id, | ||
TRITONSERVER_InferenceTraceActivityFn_t | activity_fn, | ||
TRITONSERVER_InferenceTraceReleaseFn_t | release_fn, | ||
void * | trace_userp | ||
) |
Create a new inference trace object.
The caller takes ownership of the TRITONSERVER_InferenceTrace object and must call TRITONSERVER_InferenceTraceDelete to release the object.
The activity callback function will be called to report activity for 'trace' as well as for any child traces that are spawned by 'trace', and so the activity callback must check the trace object to determine specifically what activity is being reported.
The release callback is called for both 'trace' and for any child traces spawned by 'trace'.
trace | Returns the new inference trace object. |
level | The tracing level. |
parent_id | The parent trace id for this trace. A value of 0 indicates that there is no parent trace. |
activity_fn | The callback function where activity for the trace is reported. |
release_fn | The callback function called when all activity is complete for the trace. |
trace_userp | User-provided pointer that is delivered to the activity and release callback functions. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceTraceParentId | ( | struct TRITONSERVER_InferenceTrace * | trace, |
uint64_t * | parent_id | ||
) |
Get the parent id associated with a trace.
The parent id indicates a parent-child relationship between two traces. A parent id value of 0 indicates that there is no parent trace.
trace | The trace. |
parent_id | Returns the parent id associated with the trace. |
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceReportActivity | ( | TRITONSERVER_InferenceTrace * | trace, |
uint64_t | timestamp, | ||
const char * | activity_name | ||
) |
Report a trace activity.
All the traces reported using this API will be using TRITONSERVER_TRACE_CUSTOM_ACTIVITY type.
trace | The trace object. |
timestamp | The timestamp associated with the trace activity. |
activity_name | The trace activity name. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceTraceRequestId | ( | struct TRITONSERVER_InferenceTrace * | trace, |
const char ** | request_id | ||
) |
Get the request id associated with a trace.
The caller does not own the returned string and must not modify or delete it. The lifetime of the returned string extends only as long as 'trace'.
trace | The trace. |
request_id | Returns the request id associated with the trace. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceTraceSetContext | ( | struct TRITONSERVER_InferenceTrace * | trace, |
const char * | trace_context | ||
) |
Set TRITONSERVER_InferenceTrace context.
trace | The trace. |
trace_context | A new trace context to associate with the trace. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceTraceSpawnChildTrace | ( | struct TRITONSERVER_InferenceTrace * | trace, |
struct TRITONSERVER_InferenceTrace ** | child_trace | ||
) |
Get the child trace, spawned from the parent trace.
The caller owns the returned object and must call TRITONSERVER_InferenceTraceDelete to release the object, unless ownership is transferred through other APIs (see TRITONSERVER_ServerInferAsync).
trace | The trace. |
child_trace | Returns the child trace, spawned from the trace. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceTraceTensorNew | ( | struct TRITONSERVER_InferenceTrace ** | trace, |
TRITONSERVER_InferenceTraceLevel | level, | ||
uint64_t | parent_id, | ||
TRITONSERVER_InferenceTraceActivityFn_t | activity_fn, | ||
TRITONSERVER_InferenceTraceTensorActivityFn_t | tensor_activity_fn, | ||
TRITONSERVER_InferenceTraceReleaseFn_t | release_fn, | ||
void * | trace_userp | ||
) |
Create a new inference trace object.
The caller takes ownership of the TRITONSERVER_InferenceTrace object and must call TRITONSERVER_InferenceTraceDelete to release the object.
The timeline and tensor activity callback function will be called to report activity for 'trace' as well as for any child traces that are spawned by 'trace', and so the activity callback must check the trace object to determine specifically what activity is being reported.
The release callback is called for both 'trace' and for any child traces spawned by 'trace'.
trace | Returns the new inference trace object. |
level | The tracing level. |
parent_id | The parent trace id for this trace. A value of 0 indicates that there is no parent trace. |
activity_fn | The callback function where timeline activity for the trace is reported. |
tensor_activity_fn | The callback function where tensor activity for the trace is reported. |
release_fn | The callback function called when all activity is complete for the trace. |
trace_userp | User-provided pointer that is delivered to the activity and release callback functions. |
const TRITONSERVER_DECLSPEC char* TRITONSERVER_InstanceGroupKindString | ( | TRITONSERVER_InstanceGroupKind | kind | ) |
Get the string representation of an instance-group kind.
The returned string is not owned by the caller and so should not be modified or freed.
kind | The instance-group kind. |
TRITONSERVER_DECLSPEC bool TRITONSERVER_LogIsEnabled | ( | TRITONSERVER_LogLevel | level | ) |
Is a log level enabled?
level | The log level. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_LogMessage | ( | TRITONSERVER_LogLevel | level, |
const char * | filename, | ||
const int | line, | ||
const char * | msg | ||
) |
Log a message at a given log level if that level is enabled.
level | The log level. |
filename | The file name of the location of the log message. |
line | The line number of the log message. |
msg | The log message. |
const TRITONSERVER_DECLSPEC char* TRITONSERVER_MemoryTypeString | ( | TRITONSERVER_MemoryType | memtype | ) |
Get the string representation of a memory type.
The returned string is not owned by the caller and so should not be modified or freed.
memtype | The memory type. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MessageDelete | ( | struct TRITONSERVER_Message * | message | ) |
Delete a message object.
message | The message object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MessageNewFromSerializedJson | ( | struct TRITONSERVER_Message ** | message, |
const char * | base, | ||
size_t | byte_size | ||
) |
TRITONSERVER_Message.
Object representing a Triton Server message. Create a new message object from serialized JSON string.
message | Returns the new message object. |
base | The base of the serialized JSON. |
byte_size | The size, in bytes, of the serialized message. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MessageSerializeToJson | ( | struct TRITONSERVER_Message * | message, |
const char ** | base, | ||
size_t * | byte_size | ||
) |
Get the base and size of the buffer containing the serialized message in JSON format.
The buffer is owned by the TRITONSERVER_Message object and should not be modified or freed by the caller. The lifetime of the buffer extends only as long as 'message' and must not be accessed once 'message' is deleted.
message | The message object. |
base | Returns the base of the serialized message. |
byte_size | Returns the size, in bytes, of the serialized message. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricArgsDelete | ( | struct TRITONSERVER_MetricArgs * | args | ) |
Delete a metric args object.
args | The metric args object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricArgsNew | ( | struct TRITONSERVER_MetricArgs ** | args | ) |
Create a new metric args object.
The caller takes ownership of the TRITONSERVER_MetricArgs object and must call TRITONSERVER_MetricArgsDelete to release the object.
args | Returns the new metric args object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricArgsSetHistogram | ( | struct TRITONSERVER_MetricArgs * | args, |
const double * | buckets, | ||
const uint64_t | buckets_count | ||
) |
Set metric args with histogram metric parameter.
args | The metric args object to set. |
buckets | The array of bucket boundaries for the expected range of observed values. |
buckets_count | The number of bucket boundaries. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricDelete | ( | struct TRITONSERVER_Metric * | metric | ) |
Delete a metric object.
All TRITONSERVER_Metric* objects should be deleted BEFORE their corresponding TRITONSERVER_MetricFamily* objects have been deleted. If a family is deleted before its metrics, an error will be returned.
metric | The metric object to delete. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricFamilyDelete | ( | struct TRITONSERVER_MetricFamily * | family | ) |
Delete a metric family object.
A TRITONSERVER_MetricFamily* object should be deleted AFTER its corresponding TRITONSERVER_Metric* objects have been deleted. Attempting to delete a family before its metrics will return an error.
family | The metric family object to delete. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricFamilyNew | ( | struct TRITONSERVER_MetricFamily ** | family, |
const TRITONSERVER_MetricKind | kind, | ||
const char * | name, | ||
const char * | description | ||
) |
Create a new metric family object.
The caller takes ownership of the TRITONSERVER_MetricFamily object and must call TRITONSERVER_MetricFamilyDelete to release the object.
family | Returns the new metric family object. |
kind | The type of metric family to create. |
name | The name of the metric family seen when calling the metrics endpoint. |
description | The description of the metric family seen when calling the metrics endpoint. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricIncrement | ( | struct TRITONSERVER_Metric * | metric, |
double | value | ||
) |
Increment the current value of metric by value.
Supports metrics of kind TRITONSERVER_METRIC_KIND_GAUGE for any value, and TRITONSERVER_METRIC_KIND_COUNTER for non-negative values. Returns TRITONSERVER_ERROR_UNSUPPORTED for unsupported TRITONSERVER_MetricKind and TRITONSERVER_ERROR_INVALID_ARG for negative values on a TRITONSERVER_METRIC_KIND_COUNTER metric.
metric | The metric object to update. |
value | The amount to increment the metric's value by. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricNew | ( | struct TRITONSERVER_Metric ** | metric, |
struct TRITONSERVER_MetricFamily * | family, | ||
const struct TRITONSERVER_Parameter ** | labels, | ||
const uint64_t | label_count | ||
) |
Create a new metric object.
The caller takes ownership of the TRITONSERVER_Metric object and must call TRITONSERVER_MetricDelete to release the object. The caller is also responsible for ownership of the labels passed in. Each label can be deleted immediately after creating the metric with TRITONSERVER_ParameterDelete if not re-using the labels.
metric | Returns the new metric object. |
family | The metric family to add this new metric to. |
labels | The array of labels to associate with this new metric. |
label_count | The number of labels. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricNewWithArgs | ( | struct TRITONSERVER_Metric ** | metric, |
struct TRITONSERVER_MetricFamily * | family, | ||
const struct TRITONSERVER_Parameter ** | labels, | ||
const uint64_t | label_count, | ||
const struct TRITONSERVER_MetricArgs * | args | ||
) |
Create a new metric object.
The caller takes ownership of the TRITONSERVER_Metric object and must call TRITONSERVER_MetricDelete to release the object. The caller is also responsible for ownership of the labels passed in. Each label can be deleted immediately after creating the metric with TRITONSERVER_ParameterDelete if not re-using the labels. Metric args can be deleted immediately after creating the metric with TRITONSERVER_MetricArgsDelete if not re-using the metric args.
metric | Returns the new metric object. |
family | The metric family to add this new metric to. |
labels | The array of labels to associate with this new metric. |
label_count | The number of labels. |
args | Metric args that store additional arguments to construct particular metric types, e.g. histogram. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricObserve | ( | struct TRITONSERVER_Metric * | metric, |
double | value | ||
) |
Sample an observation and count it to the appropriate bucket of a metric.
Supports metrics of kind TRITONSERVER_METRIC_KIND_HISTOGRAM and returns TRITONSERVER_ERROR_UNSUPPORTED for unsupported TRITONSERVER_MetricKind.
metric | The metric object to update. |
value | The amount for metric to sample observation. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricsDelete | ( | struct TRITONSERVER_Metrics * | metrics | ) |
Delete a metrics object.
metrics | The metrics object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricSet | ( | struct TRITONSERVER_Metric * | metric, |
double | value | ||
) |
Set the current value of metric to value.
Supports metrics of kind TRITONSERVER_METRIC_KIND_GAUGE and returns TRITONSERVER_ERROR_UNSUPPORTED for unsupported TRITONSERVER_MetricKind.
metric | The metric object to update. |
value | The amount to set metric's value to. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricsFormatted | ( | struct TRITONSERVER_Metrics * | metrics, |
TRITONSERVER_MetricFormat | format, | ||
const char ** | base, | ||
size_t * | byte_size | ||
) |
Get a buffer containing the metrics in the specified format.
For each format the buffer contains the following:
TRITONSERVER_METRIC_PROMETHEUS: 'base' points to a single multiline string (char*) that gives a text representation of the metrics in prometheus format. 'byte_size' returns the length of the string in bytes.
The buffer is owned by the 'metrics' object and should not be modified or freed by the caller. The lifetime of the buffer extends only as long as 'metrics' and must not be accessed once 'metrics' is deleted.
metrics | The metrics object. |
format | The format to use for the returned metrics. |
base | Returns a pointer to the base of the formatted metrics, as described above. |
byte_size | Returns the size, in bytes, of the formatted metrics. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricValue | ( | struct TRITONSERVER_Metric * | metric, |
double * | value | ||
) |
Get the current value of a metric object.
Supports metrics of kind TRITONSERVER_METRIC_KIND_COUNTER and TRITONSERVER_METRIC_KIND_GAUGE, and returns TRITONSERVER_ERROR_UNSUPPORTED for unsupported TRITONSERVER_MetricKind.
metric | The metric object to query. |
value | Returns the current value of the metric object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Parameter* TRITONSERVER_ParameterBytesNew | ( | const char * | name, |
const void * | byte_ptr, | ||
const uint64_t | size | ||
) |
Create a new parameter object with type TRITONSERVER_PARAMETER_BYTES.
The caller takes ownership of the TRITONSERVER_Parameter object and must call TRITONSERVER_ParameterDelete to release the object. The object only maintains a shallow copy of the 'byte_ptr' so the data content must be valid until the parameter object is deleted.
name | The parameter name. |
byte_ptr | The pointer to the data content. |
size | The size of the data content. |
TRITONSERVER_DECLSPEC void TRITONSERVER_ParameterDelete | ( | struct TRITONSERVER_Parameter * | parameter | ) |
Delete a parameter object.
parameter | The parameter object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Parameter* TRITONSERVER_ParameterNew | ( | const char * | name, |
const TRITONSERVER_ParameterType | type, | ||
const void * | value | ||
) |
Create a new parameter object.
The caller takes ownership of the TRITONSERVER_Parameter object and must call TRITONSERVER_ParameterDelete to release the object. The object will maintain its own copy of the 'value'
name | The parameter name. |
type | The parameter type. |
value | The pointer to the value. |
const TRITONSERVER_DECLSPEC char* TRITONSERVER_ParameterTypeString | ( | TRITONSERVER_ParameterType | paramtype | ) |
Get the string representation of a parameter type.
The returned string is not owned by the caller and so should not be modified or freed.
paramtype | The parameter type. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ResponseAllocatorDelete | ( | struct TRITONSERVER_ResponseAllocator * | allocator | ) |
Delete a response allocator.
allocator | The response allocator object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ResponseAllocatorNew | ( | struct TRITONSERVER_ResponseAllocator ** | allocator, |
TRITONSERVER_ResponseAllocatorAllocFn_t | alloc_fn, | ||
TRITONSERVER_ResponseAllocatorReleaseFn_t | release_fn, | ||
TRITONSERVER_ResponseAllocatorStartFn_t | start_fn | ||
) |
Create a new response allocator object.
The response allocator object is used by Triton to allocate buffers to hold the output tensors in inference responses. Most models generate a single response for each inference request (TRITONSERVER_TXN_ONE_TO_ONE). For these models the order of callbacks will be:
TRITONSERVER_ServerInferAsync called
For models that generate multiple responses for each inference request (TRITONSERVER_TXN_DECOUPLED), the start_fn callback can be used to determine sets of alloc_fn callbacks that belong to the same response:
TRITONSERVER_ServerInferAsync called
In all cases the start_fn, alloc_fn and release_fn callback functions must be thread-safe. Typically making these functions thread-safe does not require explicit locking. The recommended way to implement these functions is to have each inference request provide a 'response_allocator_userp' object that is unique to that request with TRITONSERVER_InferenceRequestSetResponseCallback. The callback functions then operate only on this unique state. Locking is required only when the callback function needs to access state that is shared across inference requests (for example, a common allocation pool).
allocator | Returns the new response allocator object. |
alloc_fn | The function to call to allocate buffers for result tensors. |
release_fn | The function to call when the server no longer holds a reference to an allocated buffer. |
start_fn | The function to call to indicate that the subsequent 'alloc_fn' calls are for a new response. This callback is optional (use nullptr to indicate that it should not be invoked). |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ResponseAllocatorSetBufferAttributesFunction | ( | struct TRITONSERVER_ResponseAllocator * | allocator, |
TRITONSERVER_ResponseAllocatorBufferAttributesFn_t | buffer_attributes_fn | ||
) |
Set the buffer attributes function for a response allocator object.
The function will be called after alloc_fn to set the buffer attributes associated with the output buffer.
The thread-safety requirement for buffer_attributes_fn is the same as for other allocator callbacks.
allocator | The response allocator object. |
buffer_attributes_fn | The function to call to get the buffer attributes information for an allocated buffer. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ResponseAllocatorSetQueryFunction | ( | struct TRITONSERVER_ResponseAllocator * | allocator, |
TRITONSERVER_ResponseAllocatorQueryFn_t | query_fn | ||
) |
Set the query function to a response allocator object.
Usually the function will be called before alloc_fn to understand what is the allocator's preferred memory type and memory type ID at the current situation to make different execution decision.
The thread-safety requirement for query_fn is the same as for other allocator callbacks.
allocator | The response allocator object. |
query_fn | The function to call to query allocator's preferred memory type and memory type ID. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerDelete | ( | struct TRITONSERVER_Server * | server | ) |
Delete a server object.
If server is not already stopped it is stopped before being deleted.
server | The inference server object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerInferAsync | ( | struct TRITONSERVER_Server * | server, |
struct TRITONSERVER_InferenceRequest * | inference_request, | ||
struct TRITONSERVER_InferenceTrace * | trace | ||
) |
Perform inference using the meta-data and inputs supplied by the 'inference_request'.
If the function returns success, then the caller releases ownership of 'inference_request' and must not access it in any way after this call, until ownership is returned via the 'request_release_fn' callback registered in the request object with TRITONSERVER_InferenceRequestSetReleaseCallback.
The function unconditionally takes ownership of 'trace' and so the caller must not access it in any way after this call (except in the trace activity callbacks) until ownership is returned via the trace's release_fn callback.
Responses produced for this request are returned using the allocator and callback registered with the request by TRITONSERVER_InferenceRequestSetResponseCallback.
server | The inference server object. |
inference_request | The request object. |
trace | The trace object for this request, or nullptr if no tracing. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerIsLive | ( | struct TRITONSERVER_Server * | server, |
bool * | live | ||
) |
Is the server live?
server | The inference server object. |
live | Returns true if server is live, false otherwise. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerIsReady | ( | struct TRITONSERVER_Server * | server, |
bool * | ready | ||
) |
Is the server ready?
server | The inference server object. |
ready | Returns true if server is ready, false otherwise. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerLoadModel | ( | struct TRITONSERVER_Server * | server, |
const char * | model_name | ||
) |
Load the requested model or reload the model if it is already loaded.
The function does not return until the model is loaded or fails to load. Returned error indicates if model loaded successfully or not.
server | The inference server object. |
model_name | The name of the model. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerLoadModelWithParameters | ( | struct TRITONSERVER_Server * | server, |
const char * | model_name, | ||
const struct TRITONSERVER_Parameter ** | parameters, | ||
const uint64_t | parameter_count | ||
) |
Load the requested model or reload the model if it is already loaded, with load parameters provided.
The function does not return until the model is loaded or fails to load. Returned error indicates if model loaded successfully or not. Currently the below parameter names are recognized:
server | The inference server object. |
model_name | The name of the model. |
parameters | The array of load parameters. |
parameter_count | The number of parameters. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerMetadata | ( | struct TRITONSERVER_Server * | server, |
struct TRITONSERVER_Message ** | server_metadata | ||
) |
Get the metadata of the server as a TRITONSERVER_Message object.
The caller takes ownership of the message object and must call TRITONSERVER_MessageDelete to release the object.
server | The inference server object. |
server_metadata | Returns the server metadata message. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerMetrics | ( | struct TRITONSERVER_Server * | server, |
struct TRITONSERVER_Metrics ** | metrics | ||
) |
Get the current metrics for the server.
The caller takes ownership of the metrics object and must call TRITONSERVER_MetricsDelete to release the object.
server | The inference server object. |
metrics | Returns the metrics. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerModelBatchProperties | ( | struct TRITONSERVER_Server * | server, |
const char * | model_name, | ||
const int64_t | model_version, | ||
uint32_t * | flags, | ||
void ** | voidp | ||
) |
Get the batch properties of the model.
The properties are communicated by a flags value and an (optional) object returned by 'voidp'.
server | The inference server object. |
model_name | The name of the model. |
model_version | The version of the model. If -1 then the server will choose a version based on the model's policy. |
flags | Returns flags indicating the batch properties of the model. |
voidp | If non-nullptr, returns a pointer specific to the 'flags' value. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerModelConfig | ( | struct TRITONSERVER_Server * | server, |
const char * | model_name, | ||
const int64_t | model_version, | ||
const uint32_t | config_version, | ||
struct TRITONSERVER_Message ** | model_config | ||
) |
Get the configuration of a model as a TRITONSERVER_Message object.
The caller takes ownership of the message object and must call TRITONSERVER_MessageDelete to release the object.
server | The inference server object. |
model_name | The name of the model. |
model_version | The version of the model. If -1 then the server will choose a version based on the model's policy. |
config_version | The model configuration will be returned in a format matching this version. If the configuration cannot be represented in the requested version's format then an error will be returned. Currently only version 1 is supported. |
model_config | Returns the model config message. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerModelIndex | ( | struct TRITONSERVER_Server * | server, |
uint32_t | flags, | ||
struct TRITONSERVER_Message ** | model_index | ||
) |
Get the index of all unique models in the model repositories as a TRITONSERVER_Message object.
The caller takes ownership of the message object and must call TRITONSERVER_MessageDelete to release the object.
If TRITONSERVER_INDEX_FLAG_READY is set in 'flags' only the models that are loaded into the server and ready for inferencing are returned.
server | The inference server object. |
flags | TRITONSERVER_ModelIndexFlag flags that control how to collect the index. |
model_index | Return the model index message that holds the index of all models contained in the server's model repository(s). |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerModelIsReady | ( | struct TRITONSERVER_Server * | server, |
const char * | model_name, | ||
const int64_t | model_version, | ||
bool * | ready | ||
) |
Is the model ready?
server | The inference server object. |
model_name | The name of the model to get readiness for. |
model_version | The version of the model to get readiness for. If -1 then the server will choose a version based on the model's policy. |
ready | Returns true if the model is ready, false otherwise. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerModelMetadata | ( | struct TRITONSERVER_Server * | server, |
const char * | model_name, | ||
const int64_t | model_version, | ||
struct TRITONSERVER_Message ** | model_metadata | ||
) |
Get the metadata of a model as a TRITONSERVER_Message object.
The caller takes ownership of the message object and must call TRITONSERVER_MessageDelete to release the object.
server | The inference server object. |
model_name | The name of the model. |
model_version | The version of the model. If -1 then the server will choose a version based on the model's policy. |
model_metadata | Returns the model metadata message. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerModelStatistics | ( | struct TRITONSERVER_Server * | server, |
const char * | model_name, | ||
const int64_t | model_version, | ||
struct TRITONSERVER_Message ** | model_stats | ||
) |
Get the statistics of a model as a TRITONSERVER_Message object.
The caller takes ownership of the object and must call TRITONSERVER_MessageDelete to release the object.
server | The inference server object. |
model_name | The name of the model. If empty, then statistics for all available models will be returned, and the server will choose a version based on those models' policies. |
model_version | The version of the model. If -1 then the server will choose a version based on the model's policy. |
model_stats | Returns the model statistics message. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerModelTransactionProperties | ( | struct TRITONSERVER_Server * | server, |
const char * | model_name, | ||
const int64_t | model_version, | ||
uint32_t * | txn_flags, | ||
void ** | voidp | ||
) |
Get the transaction policy of the model.
The policy is communicated by a flags value.
server | The inference server object. |
model_name | The name of the model. |
model_version | The version of the model. If -1 then the server will choose a version based on the model's policy. |
txn_flags | Returns flags indicating the transaction policy of the model. |
voidp | If non-nullptr, returns a pointer specific to the 'flags' value. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerNew | ( | struct TRITONSERVER_Server ** | server, |
struct TRITONSERVER_ServerOptions * | options | ||
) |
Create a new server object.
The caller takes ownership of the TRITONSERVER_Server object and must call TRITONSERVER_ServerDelete to release the object.
server | Returns the new inference server object. |
options | The inference server options object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsAddRateLimiterResource | ( | struct TRITONSERVER_ServerOptions * | options, |
const char * | resource_name, | ||
const size_t | resource_count, | ||
const int | device | ||
) |
Add resource count for rate limiting.
options | The server options object. |
resource_name | The name of the resource. |
resource_count | The count of the resource. |
device | The device identifier for the resource. A value of -1 indicates that the specified number of resources are available on every device. The device value is ignored for a global resource. The server will use the rate limiter configuration specified for instance groups in model config to determine whether resource is global. In case of conflicting resource type in different model configurations, server will raise an appropriate error while loading model. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsDelete | ( | struct TRITONSERVER_ServerOptions * | options | ) |
Delete a server options object.
options | The server options object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsNew | ( | struct TRITONSERVER_ServerOptions ** | options | ) |
Create a new server options object.
The caller takes ownership of the TRITONSERVER_ServerOptions object and must call TRITONSERVER_ServerOptionsDelete to release the object.
options | Returns the new server options object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetBackendConfig | ( | struct TRITONSERVER_ServerOptions * | options, |
const char * | backend_name, | ||
const char * | setting, | ||
const char * | value | ||
) |
Set a configuration setting for a named backend in a server options.
options | The server options object. |
backend_name | The name of the backend. |
setting | The name of the setting. |
value | The setting value. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetBackendDirectory | ( | struct TRITONSERVER_ServerOptions * | options, |
const char * | backend_dir | ||
) |
Set the directory containing backend shared libraries.
This directory is searched last after the version and model directory in the model repository when looking for the backend shared library for a model. If the backend is named 'be' the directory searched is 'backend_dir'/be/libtriton_be.so.
options | The server options object. |
backend_dir | The full path of the backend directory. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetBufferManagerThreadCount | ( | struct TRITONSERVER_ServerOptions * | options, |
unsigned int | thread_count | ||
) |
Set the number of threads used in buffer manager in a server options.
options | The server options object. |
thread_count | The number of threads. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetCacheConfig | ( | struct TRITONSERVER_ServerOptions * | options, |
const char * | cache_name, | ||
const char * | config_json | ||
) |
Set the cache config that will be used to initialize the cache implementation for "cache_name".
It is expected that the "cache_name" provided matches a directory inside the "cache_dir" used for TRITONSERVER_ServerOptionsSetCacheDirectory. The default "cache_dir" is "/opt/tritonserver/caches", so for a "cache_name" of "local", Triton would expect to find the "local" cache implementation at "/opt/tritonserver/caches/local/libtritoncache_local.so"
Altogether an example for the "local" cache implementation would look like: std::string cache_name = "local"; std::string config_json = R"({"size": 1048576})" auto err = TRITONSERVER_ServerOptionsSetCacheConfig( options, cache_name, config_json);
options | The server options object. |
cache_name | The name of the cache. Example names would be "local", "redis", or the name of a custom cache implementation. |
config_json | The string representation of config JSON that is used to initialize the cache implementation. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetCacheDirectory | ( | struct TRITONSERVER_ServerOptions * | options, |
const char * | cache_dir | ||
) |
Set the directory containing cache shared libraries.
This directory is searched when looking for cache implementations.
options | The server options object. |
cache_dir | The full path of the cache directory. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetCpuMetrics | ( | struct TRITONSERVER_ServerOptions * | options, |
bool | cpu_metrics | ||
) |
Enable or disable CPU metrics collection in a server options.
CPU metrics are collected if both this option and TRITONSERVER_ServerOptionsSetMetrics are true.
options | The server options object. |
cpu_metrics | True to enable CPU metrics, false to disable. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetCudaMemoryPoolByteSize | ( | struct TRITONSERVER_ServerOptions * | options, |
int | gpu_device, | ||
uint64_t | size | ||
) |
Set the total CUDA memory byte size that the server can allocate on given GPU device in a server options.
The CUDA memory pool will be shared across Triton itself and the backends that use TRITONBACKEND_MemoryManager to allocate memory.
options | The server options object. |
gpu_device | The GPU device to allocate the memory pool. |
size | The CUDA memory pool byte size. |
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetCudaVirtualAddressSize | ( | TRITONSERVER_ServerOptions * | options, |
int | gpu_device, | ||
size_t | cuda_virtual_address_size | ||
) |
Set the size of the virtual address space that will be used for growable memory in implicit state.
options | The server options object. |
gpu_device | The GPU device to set the CUDA virtual address space size |
cuda_virtual_address_size | The size of the CUDA virtual address space. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetEnablePeerAccess | ( | struct TRITONSERVER_ServerOptions * | options, |
bool | enable_peer_access | ||
) |
Enable peer access to allow GPU device to directly access the memory of another GPU device.
Note that even when this option is set to True, Triton will only try to enable peer access and might fail to enable it if the underlying system doesn't support peer access.
options | The server options object. |
enable_peer_access | Whether to enable peer access or not. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetExitOnError | ( | struct TRITONSERVER_ServerOptions * | options, |
bool | exit | ||
) |
Enable or disable exit-on-error in a server options.
options | The server options object. |
exit | True to enable exiting on initialization error, false to continue. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetExitTimeout | ( | struct TRITONSERVER_ServerOptions * | options, |
unsigned int | timeout | ||
) |
Set the exit timeout, in seconds, for the server in a server options.
options | The server options object. |
timeout | The exit timeout, in seconds. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetGpuMetrics | ( | struct TRITONSERVER_ServerOptions * | options, |
bool | gpu_metrics | ||
) |
Enable or disable GPU metrics collection in a server options.
GPU metrics are collected if both this option and TRITONSERVER_ServerOptionsSetMetrics are true.
options | The server options object. |
gpu_metrics | True to enable GPU metrics, false to disable. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetHostPolicy | ( | struct TRITONSERVER_ServerOptions * | options, |
const char * | policy_name, | ||
const char * | setting, | ||
const char * | value | ||
) |
Set a host policy setting for a given policy name in a server options.
options | The server options object. |
policy_name | The name of the policy. |
setting | The name of the setting. |
value | The setting value. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogError | ( | struct TRITONSERVER_ServerOptions * | options, |
bool | log | ||
) |
Enable or disable error level logging.
options | The server options object. |
log | True to enable error logging, false to disable. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogFile | ( | struct TRITONSERVER_ServerOptions * | options, |
const char * | file | ||
) |
Provide a log output file.
options | The server options object. |
file | a string defining the file where the log outputs will be saved. An empty string for the file name will cause triton to direct logging facilities to the console |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogFormat | ( | struct TRITONSERVER_ServerOptions * | options, |
const TRITONSERVER_LogFormat | format | ||
) |
Set the logging format.
options | The server options object. |
format | The logging format. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogInfo | ( | struct TRITONSERVER_ServerOptions * | options, |
bool | log | ||
) |
Enable or disable info level logging.
options | The server options object. |
log | True to enable info logging, false to disable. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogVerbose | ( | struct TRITONSERVER_ServerOptions * | options, |
int | level | ||
) |
Set verbose logging level.
Level zero disables verbose logging.
options | The server options object. |
level | The verbose logging level. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogWarn | ( | struct TRITONSERVER_ServerOptions * | options, |
bool | log | ||
) |
Enable or disable warning level logging.
options | The server options object. |
log | True to enable warning logging, false to disable. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetMetrics | ( | struct TRITONSERVER_ServerOptions * | options, |
bool | metrics | ||
) |
Enable or disable metrics collection in a server options.
options | The server options object. |
metrics | True to enable metrics, false to disable. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetMetricsConfig | ( | struct TRITONSERVER_ServerOptions * | options, |
const char * | name, | ||
const char * | setting, | ||
const char * | value | ||
) |
Set a configuration setting for metrics in server options.
options | The server options object. |
name | The name of the configuration group. An empty string indicates a global configuration option. |
setting | The name of the setting. |
value | The setting value. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetMetricsInterval | ( | struct TRITONSERVER_ServerOptions * | options, |
uint64_t | metrics_interval_ms | ||
) |
Set the interval for metrics collection in a server options.
This is 2000 milliseconds by default.
options | The server options object. |
metrics_interval_ms | The time interval in ms between successive metrics updates. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability | ( | struct TRITONSERVER_ServerOptions * | options, |
double | cc | ||
) |
Set the minimum supported CUDA compute capability in a server options.
options | The server options object. |
cc | The minimum CUDA compute capability. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetModelConfigName | ( | struct TRITONSERVER_ServerOptions * | options, |
const char * | model_config_name | ||
) |
Set the custom model configuration name to load for all models.
Fall back to default config file if empty.
options | The server options object. |
model_config_name | The name of the config file to load for all models. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetModelControlMode | ( | struct TRITONSERVER_ServerOptions * | options, |
TRITONSERVER_ModelControlMode | mode | ||
) |
Set the model control mode in a server options.
For each mode the models will be managed as the following:
TRITONSERVER_MODEL_CONTROL_NONE: the models in model repository will be loaded on startup. After startup any changes to the model repository will be ignored. Calling TRITONSERVER_ServerPollModelRepository will result in an error.
TRITONSERVER_MODEL_CONTROL_POLL: the models in model repository will be loaded on startup. The model repository can be polled periodically using TRITONSERVER_ServerPollModelRepository and the server will load, unload, and update models according to changes in the model repository.
TRITONSERVER_MODEL_CONTROL_EXPLICIT: the models in model repository will not be loaded on startup. The corresponding model control APIs must be called to load / unload a model in the model repository.
options | The server options object. |
mode | The mode to use for the model control. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetModelLoadDeviceLimit | ( | struct TRITONSERVER_ServerOptions * | options, |
const TRITONSERVER_InstanceGroupKind | kind, | ||
const int | device_id, | ||
const double | fraction | ||
) |
Specify the limit on memory usage as a fraction on the device identified by 'kind' and 'device_id'.
If model loading on the device is requested and the current memory usage exceeds the limit, the load will be rejected. If not specified, the limit will not be set.
Currently support TRITONSERVER_INSTANCEGROUPKIND_GPU
options | The server options object. |
kind | The kind of the device. |
device_id | The id of the device. |
fraction | The limit on memory usage as a fraction. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetModelLoadRetryCount | ( | struct TRITONSERVER_ServerOptions * | options, |
unsigned int | retry_count | ||
) |
Set the number of retry to load a model in a server options.
options | The server options object. |
retry_count | The number of retry. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetModelLoadThreadCount | ( | struct TRITONSERVER_ServerOptions * | options, |
unsigned int | thread_count | ||
) |
Set the number of threads to concurrently load models in a server options.
options | The server options object. |
thread_count | The number of threads. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetModelNamespacing | ( | struct TRITONSERVER_ServerOptions * | options, |
bool | enable_namespace | ||
) |
Enable model namespacing to allow serving models with the same name if they are in different namespaces.
options | The server options object. |
enable_namespace | Whether to enable model namespacing or not. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetModelRepositoryPath | ( | struct TRITONSERVER_ServerOptions * | options, |
const char * | model_repository_path | ||
) |
Set the model repository path in a server options.
The path must be the full absolute path to the model repository. This function can be called multiple times with different paths to set multiple model repositories. Note that if a model is not unique across all model repositories at any time, the model will not be available.
options | The server options object. |
model_repository_path | The full path to the model repository. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetPinnedMemoryPoolByteSize | ( | struct TRITONSERVER_ServerOptions * | options, |
uint64_t | size | ||
) |
Set the total pinned memory byte size that the server can allocate in a server options.
The pinned memory pool will be shared across Triton itself and the backends that use TRITONBACKEND_MemoryManager to allocate memory.
options | The server options object. |
size | The pinned memory pool byte size. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetRateLimiterMode | ( | struct TRITONSERVER_ServerOptions * | options, |
TRITONSERVER_RateLimitMode | mode | ||
) |
Set the rate limit mode in a server options.
TRITONSERVER_RATE_LIMIT_EXEC_COUNT: The rate limiting prioritizes the inference execution using the number of times each instance has got a chance to run. The execution gets to run only when its resource constraints are satisfied.
TRITONSERVER_RATE_LIMIT_OFF: The rate limiting is turned off and the inference gets executed whenever an instance is available.
options | The server options object. |
mode | The mode to use for the rate limiting. By default, execution count is used to determine the priorities. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetRepoAgentDirectory | ( | struct TRITONSERVER_ServerOptions * | options, |
const char * | repoagent_dir | ||
) |
Set the directory containing repository agent shared libraries.
This directory is searched when looking for the repository agent shared library for a model. If the repo agent is named 'ra' the directory searched is 'repoagent_dir'/ra/libtritonrepoagent_ra.so.
options | The server options object. |
repoagent_dir | The full path of the repository agent directory. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetResponseCacheByteSize | ( | struct TRITONSERVER_ServerOptions * | options, |
uint64_t | size | ||
) |
Deprecated.
See TRITONSERVER_ServerOptionsSetCacheConfig instead.
Set the total response cache byte size that the server can allocate in CPU memory. The response cache will be shared across all inference requests and across all models.
options | The server options object. |
size | The total response cache byte size. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetServerId | ( | struct TRITONSERVER_ServerOptions * | options, |
const char * | server_id | ||
) |
Set the textual ID for the server in a server options.
The ID is a name that identifies the server.
options | The server options object. |
server_id | The server identifier. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetStartupModel | ( | struct TRITONSERVER_ServerOptions * | options, |
const char * | model_name | ||
) |
Set the model to be loaded at startup in a server options.
The model must be present in one, and only one, of the specified model repositories. This function can be called multiple times with different model names to set multiple startup models. Note that it only takes effect in TRITONSERVER_MODEL_CONTROL_EXPLICIT mode.
options | The server options object. |
model_name | The name of the model to load on startup. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetStrictModelConfig | ( | struct TRITONSERVER_ServerOptions * | options, |
bool | strict | ||
) |
Enable or disable strict model configuration handling in a server options.
options | The server options object. |
strict | True to enable strict model configuration handling, false to disable. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetStrictReadiness | ( | struct TRITONSERVER_ServerOptions * | options, |
bool | strict | ||
) |
Enable or disable strict readiness handling in a server options.
options | The server options object. |
strict | True to enable strict readiness handling, false to disable. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerPollModelRepository | ( | struct TRITONSERVER_Server * | server | ) |
Check the model repository for changes and update server state based on those changes.
server | The inference server object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerRegisterModelRepository | ( | struct TRITONSERVER_Server * | server, |
const char * | repository_path, | ||
const struct TRITONSERVER_Parameter ** | name_mapping, | ||
const uint32_t | mapping_count | ||
) |
Register a new model repository.
Not available in polling mode.
server | The inference server object. |
repository_path | The full path to the model repository. |
name_mapping | List of name_mapping parameters. Each mapping has the model directory name as its key, overridden model name as its value. |
mapping_count | Number of mappings provided. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerSetExitTimeout | ( | struct TRITONSERVER_Server * | server, |
unsigned int | timeout | ||
) |
Set the exit timeout on the server object.
This value overrides the value initially set through server options and provides a mechanism to update the exit timeout while the serving is running.
server | The inference server object. |
timeout | The exit timeout, in seconds. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerStop | ( | struct TRITONSERVER_Server * | server | ) |
Stop a server object.
A server can't be restarted once it is stopped.
server | The inference server object. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerUnloadModel | ( | struct TRITONSERVER_Server * | server, |
const char * | model_name | ||
) |
Unload the requested model.
Unloading a model that is not loaded on the server has no effect and a success code will be returned. The function does not wait for the requested model to be fully unloaded and a success code will be returned. A returned error indicates whether the model unload was initiated successfully or not.
server | The inference server object. |
model_name | The name of the model. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerUnloadModelAndDependents | ( | struct TRITONSERVER_Server * | server, |
const char * | model_name | ||
) |
Unload the requested model, and also unload any dependent model that was loaded along with the requested model (for example, the models composing an ensemble).
Unloading a model that is not loaded on the server has no effect and a success code will be returned. The function does not wait for the requested model and all dependent models to be fully unloaded and a success code will be returned. A returned error indicates whether the model unload was initiated successfully or not.
server | The inference server object. |
model_name | The name of the model. |
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerUnregisterModelRepository | ( | struct TRITONSERVER_Server * | server, |
const char * | repository_path | ||
) |
Unregister a model repository.
Not available in polling mode.
server | The inference server object. |
repository_path | The full path to the model repository. |
TRITONSERVER_DECLSPEC TRITONSERVER_DataType TRITONSERVER_StringToDataType | ( | const char * | dtype | ) |
Get the Triton datatype corresponding to a string representation of a datatype.
dtype | The datatype string representation. |