|
NVIDIA DeepStream SDK API Reference
|
7.1 Release
|
Go to the documentation of this file.
38 #ifdef _COMPILING_TRITONSERVER
40 #define TRITONSERVER_DECLSPEC __declspec(dllexport)
41 #elif defined(__GNUC__)
42 #define TRITONSERVER_DECLSPEC __attribute__((__visibility__("default")))
44 #define TRITONSERVER_DECLSPEC
48 #define TRITONSERVER_DECLSPEC __declspec(dllimport)
50 #define TRITONSERVER_DECLSPEC
54 struct TRITONSERVER_BufferAttributes;
55 struct TRITONSERVER_Error;
56 struct TRITONSERVER_InferenceRequest;
57 struct TRITONSERVER_InferenceResponse;
58 struct TRITONSERVER_InferenceTrace;
59 struct TRITONSERVER_Message;
60 struct TRITONSERVER_Metrics;
61 struct TRITONSERVER_Parameter;
62 struct TRITONSERVER_ResponseAllocator;
63 struct TRITONSERVER_Server;
64 struct TRITONSERVER_ServerOptions;
65 struct TRITONSERVER_Metric;
66 struct TRITONSERVER_MetricFamily;
67 struct TRITONSERVER_MetricArgs;
94 #define TRITONSERVER_API_VERSION_MAJOR 1
95 #define TRITONSERVER_API_VERSION_MINOR 34
109 uint32_t* major, uint32_t* minor);
224 const char* name,
const void* byte_ptr,
const uint64_t size);
230 struct TRITONSERVER_Parameter* parameter);
349 struct TRITONSERVER_Error* error);
366 struct TRITONSERVER_Error* error);
376 struct TRITONSERVER_Error* error);
414 typedef struct TRITONSERVER_Error* (*TRITONSERVER_ResponseAllocatorAllocFn_t)(
415 struct TRITONSERVER_ResponseAllocator* allocator,
const char* tensor_name,
417 int64_t memory_type_id,
void* userp,
void** buffer,
void** buffer_userp,
419 int64_t* actual_memory_type_id);
443 typedef struct TRITONSERVER_Error* (
445 struct TRITONSERVER_ResponseAllocator* allocator,
const char* tensor_name,
446 struct TRITONSERVER_BufferAttributes* buffer_attributes,
void* userp,
473 typedef struct TRITONSERVER_Error* (*TRITONSERVER_ResponseAllocatorQueryFn_t)(
474 struct TRITONSERVER_ResponseAllocator* allocator,
void* userp,
475 const char* tensor_name,
size_t* byte_size,
495 typedef struct TRITONSERVER_Error* (*TRITONSERVER_ResponseAllocatorReleaseFn_t)(
496 struct TRITONSERVER_ResponseAllocator* allocator,
void* buffer,
498 int64_t memory_type_id);
509 typedef struct TRITONSERVER_Error* (*TRITONSERVER_ResponseAllocatorStartFn_t)(
510 struct TRITONSERVER_ResponseAllocator* allocator,
void* userp);
563 struct TRITONSERVER_ResponseAllocator** allocator,
581 struct TRITONSERVER_ResponseAllocator* allocator,
598 struct TRITONSERVER_ResponseAllocator* allocator,
607 struct TRITONSERVER_ResponseAllocator* allocator);
622 struct TRITONSERVER_Message** message,
const char* base,
size_t byte_size);
629 struct TRITONSERVER_Message* message);
644 struct TRITONSERVER_Message* message,
const char** base,
size_t* byte_size);
661 struct TRITONSERVER_Metrics* metrics);
685 const char** base,
size_t* byte_size);
756 struct TRITONSERVER_InferenceTrace* trace,
767 struct TRITONSERVER_InferenceTrace* trace,
770 const int64_t* shape, uint64_t dim_count,
779 struct TRITONSERVER_InferenceTrace* trace,
void* userp);
805 struct TRITONSERVER_InferenceTrace** trace,
837 struct TRITONSERVER_InferenceTrace** trace,
852 TRITONSERVER_InferenceTrace* trace, uint64_t timestamp,
853 const char* activity_name);
869 struct TRITONSERVER_InferenceTrace* trace, uint64_t*
id);
880 struct TRITONSERVER_InferenceTrace* trace, uint64_t* parent_id);
892 struct TRITONSERVER_InferenceTrace* trace,
const char** model_name);
902 struct TRITONSERVER_InferenceTrace* trace, int64_t* model_version);
914 struct TRITONSERVER_InferenceTrace* trace,
const char** request_id);
926 struct TRITONSERVER_InferenceTrace* trace,
927 struct TRITONSERVER_InferenceTrace** child_trace);
936 struct TRITONSERVER_InferenceTrace* trace,
const char* trace_context);
945 struct TRITONSERVER_InferenceTrace* trace,
const char** trace_context);
1005 struct TRITONSERVER_InferenceRequest* request,
const uint32_t flags,
1026 struct TRITONSERVER_InferenceResponse* response,
const uint32_t flags,
1040 struct TRITONSERVER_InferenceRequest** inference_request,
1041 struct TRITONSERVER_Server* server,
const char* model_name,
1042 const int64_t model_version);
1050 struct TRITONSERVER_InferenceRequest* inference_request);
1061 struct TRITONSERVER_InferenceRequest* inference_request,
const char**
id);
1070 struct TRITONSERVER_InferenceRequest* inference_request,
const char*
id);
1081 struct TRITONSERVER_InferenceRequest* inference_request, uint32_t* flags);
1092 struct TRITONSERVER_InferenceRequest* inference_request, uint32_t flags);
1107 struct TRITONSERVER_InferenceRequest* inference_request,
1108 uint64_t* correlation_id);
1123 struct TRITONSERVER_InferenceRequest* inference_request,
1124 const char** correlation_id);
1137 struct TRITONSERVER_InferenceRequest* inference_request,
1138 uint64_t correlation_id);
1151 struct TRITONSERVER_InferenceRequest* inference_request,
1152 const char* correlation_id);
1166 struct TRITONSERVER_InferenceRequest* inference_request);
1179 struct TRITONSERVER_InferenceRequest* inference_request,
1180 bool* is_cancelled);
1193 struct TRITONSERVER_InferenceRequest* inference_request,
1194 uint32_t* priority);
1205 struct TRITONSERVER_InferenceRequest* inference_request,
1206 uint64_t* priority);
1219 struct TRITONSERVER_InferenceRequest* inference_request, uint32_t priority);
1230 struct TRITONSERVER_InferenceRequest* inference_request, uint64_t priority);
1240 struct TRITONSERVER_InferenceRequest* inference_request,
1241 uint64_t* timeout_us);
1251 struct TRITONSERVER_InferenceRequest* inference_request,
1252 uint64_t timeout_us);
1266 struct TRITONSERVER_InferenceRequest* inference_request,
const char* name,
1268 uint64_t dim_count);
1282 struct TRITONSERVER_InferenceRequest* inference_request,
const char* name);
1291 struct TRITONSERVER_InferenceRequest* inference_request,
const char* name);
1299 struct TRITONSERVER_InferenceRequest* inference_request);
1317 struct TRITONSERVER_InferenceRequest* inference_request,
const char* name,
1319 int64_t memory_type_id);
1342 struct TRITONSERVER_InferenceRequest* inference_request,
const char* name,
1344 int64_t memory_type_id,
const char* host_policy_name);
1360 struct TRITONSERVER_InferenceRequest* inference_request,
const char* name,
1361 const void* base,
struct TRITONSERVER_BufferAttributes* buffer_attributes);
1371 struct TRITONSERVER_InferenceRequest* inference_request,
const char* name);
1380 struct TRITONSERVER_InferenceRequest* inference_request,
const char* name);
1389 struct TRITONSERVER_InferenceRequest* inference_request,
const char* name);
1397 struct TRITONSERVER_InferenceRequest* inference_request);
1411 struct TRITONSERVER_InferenceRequest* inference_request,
1413 void* request_release_userp);
1437 struct TRITONSERVER_InferenceRequest* inference_request,
1438 struct TRITONSERVER_ResponseAllocator* response_allocator,
1439 void* response_allocator_userp,
1441 void* response_userp);
1451 struct TRITONSERVER_InferenceRequest* request,
const char* key,
1462 struct TRITONSERVER_InferenceRequest* request,
const char* key,
1463 const int64_t value);
1473 struct TRITONSERVER_InferenceRequest* request,
const char* key,
1484 struct TRITONSERVER_InferenceRequest* request,
const char* key,
1485 const double value);
1500 struct TRITONSERVER_InferenceResponse* inference_response);
1512 struct TRITONSERVER_InferenceResponse* inference_response);
1526 struct TRITONSERVER_InferenceResponse* inference_response,
1527 const char** model_name, int64_t* model_version);
1540 struct TRITONSERVER_InferenceResponse* inference_response,
1541 const char** request_id);
1550 struct TRITONSERVER_InferenceResponse* inference_response, uint32_t* count);
1585 struct TRITONSERVER_InferenceResponse* inference_response,
1587 const void** vvalue);
1596 struct TRITONSERVER_InferenceResponse* inference_response, uint32_t* count);
1622 struct TRITONSERVER_InferenceResponse* inference_response,
1624 const int64_t** shape, uint64_t* dim_count,
const void** base,
1626 int64_t* memory_type_id,
void** userp);
1643 struct TRITONSERVER_InferenceResponse* inference_response,
1644 const uint32_t index,
const size_t class_index,
const char** label);
1659 struct TRITONSERVER_BufferAttributes** buffer_attributes);
1667 struct TRITONSERVER_BufferAttributes* buffer_attributes);
1677 struct TRITONSERVER_BufferAttributes* buffer_attributes,
1678 int64_t memory_type_id);
1687 struct TRITONSERVER_BufferAttributes* buffer_attributes,
1698 struct TRITONSERVER_BufferAttributes* buffer_attributes,
1699 void* cuda_ipc_handle);
1708 struct TRITONSERVER_BufferAttributes* buffer_attributes,
size_t byte_size);
1718 struct TRITONSERVER_BufferAttributes* buffer_attributes,
1719 int64_t* memory_type_id);
1729 struct TRITONSERVER_BufferAttributes* buffer_attributes,
1741 struct TRITONSERVER_BufferAttributes* buffer_attributes,
1742 void** cuda_ipc_handle);
1752 struct TRITONSERVER_BufferAttributes* buffer_attributes,
size_t* byte_size);
1780 struct TRITONSERVER_ServerOptions** options);
1797 struct TRITONSERVER_ServerOptions* options,
const char* server_id);
1810 struct TRITONSERVER_ServerOptions* options,
1811 const char* model_repository_path);
1835 struct TRITONSERVER_ServerOptions* options,
1849 struct TRITONSERVER_ServerOptions* options,
const char* model_name);
1860 struct TRITONSERVER_ServerOptions* options,
bool strict);
1870 struct TRITONSERVER_ServerOptions* options,
const char* model_config_name);
1888 struct TRITONSERVER_ServerOptions* options,
1906 struct TRITONSERVER_ServerOptions* options,
const char* resource_name,
1907 const size_t resource_count,
const int device);
1919 struct TRITONSERVER_ServerOptions* options, uint64_t size);
1932 struct TRITONSERVER_ServerOptions* options,
int gpu_device, uint64_t size);
1943 TRITONSERVER_ServerOptions* options,
int gpu_device,
1944 size_t cuda_virtual_address_size);
1957 struct TRITONSERVER_ServerOptions* options, uint64_t size);
1982 struct TRITONSERVER_ServerOptions* options,
const char* cache_name,
1983 const char* config_json);
1993 struct TRITONSERVER_ServerOptions* options,
const char* cache_dir);
2003 struct TRITONSERVER_ServerOptions* options,
double cc);
2013 struct TRITONSERVER_ServerOptions* options,
bool exit);
2023 struct TRITONSERVER_ServerOptions* options,
bool strict);
2033 struct TRITONSERVER_ServerOptions* options,
unsigned int timeout);
2042 struct TRITONSERVER_ServerOptions* options,
unsigned int thread_count);
2051 struct TRITONSERVER_ServerOptions* options,
unsigned int thread_count);
2060 struct TRITONSERVER_ServerOptions* options,
unsigned int retry_count);
2070 struct TRITONSERVER_ServerOptions* options,
bool enable_namespace);
2082 struct TRITONSERVER_ServerOptions* options,
bool enable_peer_access);
2093 struct TRITONSERVER_ServerOptions* options,
const char* file);
2102 struct TRITONSERVER_ServerOptions* options,
bool log);
2111 struct TRITONSERVER_ServerOptions* options,
bool log);
2120 struct TRITONSERVER_ServerOptions* options,
bool log);
2129 struct TRITONSERVER_ServerOptions* options,
2139 struct TRITONSERVER_ServerOptions* options,
int level);
2148 struct TRITONSERVER_ServerOptions* options,
bool metrics);
2159 struct TRITONSERVER_ServerOptions* options,
bool gpu_metrics);
2170 struct TRITONSERVER_ServerOptions* options,
bool cpu_metrics);
2181 struct TRITONSERVER_ServerOptions* options, uint64_t metrics_interval_ms);
2194 struct TRITONSERVER_ServerOptions* options,
const char* backend_dir);
2206 struct TRITONSERVER_ServerOptions* options,
const char* repoagent_dir);
2222 struct TRITONSERVER_ServerOptions* options,
2224 const double fraction);
2236 struct TRITONSERVER_ServerOptions* options,
const char* backend_name,
2237 const char* setting,
const char* value);
2248 struct TRITONSERVER_ServerOptions* options,
const char* policy_name,
2249 const char* setting,
const char* value);
2261 struct TRITONSERVER_ServerOptions* options,
const char* name,
2262 const char* setting,
const char* value);
2295 struct TRITONSERVER_Server** server,
2296 struct TRITONSERVER_ServerOptions* options);
2304 struct TRITONSERVER_Server* server);
2312 struct TRITONSERVER_Server* server);
2323 struct TRITONSERVER_Server* server,
unsigned int timeout);
2335 struct TRITONSERVER_Server* server,
const char* repository_path,
2336 const struct TRITONSERVER_Parameter** name_mapping,
2337 const uint32_t mapping_count);
2346 struct TRITONSERVER_Server* server,
const char* repository_path);
2362 struct TRITONSERVER_Server* server,
bool* live);
2370 struct TRITONSERVER_Server* server,
bool* ready);
2383 struct TRITONSERVER_Server* server,
const char* model_name,
2384 const int64_t model_version,
bool* ready);
2412 struct TRITONSERVER_Server* server,
const char* model_name,
2413 const int64_t model_version, uint32_t* flags,
void** voidp);
2434 struct TRITONSERVER_Server* server,
const char* model_name,
2435 const int64_t model_version, uint32_t* txn_flags,
void** voidp);
2445 struct TRITONSERVER_Server* server,
2446 struct TRITONSERVER_Message** server_metadata);
2461 struct TRITONSERVER_Server* server,
const char* model_name,
2462 const int64_t model_version,
struct TRITONSERVER_Message** model_metadata);
2478 struct TRITONSERVER_Server* server,
const char* model_name,
2479 const int64_t model_version,
struct TRITONSERVER_Message** model_stats);
2496 struct TRITONSERVER_Server* server,
const char* model_name,
2497 const int64_t model_version,
const uint32_t config_version,
2498 struct TRITONSERVER_Message** model_config);
2516 struct TRITONSERVER_Server* server, uint32_t flags,
2517 struct TRITONSERVER_Message** model_index);
2528 struct TRITONSERVER_Server* server,
const char* model_name);
2546 struct TRITONSERVER_Server* server,
const char* model_name,
2547 const struct TRITONSERVER_Parameter** parameters,
2548 const uint64_t parameter_count);
2560 struct TRITONSERVER_Server* server,
const char* model_name);
2575 struct TRITONSERVER_Server* server,
const char* model_name);
2585 struct TRITONSERVER_Server* server,
struct TRITONSERVER_Metrics** metrics);
2609 struct TRITONSERVER_Server* server,
2610 struct TRITONSERVER_InferenceRequest* inference_request,
2611 struct TRITONSERVER_InferenceTrace* trace);
2635 struct TRITONSERVER_MetricFamily** family,
2637 const char* description);
2665 struct TRITONSERVER_MetricArgs** args);
2677 struct TRITONSERVER_MetricArgs* args,
const double* buckets,
2678 const uint64_t buckets_count);
2685 struct TRITONSERVER_MetricArgs* args);
2700 struct TRITONSERVER_Metric** metric,
2701 struct TRITONSERVER_MetricFamily* family,
2702 const struct TRITONSERVER_Parameter** labels,
const uint64_t label_count);
2721 struct TRITONSERVER_Metric** metric,
2722 struct TRITONSERVER_MetricFamily* family,
2723 const struct TRITONSERVER_Parameter** labels,
const uint64_t label_count,
2724 const struct TRITONSERVER_MetricArgs* args);
2734 struct TRITONSERVER_Metric* metric);
2745 struct TRITONSERVER_Metric* metric,
double* value);
2758 struct TRITONSERVER_Metric* metric,
double value);
2768 struct TRITONSERVER_Metric* metric,
double value);
2778 struct TRITONSERVER_Metric* metric,
double value);
void(* TRITONSERVER_InferenceTraceReleaseFn_t)(struct TRITONSERVER_InferenceTrace *trace, void *userp)
Type for trace release callback function.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetTimeoutMicroseconds(struct TRITONSERVER_InferenceRequest *inference_request, uint64_t timeout_us)
Set the timeout for a request, in microseconds.
@ TRITONSERVER_TYPE_INT64
tritonserver_traceactivity_enum
Trace activities.
struct TRITONSERVER_Error *(* TRITONSERVER_ResponseAllocatorReleaseFn_t)(struct TRITONSERVER_ResponseAllocator *allocator, void *buffer, void *buffer_userp, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)
Type for function that is called when the server no longer holds any reference to a buffer allocated ...
const TRITONSERVER_DECLSPEC char * TRITONSERVER_InferenceTraceLevelString(TRITONSERVER_InferenceTraceLevel level)
Get the string representation of a trace level.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetModelLoadDeviceLimit(struct TRITONSERVER_ServerOptions *options, const TRITONSERVER_InstanceGroupKind kind, const int device_id, const double fraction)
Specify the limit on memory usage as a fraction on the device identified by 'kind' and 'device_id'.
enum tritonserver_tracelevel_enum TRITONSERVER_InferenceTraceLevel
TRITONSERVER_InferenceTrace.
@ TRITONSERVER_TRACE_LEVEL_DISABLED
Tracing disabled. No trace activities are reported.
TRITONSERVER_logformat_enum
Logging Formats.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceTraceSpawnChildTrace(struct TRITONSERVER_InferenceTrace *trace, struct TRITONSERVER_InferenceTrace **child_trace)
Get the child trace, spawned from the parent trace.
tritonserver_responsecompleteflag_enum
Inference response complete flags.
const TRITONSERVER_DECLSPEC char * TRITONSERVER_ErrorMessage(struct TRITONSERVER_Error *error)
Get the error message.
struct TRITONSERVER_Error *(* TRITONSERVER_ResponseAllocatorAllocFn_t)(struct TRITONSERVER_ResponseAllocator *allocator, const char *tensor_name, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, void *userp, void **buffer, void **buffer_userp, TRITONSERVER_MemoryType *actual_memory_type, int64_t *actual_memory_type_id)
TRITONSERVER_ResponseAllocator.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_BufferAttributesSetMemoryType(struct TRITONSERVER_BufferAttributes *buffer_attributes, TRITONSERVER_MemoryType memory_type)
Set the memory type field of the buffer attributes.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetCpuMetrics(struct TRITONSERVER_ServerOptions *options, bool cpu_metrics)
Enable or disable CPU metrics collection in a server options.
@ TRITONSERVER_MEMORY_CPU
@ TRITONSERVER_PARAMETER_BYTES
@ TRITONSERVER_LOG_ISO8601
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceTraceModelVersion(struct TRITONSERVER_InferenceTrace *trace, int64_t *model_version)
Get the version of the model associated with a trace.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ApiVersion(uint32_t *major, uint32_t *minor)
Get the TRITONSERVER API version supported by the Triton shared library.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestRemoveInput(struct TRITONSERVER_InferenceRequest *inference_request, const char *name)
Remove an input from a request.
@ TRITONSERVER_LOG_DEFAULT
@ TRITONSERVER_TYPE_UINT16
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceResponseParameter(struct TRITONSERVER_InferenceResponse *inference_response, const uint32_t index, const char **name, TRITONSERVER_ParameterType *type, const void **vvalue)
Get all information about a parameter.
@ TRITONSERVER_TYPE_INT16
struct TRITONSERVER_Error *(* TRITONSERVER_ResponseAllocatorStartFn_t)(struct TRITONSERVER_ResponseAllocator *allocator, void *userp)
Type for function that is called to indicate that subsequent allocation requests will refer to a new ...
@ TRITONSERVER_TRACE_QUEUE_START
@ TRITONSERVER_TYPE_UINT64
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_MetricNew(struct TRITONSERVER_Metric **metric, struct TRITONSERVER_MetricFamily *family, const struct TRITONSERVER_Parameter **labels, const uint64_t label_count)
Create a new metric object.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetStrictReadiness(struct TRITONSERVER_ServerOptions *options, bool strict)
Enable or disable strict readiness handling in a server options.
TRITONSERVER_metrickind_enum
TRITONSERVER_MetricKind.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceTraceSetContext(struct TRITONSERVER_InferenceTrace *trace, const char *trace_context)
Set TRITONSERVER_InferenceTrace context.
@ TRITONSERVER_ERROR_CANCELLED
enum tritonserver_modelcontrolmode_enum TRITONSERVER_ModelControlMode
TRITONSERVER_ServerOptions.
@ TRITONSERVER_RATE_LIMIT_OFF
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceResponseId(struct TRITONSERVER_InferenceResponse *inference_response, const char **request_id)
Get the ID of the request corresponding to a response.
@ TRITONSERVER_ERROR_NOT_FOUND
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ResponseAllocatorDelete(struct TRITONSERVER_ResponseAllocator *allocator)
Delete a response allocator.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_MetricNewWithArgs(struct TRITONSERVER_Metric **metric, struct TRITONSERVER_MetricFamily *family, const struct TRITONSERVER_Parameter **labels, const uint64_t label_count, const struct TRITONSERVER_MetricArgs *args)
Create a new metric object.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestCorrelationIdString(struct TRITONSERVER_InferenceRequest *inference_request, const char **correlation_id)
Get the correlation ID of the inference request as a string.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetHostPolicy(struct TRITONSERVER_ServerOptions *options, const char *policy_name, const char *setting, const char *value)
Set a host policy setting for a given policy name in a server options.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceResponseError(struct TRITONSERVER_InferenceResponse *inference_response)
Return the error status of an inference response.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetIntParameter(struct TRITONSERVER_InferenceRequest *request, const char *key, const int64_t value)
Set an integer parameter in the request.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsNew(struct TRITONSERVER_ServerOptions **options)
Create a new server options object.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_MetricFamilyDelete(struct TRITONSERVER_MetricFamily *family)
Delete a metric family object.
@ TRITONSERVER_TRACE_COMPUTE_END
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_MetricsDelete(struct TRITONSERVER_Metrics *metrics)
Delete a metrics object.
@ TRITONSERVER_LOG_VERBOSE
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceTraceModelName(struct TRITONSERVER_InferenceTrace *trace, const char **model_name)
Get the name of the model associated with a trace.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerModelIsReady(struct TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, bool *ready)
Is the model ready?
enum TRITONSERVER_memorytype_enum TRITONSERVER_MemoryType
TRITONSERVER_MemoryType.
@ TRITONSERVER_TRACE_TENSOR_QUEUE_INPUT
TRITONSERVER_DECLSPEC uint32_t TRITONSERVER_DataTypeByteSize(TRITONSERVER_DataType datatype)
Get the size of a Triton datatype in bytes.
@ TRITONSERVER_BATCH_UNKNOWN
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_BufferAttributesSetMemoryTypeId(struct TRITONSERVER_BufferAttributes *buffer_attributes, int64_t memory_type_id)
Set the memory type id field of the buffer attributes.
@ TRITONSERVER_PARAMETER_DOUBLE
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestTimeoutMicroseconds(struct TRITONSERVER_InferenceRequest *inference_request, uint64_t *timeout_us)
Get the timeout for a request, in microseconds.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetPinnedMemoryPoolByteSize(struct TRITONSERVER_ServerOptions *options, uint64_t size)
Set the total pinned memory byte size that the server can allocate in a server options.
@ TRITONSERVER_METRIC_PROMETHEUS
@ TRITONSERVER_REQUEST_FLAG_SEQUENCE_END
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsAddRateLimiterResource(struct TRITONSERVER_ServerOptions *options, const char *resource_name, const size_t resource_count, const int device)
Add resource count for rate limiting.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceTraceContext(struct TRITONSERVER_InferenceTrace *trace, const char **trace_context)
Get TRITONSERVER_InferenceTrace context.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetCudaVirtualAddressSize(TRITONSERVER_ServerOptions *options, int gpu_device, size_t cuda_virtual_address_size)
Set the size of the virtual address space that will be used for growable memory in implicit state.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerIsLive(struct TRITONSERVER_Server *server, bool *live)
Is the server live?
@ TRITONSERVER_MODEL_CONTROL_POLL
@ TRITONSERVER_TRACE_COMPUTE_INPUT_END
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestRemoveRequestedOutput(struct TRITONSERVER_InferenceRequest *inference_request, const char *name)
Remove an output request from an inference request.
@ TRITONSERVER_TXN_DECOUPLED
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetGpuMetrics(struct TRITONSERVER_ServerOptions *options, bool gpu_metrics)
Enable or disable GPU metrics collection in a server options.
@ TRITONSERVER_ERROR_UNKNOWN
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ErrorNew(TRITONSERVER_Error_Code code, const char *msg)
Create a new error object.
const TRITONSERVER_DECLSPEC char * TRITONSERVER_InferenceTraceActivityString(TRITONSERVER_InferenceTraceActivity activity)
Get the string representation of a trace activity.
enum TRITONSERVER_metrickind_enum TRITONSERVER_MetricKind
TRITONSERVER_MetricKind.
@ TRITONSERVER_TRACE_LEVEL_TIMESTAMPS
Record timestamps for the inference request.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerModelBatchProperties(struct TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, uint32_t *flags, void **voidp)
Get the batch properties of the model.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestAddRawInput(struct TRITONSERVER_InferenceRequest *inference_request, const char *name)
Add a raw input to a request.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceResponseDelete(struct TRITONSERVER_InferenceResponse *inference_response)
TRITONSERVER_InferenceResponse.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceTraceNew(struct TRITONSERVER_InferenceTrace **trace, TRITONSERVER_InferenceTraceLevel level, uint64_t parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn, TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void *trace_userp)
Create a new inference trace object.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerModelStatistics(struct TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, struct TRITONSERVER_Message **model_stats)
Get the statistics of a model as a TRITONSERVER_Message object.
@ TRITONSERVER_PARAMETER_INT
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_BufferAttributesByteSize(struct TRITONSERVER_BufferAttributes *buffer_attributes, size_t *byte_size)
Get the byte size field of the buffer attributes.
const TRITONSERVER_DECLSPEC char * TRITONSERVER_MemoryTypeString(TRITONSERVER_MemoryType memtype)
Get the string representation of a memory type.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetBackendConfig(struct TRITONSERVER_ServerOptions *options, const char *backend_name, const char *setting, const char *value)
Set a configuration setting for a named backend in a server options.
@ TRITONSERVER_ERROR_ALREADY_EXISTS
struct TRITONSERVER_Error *(* TRITONSERVER_ResponseAllocatorBufferAttributesFn_t)(struct TRITONSERVER_ResponseAllocator *allocator, const char *tensor_name, struct TRITONSERVER_BufferAttributes *buffer_attributes, void *userp, void *buffer_userp)
Type for allocation function that allocates a buffer to hold an output tensor with buffer attributes.
tritonserver_modelindexflag_enum
Model index flags. The enum values must be power-of-2 values.
@ TRITONSERVER_REQUEST_FLAG_SEQUENCE_START
#define TRITONSERVER_DECLSPEC
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetCorrelationId(struct TRITONSERVER_InferenceRequest *inference_request, uint64_t correlation_id)
Set the correlation ID of the inference request to be an unsigned integer.
TRITONSERVER_DECLSPEC void TRITONSERVER_ParameterDelete(struct TRITONSERVER_Parameter *parameter)
Delete a parameter object.
@ TRITONSERVER_TRACE_LEVEL_TENSORS
Record input and output tensor values for the inference request.
enum tritonserver_requestreleaseflag_enum TRITONSERVER_RequestReleaseFlag
Inference request release flags.
@ TRITONSERVER_METRIC_KIND_GAUGE
void(* TRITONSERVER_InferenceTraceTensorActivityFn_t)(struct TRITONSERVER_InferenceTrace *trace, TRITONSERVER_InferenceTraceActivity activity, const char *name, TRITONSERVER_DataType datatype, const void *base, size_t byte_size, const int64_t *shape, uint64_t dim_count, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, void *userp)
Type for trace tensor activity callback function.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerIsReady(struct TRITONSERVER_Server *server, bool *ready)
Is the server ready?
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetMetrics(struct TRITONSERVER_ServerOptions *options, bool metrics)
Enable or disable metrics collection in a server options.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_BufferAttributesSetByteSize(struct TRITONSERVER_BufferAttributes *buffer_attributes, size_t byte_size)
Set the byte size field of the buffer attributes.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetLogError(struct TRITONSERVER_ServerOptions *options, bool log)
Enable or disable error level logging.
@ TRITONSERVER_TYPE_BYTES
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_BufferAttributesMemoryTypeId(struct TRITONSERVER_BufferAttributes *buffer_attributes, int64_t *memory_type_id)
Get the memory type id field of the buffer attributes.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ResponseAllocatorSetQueryFunction(struct TRITONSERVER_ResponseAllocator *allocator, TRITONSERVER_ResponseAllocatorQueryFn_t query_fn)
Set the query function to a response allocator object.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestAppendInputData(struct TRITONSERVER_InferenceRequest *inference_request, const char *name, const void *base, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)
Assign a buffer of data to an input.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetLogInfo(struct TRITONSERVER_ServerOptions *options, bool log)
Enable or disable info level logging.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetModelConfigName(struct TRITONSERVER_ServerOptions *options, const char *model_config_name)
Set the custom model configuration name to load for all models.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetEnablePeerAccess(struct TRITONSERVER_ServerOptions *options, bool enable_peer_access)
Enable peer access to allow GPU device to directly access the memory of another GPU device.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_MetricSet(struct TRITONSERVER_Metric *metric, double value)
Set the current value of metric to value.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerUnloadModel(struct TRITONSERVER_Server *server, const char *model_name)
Unload the requested model.
@ TRITONSERVER_INSTANCEGROUPKIND_MODEL
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestDelete(struct TRITONSERVER_InferenceRequest *inference_request)
Delete an inference request object.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_BufferAttributesNew(struct TRITONSERVER_BufferAttributes **buffer_attributes)
TRITONSERVER_BufferAttributes. Create a new buffer attributes object.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetPriority(struct TRITONSERVER_InferenceRequest *inference_request, uint32_t priority)
Deprecated.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_MetricDelete(struct TRITONSERVER_Metric *metric)
Delete a metric object.
enum TRITONSERVER_errorcode_enum TRITONSERVER_Error_Code
TRITONSERVER_Error. Error codes.
TRITONSERVER_DECLSPEC TRITONSERVER_DataType TRITONSERVER_StringToDataType(const char *dtype)
Get the Triton datatype corresponding to a string representation of a datatype.
enum TRITONSERVER_logformat_enum TRITONSERVER_LogFormat
Logging Formats.
const TRITONSERVER_DECLSPEC char * TRITONSERVER_ErrorCodeString(struct TRITONSERVER_Error *error)
Get the string representation of an error code.
@ TRITONSERVER_REQUEST_RELEASE_ALL
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetCudaMemoryPoolByteSize(struct TRITONSERVER_ServerOptions *options, int gpu_device, uint64_t size)
Set the total CUDA memory byte size that the server can allocate on given GPU device in a server options.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetModelControlMode(struct TRITONSERVER_ServerOptions *options, TRITONSERVER_ModelControlMode mode)
Set the model control mode in a server options.
const TRITONSERVER_DECLSPEC char * TRITONSERVER_DataTypeString(TRITONSERVER_DataType datatype)
Get the string representation of a data type.
@ TRITONSERVER_METRIC_KIND_COUNTER
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsDelete(struct TRITONSERVER_ServerOptions *options)
Delete a server options object.
@ TRITONSERVER_TXN_ONE_TO_ONE
TRITONSERVER_errorcode_enum
TRITONSERVER_Error. Error codes.
@ TRITONSERVER_RATE_LIMIT_EXEC_COUNT
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_BufferAttributesMemoryType(struct TRITONSERVER_BufferAttributes *buffer_attributes, TRITONSERVER_MemoryType *memory_type)
Get the memory type field of the buffer attributes.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_MetricObserve(struct TRITONSERVER_Metric *metric, double value)
Sample an observation and count it to the appropriate bucket of a metric.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerStop(struct TRITONSERVER_Server *server)
Stop a server object.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_MetricArgsNew(struct TRITONSERVER_MetricArgs **args)
Create a new metric args object.
void(* TRITONSERVER_InferenceResponseCompleteFn_t)(struct TRITONSERVER_InferenceResponse *response, const uint32_t flags, void *userp)
Type for callback function indicating that an inference response has completed.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_MetricIncrement(struct TRITONSERVER_Metric *metric, double value)
Increment the current value of metric by value.
@ TRITONSERVER_INDEX_FLAG_READY
enum tritonserver_txn_property_flag_enum TRITONSERVER_ModelTxnPropertyFlag
Model transaction policy flags.
const TRITONSERVER_DECLSPEC char * TRITONSERVER_ParameterTypeString(TRITONSERVER_ParameterType paramtype)
Get the string representation of a parameter type.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetBoolParameter(struct TRITONSERVER_InferenceRequest *request, const char *key, const bool value)
Set a boolean parameter in the request.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetMetricsConfig(struct TRITONSERVER_ServerOptions *options, const char *name, const char *setting, const char *value)
Set a configuration setting for metrics in server options.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestAppendInputDataWithBufferAttributes(struct TRITONSERVER_InferenceRequest *inference_request, const char *name, const void *base, struct TRITONSERVER_BufferAttributes *buffer_attributes)
Assign a buffer of data to an input.
TRITONSERVER_memorytype_enum
TRITONSERVER_MemoryType.
@ TRITONSERVER_TRACE_REQUEST_END
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerRegisterModelRepository(struct TRITONSERVER_Server *server, const char *repository_path, const struct TRITONSERVER_Parameter **name_mapping, const uint32_t mapping_count)
Register a new model repository.
enum tritonserver_requestflag_enum TRITONSERVER_RequestFlag
TRITONSERVER_InferenceRequest. Inference request flags.
enum tritonserver_traceactivity_enum TRITONSERVER_InferenceTraceActivity
Trace activities.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_MessageSerializeToJson(struct TRITONSERVER_Message *message, const char **base, size_t *byte_size)
Get the base and size of the buffer containing the serialized message in JSON format.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerModelMetadata(struct TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, struct TRITONSERVER_Message **model_metadata)
Get the metadata of a model as a TRITONSERVER_Message object.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetBackendDirectory(struct TRITONSERVER_ServerOptions *options, const char *backend_dir)
Set the directory containing backend shared libraries.
@ TRITONSERVER_ERROR_INTERNAL
@ TRITONSERVER_MODEL_CONTROL_NONE
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestCancel(struct TRITONSERVER_InferenceRequest *inference_request)
Cancel an inference request.
@ TRITONSERVER_MODEL_CONTROL_EXPLICIT
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetLogWarn(struct TRITONSERVER_ServerOptions *options, bool log)
Enable or disable warning level logging.
struct TRITONSERVER_Error *(* TRITONSERVER_ResponseAllocatorQueryFn_t)(struct TRITONSERVER_ResponseAllocator *allocator, void *userp, const char *tensor_name, size_t *byte_size, TRITONSERVER_MemoryType *memory_type, int64_t *memory_type_id)
Type for function that is called to query the allocator's preferred memory type and memory type ID.
enum TRITONSERVER_parametertype_enum TRITONSERVER_ParameterType
TRITONSERVER_ParameterType.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_GetMetricKind(struct TRITONSERVER_Metric *metric, TRITONSERVER_MetricKind *kind)
Get the TRITONSERVER_MetricKind of the metric, as defined by its corresponding family.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestRemoveAllInputData(struct TRITONSERVER_InferenceRequest *inference_request, const char *name)
Clear all input data from an input, releasing ownership of the buffer(s) that were appended to the input with AppendInputData().
tritonserver_tracelevel_enum
TRITONSERVER_InferenceTrace. Trace levels.
TRITONSERVER_parametertype_enum
TRITONSERVER_ParameterType.
@ TRITONSERVER_TYPE_INVALID
@ TRITONSERVER_PARAMETER_STRING
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerMetadata(struct TRITONSERVER_Server *server, struct TRITONSERVER_Message **server_metadata)
Get the metadata of the server as a TRITONSERVER_Message object.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerNew(struct TRITONSERVER_Server **server, struct TRITONSERVER_ServerOptions *options)
Create a new server object.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_MetricValue(struct TRITONSERVER_Metric *metric, double *value)
Get the current value of a metric object.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceResponseModel(struct TRITONSERVER_InferenceResponse *inference_response, const char **model_name, int64_t *model_version)
Get model used to produce a response.
void(* TRITONSERVER_InferenceTraceActivityFn_t)(struct TRITONSERVER_InferenceTrace *trace, TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns, void *userp)
Type for trace timeline activity callback function.
enum TRITONSERVER_loglevel_enum TRITONSERVER_LogLevel
TRITONSERVER_Logging. Log levels.
tritonserver_txn_property_flag_enum
Model transaction policy flags.
@ TRITONSERVER_REQUEST_RELEASE_RESCHEDULE
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetFlags(struct TRITONSERVER_InferenceRequest *inference_request, uint32_t flags)
Set the flag(s) associated with a request.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_MetricArgsDelete(struct TRITONSERVER_MetricArgs *args)
Delete a metric args object.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_MessageNewFromSerializedJson(struct TRITONSERVER_Message **message, const char *base, size_t byte_size)
TRITONSERVER_Message. Create a new message object from a serialized JSON string.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_MessageDelete(struct TRITONSERVER_Message *message)
Delete a message object.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetId(struct TRITONSERVER_InferenceRequest *inference_request, const char *id)
Set the ID for a request.
enum TRITONSERVER_datatype_enum TRITONSERVER_DataType
TRITONSERVER_DataType.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_BufferAttributesDelete(struct TRITONSERVER_BufferAttributes *buffer_attributes)
Delete a buffer attributes object.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ResponseAllocatorSetBufferAttributesFunction(struct TRITONSERVER_ResponseAllocator *allocator, TRITONSERVER_ResponseAllocatorBufferAttributesFn_t buffer_attributes_fn)
Set the buffer attributes function for a response allocator object.
@ TRITONSERVER_TRACE_COMPUTE_START
tritonserver_batchflag_enum
TRITONSERVER_Server. Model batch flags.
@ TRITONSERVER_ERROR_INVALID_ARG
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerPollModelRepository(struct TRITONSERVER_Server *server)
Check the model repository for changes and update server state based on those changes.
@ TRITONSERVER_MEMORY_CPU_PINNED
enum tritonserver_responsecompleteflag_enum TRITONSERVER_ResponseCompleteFlag
Inference response complete flags.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerUnloadModelAndDependents(struct TRITONSERVER_Server *server, const char *model_name)
Unload the requested model, and also unload any dependent model that was loaded along with the requested model.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetStringParameter(struct TRITONSERVER_InferenceRequest *request, const char *key, const char *value)
Set a string parameter in the request.
@ TRITONSERVER_TRACE_TENSOR_BACKEND_OUTPUT
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_MetricArgsSetHistogram(struct TRITONSERVER_MetricArgs *args, const double *buckets, const uint64_t buckets_count)
Set metric args with histogram metric parameter.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_MetricsFormatted(struct TRITONSERVER_Metrics *metrics, TRITONSERVER_MetricFormat format, const char **base, size_t *byte_size)
Get a buffer containing the metrics in the specified format.
TRITONSERVER_DECLSPEC void TRITONSERVER_ErrorDelete(struct TRITONSERVER_Error *error)
Delete an error object.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerLoadModel(struct TRITONSERVER_Server *server, const char *model_name)
Load the requested model or reload the model if it is already loaded.
@ TRITONSERVER_TYPE_UINT8
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability(struct TRITONSERVER_ServerOptions *options, double cc)
Set the minimum supported CUDA compute capability in a server options.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetModelRepositoryPath(struct TRITONSERVER_ServerOptions *options, const char *model_repository_path)
Set the model repository path in a server options.
@ TRITONSERVER_TYPE_INT32
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetStrictModelConfig(struct TRITONSERVER_ServerOptions *options, bool strict)
Enable or disable strict model configuration handling in a server options.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Parameter * TRITONSERVER_ParameterBytesNew(const char *name, const void *byte_ptr, const uint64_t size)
Create a new parameter object with type TRITONSERVER_PARAMETER_BYTES.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_LogMessage(TRITONSERVER_LogLevel level, const char *filename, const int line, const char *msg)
Log a message at a given log level if that level is enabled.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetExitOnError(struct TRITONSERVER_ServerOptions *options, bool exit)
Enable or disable exit-on-error in a server options.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerUnregisterModelRepository(struct TRITONSERVER_Server *server, const char *repository_path)
Unregister a model repository.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestId(struct TRITONSERVER_InferenceRequest *inference_request, const char **id)
Get the ID for a request.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy(struct TRITONSERVER_InferenceRequest *inference_request, const char *name, const void *base, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, const char *host_policy_name)
Assign a buffer of data to an input for execution on all model instances with the specified host policy.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceResponseOutputClassificationLabel(struct TRITONSERVER_InferenceResponse *inference_response, const uint32_t index, const size_t class_index, const char **label)
Get a classification label associated with an output for a given index.
@ TRITONSERVER_TRACE_LEVEL_MIN
Deprecated. Use TRITONSERVER_TRACE_LEVEL_TIMESTAMPS.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetBufferManagerThreadCount(struct TRITONSERVER_ServerOptions *options, unsigned int thread_count)
Set the number of threads used in buffer manager in a server options.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceTraceTensorNew(struct TRITONSERVER_InferenceTrace **trace, TRITONSERVER_InferenceTraceLevel level, uint64_t parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn, TRITONSERVER_InferenceTraceTensorActivityFn_t tensor_activity_fn, TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void *trace_userp)
Create a new inference trace object.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestPriorityUInt64(struct TRITONSERVER_InferenceRequest *inference_request, uint64_t *priority)
Get the priority for a request.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetPriorityUInt64(struct TRITONSERVER_InferenceRequest *inference_request, uint64_t priority)
Set the priority for a request.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerModelTransactionProperties(struct TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, uint32_t *txn_flags, void **voidp)
Get the transaction policy of the model.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerModelIndex(struct TRITONSERVER_Server *server, uint32_t flags, struct TRITONSERVER_Message **model_index)
Get the index of all unique models in the model repositories as a TRITONSERVER_Message object.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ResponseAllocatorNew(struct TRITONSERVER_ResponseAllocator **allocator, TRITONSERVER_ResponseAllocatorAllocFn_t alloc_fn, TRITONSERVER_ResponseAllocatorReleaseFn_t release_fn, TRITONSERVER_ResponseAllocatorStartFn_t start_fn)
Create a new response allocator object.
@ TRITONSERVER_ERROR_UNAVAILABLE
@ TRITONSERVER_INSTANCEGROUPKIND_AUTO
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerLoadModelWithParameters(struct TRITONSERVER_Server *server, const char *model_name, const struct TRITONSERVER_Parameter **parameters, const uint64_t parameter_count)
Load the requested model or reload the model if it is already loaded, with load parameters provided.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceResponseOutputCount(struct TRITONSERVER_InferenceResponse *inference_response, uint32_t *count)
Get the number of outputs available in the response.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceResponseOutput(struct TRITONSERVER_InferenceResponse *inference_response, const uint32_t index, const char **name, TRITONSERVER_DataType *datatype, const int64_t **shape, uint64_t *dim_count, const void **base, size_t *byte_size, TRITONSERVER_MemoryType *memory_type, int64_t *memory_type_id, void **userp)
Get all information about an output tensor.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerInferAsync(struct TRITONSERVER_Server *server, struct TRITONSERVER_InferenceRequest *inference_request, struct TRITONSERVER_InferenceTrace *trace)
Perform inference using the meta-data and inputs supplied by the 'inference_request'.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetCacheDirectory(struct TRITONSERVER_ServerOptions *options, const char *cache_dir)
Set the directory containing cache shared libraries.
@ TRITONSERVER_METRIC_KIND_HISTOGRAM
TRITONSERVER_instancegroupkind_enum
TRITONSERVER_InstanceGroupKind.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetModelLoadRetryCount(struct TRITONSERVER_ServerOptions *options, unsigned int retry_count)
Set the number of retries to load a model in a server options.
tritonserver_modelcontrolmode_enum
TRITONSERVER_ServerOptions. Model control modes.
@ TRITONSERVER_TRACE_TENSOR_BACKEND_INPUT
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetExitTimeout(struct TRITONSERVER_ServerOptions *options, unsigned int timeout)
Set the exit timeout, in seconds, for the server in a server options.
TRITONSERVER_loglevel_enum
TRITONSERVER_Logging. Log levels.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetDoubleParameter(struct TRITONSERVER_InferenceRequest *request, const char *key, const double value)
Set a double parameter in the request.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetReleaseCallback(struct TRITONSERVER_InferenceRequest *inference_request, TRITONSERVER_InferenceRequestReleaseFn_t request_release_fn, void *request_release_userp)
Set the release callback for an inference request.
@ TRITONSERVER_ERROR_UNSUPPORTED
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetResponseCacheByteSize(struct TRITONSERVER_ServerOptions *options, uint64_t size)
Deprecated.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_GetMetricFamilyKind(struct TRITONSERVER_MetricFamily *family, TRITONSERVER_MetricKind *kind)
Get the TRITONSERVER_MetricKind of the metric family.
const TRITONSERVER_DECLSPEC char * TRITONSERVER_InstanceGroupKindString(TRITONSERVER_InstanceGroupKind kind)
Get the string representation of an instance-group kind.
enum tritonserver_ratelimitmode_enum TRITONSERVER_RateLimitMode
Rate limit modes.
@ TRITONSERVER_TRACE_CUSTOM_ACTIVITY
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceTraceId(struct TRITONSERVER_InferenceTrace *trace, uint64_t *id)
Get the id associated with a trace.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestAddRequestedOutput(struct TRITONSERVER_InferenceRequest *inference_request, const char *name)
Add an output request to an inference request.
enum tritonserver_metricformat_enum TRITONSERVER_MetricFormat
TRITONSERVER_Metrics. Metric format types.
@ TRITONSERVER_TRACE_COMPUTE_OUTPUT_START
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestIsCancelled(struct TRITONSERVER_InferenceRequest *inference_request, bool *is_cancelled)
Query whether the request is cancelled or not.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestPriority(struct TRITONSERVER_InferenceRequest *inference_request, uint32_t *priority)
Deprecated.
@ TRITONSERVER_RESPONSE_COMPLETE_FINAL
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceTraceDelete(struct TRITONSERVER_InferenceTrace *trace)
Delete a trace object.
void(* TRITONSERVER_InferenceRequestReleaseFn_t)(struct TRITONSERVER_InferenceRequest *request, const uint32_t flags, void *userp)
Type for inference request release callback function.
TRITONSERVER_datatype_enum
TRITONSERVER_DataType.
tritonserver_metricformat_enum
TRITONSERVER_Metrics. Metric format types.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Parameter * TRITONSERVER_ParameterNew(const char *name, const TRITONSERVER_ParameterType type, const void *value)
Create a new parameter object.
@ TRITONSERVER_INSTANCEGROUPKIND_GPU
enum TRITONSERVER_instancegroupkind_enum TRITONSERVER_InstanceGroupKind
TRITONSERVER_InstanceGroupKind.
TRITONSERVER_DECLSPEC bool TRITONSERVER_LogIsEnabled(TRITONSERVER_LogLevel level)
Is a log level enabled?
tritonserver_requestflag_enum
TRITONSERVER_InferenceRequest. Inference request flags.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetRateLimiterMode(struct TRITONSERVER_ServerOptions *options, TRITONSERVER_RateLimitMode mode)
Set the rate limit mode in a server options.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetRepoAgentDirectory(struct TRITONSERVER_ServerOptions *options, const char *repoagent_dir)
Set the directory containing repository agent shared libraries.
@ TRITONSERVER_TYPE_UINT32
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestFlags(struct TRITONSERVER_InferenceRequest *inference_request, uint32_t *flags)
Get the flag(s) associated with a request.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerModelConfig(struct TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, const uint32_t config_version, struct TRITONSERVER_Message **model_config)
Get the configuration of a model as a TRITONSERVER_Message object.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestNew(struct TRITONSERVER_InferenceRequest **inference_request, struct TRITONSERVER_Server *server, const char *model_name, const int64_t model_version)
Create a new inference request object.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_MetricFamilyNew(struct TRITONSERVER_MetricFamily **family, const TRITONSERVER_MetricKind kind, const char *name, const char *description)
Create a new metric family object.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerMetrics(struct TRITONSERVER_Server *server, struct TRITONSERVER_Metrics **metrics)
Get the current metrics for the server.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestAddInput(struct TRITONSERVER_InferenceRequest *inference_request, const char *name, const TRITONSERVER_DataType datatype, const int64_t *shape, uint64_t dim_count)
Add an input to a request.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetServerId(struct TRITONSERVER_ServerOptions *options, const char *server_id)
Set the textual ID for the server in a server options.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerDelete(struct TRITONSERVER_Server *server)
Delete a server object.
enum tritonserver_modelindexflag_enum TRITONSERVER_ModelIndexFlag
Model index flags. The enum values must be power-of-2 values.
@ TRITONSERVER_TRACE_LEVEL_MAX
Deprecated. Use TRITONSERVER_TRACE_LEVEL_TIMESTAMPS.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_BufferAttributesSetCudaIpcHandle(struct TRITONSERVER_BufferAttributes *buffer_attributes, void *cuda_ipc_handle)
Set the CudaIpcHandle field of the buffer attributes.
@ TRITONSERVER_TRACE_REQUEST_START
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetLogFile(struct TRITONSERVER_ServerOptions *options, const char *file)
Provide a log output file.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetStartupModel(struct TRITONSERVER_ServerOptions *options, const char *model_name)
Set the model to be loaded at startup in a server options.
@ TRITONSERVER_PARAMETER_BOOL
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceResponseParameterCount(struct TRITONSERVER_InferenceResponse *inference_response, uint32_t *count)
Get the number of parameters available in the response.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetMetricsInterval(struct TRITONSERVER_ServerOptions *options, uint64_t metrics_interval_ms)
Set the interval for metrics collection in a server options.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_BufferAttributesCudaIpcHandle(struct TRITONSERVER_BufferAttributes *buffer_attributes, void **cuda_ipc_handle)
Get the CudaIpcHandle field of the buffer attributes object.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestRemoveAllRequestedOutputs(struct TRITONSERVER_InferenceRequest *inference_request)
Remove all output requests from an inference request.
enum tritonserver_batchflag_enum TRITONSERVER_ModelBatchFlag
TRITONSERVER_Server. Model batch flags.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerSetExitTimeout(struct TRITONSERVER_Server *server, unsigned int timeout)
Set the exit timeout on the server object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error_Code TRITONSERVER_ErrorCode(struct TRITONSERVER_Error *error)
Get the error code.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestCorrelationId(struct TRITONSERVER_InferenceRequest *inference_request, uint64_t *correlation_id)
Get the correlation ID of the inference request as an unsigned integer.
@ TRITONSERVER_MEMORY_GPU
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetLogVerbose(struct TRITONSERVER_ServerOptions *options, int level)
Set verbose logging level.
tritonserver_requestreleaseflag_enum
Inference request release flags.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceTraceRequestId(struct TRITONSERVER_InferenceTrace *trace, const char **request_id)
Get the request id associated with a trace.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceTraceParentId(struct TRITONSERVER_InferenceTrace *trace, uint64_t *parent_id)
Get the parent id associated with a trace.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetCacheConfig(struct TRITONSERVER_ServerOptions *options, const char *cache_name, const char *config_json)
Set the cache config that will be used to initialize the cache implementation for "cache_name".
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetCorrelationIdString(struct TRITONSERVER_InferenceRequest *inference_request, const char *correlation_id)
Set the correlation ID of the inference request to be a string.
tritonserver_ratelimitmode_enum
Rate limit modes.
@ TRITONSERVER_BATCH_FIRST_DIM
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetLogFormat(struct TRITONSERVER_ServerOptions *options, const TRITONSERVER_LogFormat format)
Set the logging format.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceTraceReportActivity(struct TRITONSERVER_InferenceTrace *trace, uint64_t timestamp, const char *activity_name)
Report a trace activity.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetResponseCallback(struct TRITONSERVER_InferenceRequest *inference_request, struct TRITONSERVER_ResponseAllocator *response_allocator, void *response_allocator_userp, TRITONSERVER_InferenceResponseCompleteFn_t response_fn, void *response_userp)
Set the allocator and response callback for an inference request.
@ TRITONSERVER_INSTANCEGROUPKIND_CPU
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetModelNamespacing(struct TRITONSERVER_ServerOptions *options, bool enable_namespace)
Enable model namespacing to allow serving models with the same name if they are in different namespaces.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_InferenceRequestRemoveAllInputs(struct TRITONSERVER_InferenceRequest *inference_request)
Remove all inputs from a request.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetModelLoadThreadCount(struct TRITONSERVER_ServerOptions *options, unsigned int thread_count)
Set the number of threads to concurrently load models in a server options.