diff --git a/docs/reference.asciidoc b/docs/reference.asciidoc index 5d85a21cb..086412e4d 100644 --- a/docs/reference.asciidoc +++ b/docs/reference.asciidoc @@ -2479,7 +2479,6 @@ A partial reduction is performed every time the coordinating node has received a ** *`ignore_unavailable` (Optional, boolean)*: Whether specified concrete indices should be ignored when unavailable (missing or closed) ** *`lenient` (Optional, boolean)*: Specify whether format-based query failures (such as providing text to a numeric field) should be ignored ** *`max_concurrent_shard_requests` (Optional, number)*: The number of concurrent shard requests per node this search executes concurrently. This value should be used to limit the impact of the search on the cluster in order to limit the number of concurrent shard requests -** *`min_compatible_shard_node` (Optional, string)* ** *`preference` (Optional, string)*: Specify the node or shard the operation should be performed on (default: random) ** *`request_cache` (Optional, boolean)*: Specify if request cache should be used for this request or not, defaults to true ** *`routing` (Optional, string)*: A list of specific routing values @@ -2622,9 +2621,6 @@ It supports a list of values, such as `open,hidden`. local cluster state. If `false` the list of selected nodes are computed from the cluster state of the master node. In both cases the coordinating node will send requests for further information to each selected node. -** *`master_timeout` (Optional, string | -1 | 0)*: The period to wait for a connection to the master node. -If the master node is not available before the timeout expires, the request fails and returns an error. -To indicated that the request should never timeout, you can set it to `-1`. [discrete] ==== allocation @@ -8402,6 +8398,70 @@ These settings are specific to the `cohere` service. These settings are specific to the task type you specified. 
** *`timeout` (Optional, string | -1 | 0)*: Specifies the amount of time to wait for the inference endpoint to be created. +[discrete] +==== put_custom +Create a custom inference endpoint. + +The custom service gives more control over how to interact with external inference services that aren't explicitly supported through dedicated integrations. +The custom service gives you the ability to define the headers, url, query parameters, request body, and secrets. +The custom service supports template replacement, which enables you to define a template that is replaced with the value associated with its key. +Templates are portions of a string that start with `${` and end with `}`. +The parameters `secret_parameters` and `task_settings` are checked for keys for template replacement. Template replacement is supported in the `request`, `headers`, `url`, and `query_parameters`. +If the definition (key) is not found for a template, an error message is returned. +In case of an endpoint definition like the following: +---- +PUT _inference/text_embedding/test-text-embedding +{ + "service": "custom", + "service_settings": { + "secret_parameters": { + "api_key": "" + }, + "url": "...endpoints.huggingface.cloud/v1/embeddings", + "headers": { + "Authorization": "Bearer ${api_key}", + "Content-Type": "application/json" + }, + "request": "{\"input\": ${input}}", + "response": { + "json_parser": { + "text_embeddings":"$.data[*].embedding[*]" + } + } + } +} +---- +To replace `${api_key}`, the `secret_parameters` and `task_settings` are checked for a key named `api_key`. + +> info +> Templates should not be surrounded by quotes. + +Pre-defined templates: +* `${input}` refers to the array of input strings that comes from the `input` field of the subsequent inference requests. +* `${input_type}` refers to the input type translation values. +* `${query}` refers to the query field used specifically for reranking tasks. 
+* `${top_n}` refers to the `top_n` field available when performing rerank requests. +* `${return_documents}` refers to the `return_documents` field available when performing rerank requests. + +https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom[Endpoint documentation] +[source,ts] +---- +client.inference.putCustom({ task_type, custom_inference_id, service, service_settings }) +---- + +[discrete] +==== Arguments + +* *Request (object):* +** *`task_type` (Enum("text_embedding" | "sparse_embedding" | "rerank" | "completion"))*: The type of the inference task that the model will perform. +** *`custom_inference_id` (string)*: The unique identifier of the inference endpoint. +** *`service` (Enum("custom"))*: The type of service supported for the specified task type. In this case, `custom`. +** *`service_settings` ({ headers, input_type, query_parameters, request, response, secret_parameters, url })*: Settings used to install the inference model. +These settings are specific to the `custom` service. +** *`chunking_settings` (Optional, { max_chunk_size, overlap, sentence_overlap, strategy })*: The chunking configuration object. +** *`task_settings` (Optional, { parameters })*: Settings to configure the inference task. +These settings are specific to the task type you specified. + [discrete] ==== put_deepseek Create a DeepSeek inference endpoint. diff --git a/src/api/api/inference.ts b/src/api/api/inference.ts index a63390658..eac686fa7 100644 --- a/src/api/api/inference.ts +++ b/src/api/api/inference.ts @@ -609,6 +609,51 @@ export default class Inference { return await this.transport.request({ path, method, querystring, body, meta }, options) } + /** + * Create a custom inference endpoint. The custom service gives more control over how to interact with external inference services that aren't explicitly supported through dedicated integrations. 
The custom service gives you the ability to define the headers, url, query parameters, request body, and secrets. The custom service supports template replacement, which enables you to define a template that is replaced with the value associated with its key. Templates are portions of a string that start with `${` and end with `}`. The parameters `secret_parameters` and `task_settings` are checked for keys for template replacement. Template replacement is supported in the `request`, `headers`, `url`, and `query_parameters`. If the definition (key) is not found for a template, an error message is returned. In case of an endpoint definition like the following: ``` PUT _inference/text_embedding/test-text-embedding { "service": "custom", "service_settings": { "secret_parameters": { "api_key": "" }, "url": "...endpoints.huggingface.cloud/v1/embeddings", "headers": { "Authorization": "Bearer ${api_key}", "Content-Type": "application/json" }, "request": "{\"input\": ${input}}", "response": { "json_parser": { "text_embeddings":"$.data[*].embedding[*]" } } } } ``` To replace `${api_key}`, the `secret_parameters` and `task_settings` are checked for a key named `api_key`. > info > Templates should not be surrounded by quotes. Pre-defined templates: * `${input}` refers to the array of input strings that comes from the `input` field of the subsequent inference requests. * `${input_type}` refers to the input type translation values. * `${query}` refers to the query field used specifically for reranking tasks. * `${top_n}` refers to the `top_n` field available when performing rerank requests. * `${return_documents}` refers to the `return_documents` field available when performing rerank requests. 
+ * @see {@link https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom | Elasticsearch API documentation} + */ + async putCustom (this: That, params: T.InferencePutCustomRequest | TB.InferencePutCustomRequest, options?: TransportRequestOptionsWithOutMeta): Promise + async putCustom (this: That, params: T.InferencePutCustomRequest | TB.InferencePutCustomRequest, options?: TransportRequestOptionsWithMeta): Promise> + async putCustom (this: That, params: T.InferencePutCustomRequest | TB.InferencePutCustomRequest, options?: TransportRequestOptions): Promise + async putCustom (this: That, params: T.InferencePutCustomRequest | TB.InferencePutCustomRequest, options?: TransportRequestOptions): Promise { + const acceptedPath: string[] = ['task_type', 'custom_inference_id'] + const acceptedBody: string[] = ['chunking_settings', 'service', 'service_settings', 'task_settings'] + const querystring: Record = {} + // @ts-expect-error + const userBody: any = params?.body + let body: Record | string + if (typeof userBody === 'string') { + body = userBody + } else { + body = userBody != null ? { ...userBody } : undefined + } + + for (const key in params) { + if (acceptedBody.includes(key)) { + body = body ?? {} + // @ts-expect-error + body[key] = params[key] + } else if (acceptedPath.includes(key)) { + continue + } else if (key !== 'body') { + // @ts-expect-error + querystring[key] = params[key] + } + } + + const method = 'PUT' + const path = `/_inference/${encodeURIComponent(params.task_type.toString())}/${encodeURIComponent(params.custom_inference_id.toString())}` + const meta: TransportRequestMetadata = { + name: 'inference.put_custom', + pathParts: { + task_type: params.task_type, + custom_inference_id: params.custom_inference_id + } + } + return await this.transport.request({ path, method, querystring, body, meta }, options) + } + /** * Create a DeepSeek inference endpoint. 
Create an inference endpoint to perform an inference task with the `deepseek` service. * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-deepseek.html | Elasticsearch API documentation} diff --git a/src/api/types.ts b/src/api/types.ts index ed1f41e58..1dd850938 100644 --- a/src/api/types.ts +++ b/src/api/types.ts @@ -3500,9 +3500,10 @@ export interface AggregationsFiltersAggregation extends AggregationsBucketAggreg } export interface AggregationsFiltersBucketKeys extends AggregationsMultiBucketBase { + key?: string } export type AggregationsFiltersBucket = AggregationsFiltersBucketKeys -& { [property: string]: AggregationsAggregate | long } +& { [property: string]: AggregationsAggregate | string | long } export interface AggregationsFormatMetricAggregationBase extends AggregationsMetricAggregationBase { format?: string @@ -4672,7 +4673,7 @@ export interface AnalysisEdgeNGramTokenizer extends AnalysisTokenizerBase { custom_token_chars?: string max_gram?: integer min_gram?: integer - token_chars?: string | AnalysisTokenChar[] + token_chars?: AnalysisTokenChar[] } export interface AnalysisElisionTokenFilter extends AnalysisTokenFilterBase { @@ -6961,7 +6962,6 @@ export interface AsyncSearchSubmitRequest extends RequestBase { ignore_unavailable?: boolean lenient?: boolean max_concurrent_shard_requests?: long - min_compatible_shard_node?: VersionString preference?: string request_cache?: boolean routing?: Routing @@ -7150,7 +7150,6 @@ export interface CatAliasesRequest extends CatCatRequestBase { s?: Names expand_wildcards?: ExpandWildcards local?: boolean - master_timeout?: Duration } export type CatAliasesResponse = CatAliasesAliasesRecord[] @@ -10809,7 +10808,14 @@ export interface EsqlAsyncEsqlResult extends EsqlEsqlResult { is_running: boolean } -export interface EsqlClusterInfo { +export interface EsqlEsqlClusterDetails { + status: EsqlEsqlClusterStatus + indices: string + took?: DurationValue + _shards?: EsqlEsqlShardInfo +} 
+ +export interface EsqlEsqlClusterInfo { total: integer successful: integer running: integer @@ -10819,27 +10825,20 @@ export interface EsqlClusterInfo { details: Record } -export interface EsqlColumnInfo { +export type EsqlEsqlClusterStatus = 'running' | 'successful' | 'partial' | 'skipped' | 'failed' + +export interface EsqlEsqlColumnInfo { name: string type: string } -export interface EsqlEsqlClusterDetails { - status: EsqlEsqlClusterStatus - indices: string - took?: DurationValue - _shards?: EsqlEsqlShardInfo -} - -export type EsqlEsqlClusterStatus = 'running' | 'successful' | 'partial' | 'skipped' | 'failed' - export interface EsqlEsqlResult { took?: DurationValue is_partial?: boolean - all_columns?: EsqlColumnInfo[] - columns: EsqlColumnInfo[] + all_columns?: EsqlEsqlColumnInfo[] + columns: EsqlEsqlColumnInfo[] values: FieldValue[][] - _clusters?: EsqlClusterInfo + _clusters?: EsqlEsqlClusterInfo profile?: any } @@ -13471,6 +13470,32 @@ export interface InferenceContentObject { type: string } +export interface InferenceCustomRequestParams { + content: string +} + +export interface InferenceCustomResponseParams { + json_parser: any +} + +export interface InferenceCustomServiceSettings { + headers?: any + input_type?: any + query_parameters?: any + request: InferenceCustomRequestParams + response: InferenceCustomResponseParams + secret_parameters: any + url?: string +} + +export type InferenceCustomServiceType = 'custom' + +export interface InferenceCustomTaskSettings { + parameters?: any +} + +export type InferenceCustomTaskType = 'text_embedding' | 'sparse_embedding' | 'rerank' | 'completion' + export interface InferenceDeepSeekServiceSettings { api_key: string model_id: string @@ -13605,6 +13630,11 @@ export interface InferenceInferenceEndpointInfoCohere extends InferenceInference task_type: InferenceTaskTypeCohere } +export interface InferenceInferenceEndpointInfoCustom extends InferenceInferenceEndpoint { + inference_id: string + task_type: 
InferenceTaskTypeCustom +} + export interface InferenceInferenceEndpointInfoDeepSeek extends InferenceInferenceEndpoint { inference_id: string task_type: InferenceTaskTypeDeepSeek @@ -13780,6 +13810,8 @@ export type InferenceTaskTypeAzureOpenAI = 'text_embedding' | 'completion' export type InferenceTaskTypeCohere = 'text_embedding' | 'rerank' | 'completion' +export type InferenceTaskTypeCustom = 'text_embedding' | 'sparse_embedding' | 'rerank' | 'completion' + export type InferenceTaskTypeDeepSeek = 'completion' | 'chat_completion' export type InferenceTaskTypeELSER = 'sparse_embedding' @@ -13986,6 +14018,17 @@ export interface InferencePutCohereRequest extends RequestBase { export type InferencePutCohereResponse = InferenceInferenceEndpointInfoCohere +export interface InferencePutCustomRequest extends RequestBase { + task_type: InferenceCustomTaskType + custom_inference_id: Id + chunking_settings?: InferenceInferenceChunkingSettings + service: InferenceCustomServiceType + service_settings: InferenceCustomServiceSettings + task_settings?: InferenceCustomTaskSettings +} + +export type InferencePutCustomResponse = InferenceInferenceEndpointInfoCustom + export interface InferencePutDeepseekRequest extends RequestBase { task_type: InferenceTaskTypeDeepSeek deepseek_inference_id: Id diff --git a/src/api/typesWithBodyKey.ts b/src/api/typesWithBodyKey.ts index 6aa9c313d..61b70808f 100644 --- a/src/api/typesWithBodyKey.ts +++ b/src/api/typesWithBodyKey.ts @@ -3577,9 +3577,10 @@ export interface AggregationsFiltersAggregation extends AggregationsBucketAggreg } export interface AggregationsFiltersBucketKeys extends AggregationsMultiBucketBase { + key?: string } export type AggregationsFiltersBucket = AggregationsFiltersBucketKeys -& { [property: string]: AggregationsAggregate | long } +& { [property: string]: AggregationsAggregate | string | long } export interface AggregationsFormatMetricAggregationBase extends AggregationsMetricAggregationBase { format?: string @@ -4749,7 
+4750,7 @@ export interface AnalysisEdgeNGramTokenizer extends AnalysisTokenizerBase { custom_token_chars?: string max_gram?: integer min_gram?: integer - token_chars?: string | AnalysisTokenChar[] + token_chars?: AnalysisTokenChar[] } export interface AnalysisElisionTokenFilter extends AnalysisTokenFilterBase { @@ -7038,7 +7039,6 @@ export interface AsyncSearchSubmitRequest extends RequestBase { ignore_unavailable?: boolean lenient?: boolean max_concurrent_shard_requests?: long - min_compatible_shard_node?: VersionString preference?: string request_cache?: boolean routing?: Routing @@ -7231,7 +7231,6 @@ export interface CatAliasesRequest extends CatCatRequestBase { s?: Names expand_wildcards?: ExpandWildcards local?: boolean - master_timeout?: Duration } export type CatAliasesResponse = CatAliasesAliasesRecord[] @@ -10980,7 +10979,14 @@ export interface EsqlAsyncEsqlResult extends EsqlEsqlResult { is_running: boolean } -export interface EsqlClusterInfo { +export interface EsqlEsqlClusterDetails { + status: EsqlEsqlClusterStatus + indices: string + took?: DurationValue + _shards?: EsqlEsqlShardInfo +} + +export interface EsqlEsqlClusterInfo { total: integer successful: integer running: integer @@ -10990,27 +10996,20 @@ export interface EsqlClusterInfo { details: Record } -export interface EsqlColumnInfo { +export type EsqlEsqlClusterStatus = 'running' | 'successful' | 'partial' | 'skipped' | 'failed' + +export interface EsqlEsqlColumnInfo { name: string type: string } -export interface EsqlEsqlClusterDetails { - status: EsqlEsqlClusterStatus - indices: string - took?: DurationValue - _shards?: EsqlEsqlShardInfo -} - -export type EsqlEsqlClusterStatus = 'running' | 'successful' | 'partial' | 'skipped' | 'failed' - export interface EsqlEsqlResult { took?: DurationValue is_partial?: boolean - all_columns?: EsqlColumnInfo[] - columns: EsqlColumnInfo[] + all_columns?: EsqlEsqlColumnInfo[] + columns: EsqlEsqlColumnInfo[] values: FieldValue[][] - _clusters?: 
EsqlClusterInfo + _clusters?: EsqlEsqlClusterInfo profile?: any } @@ -13713,6 +13712,32 @@ export interface InferenceContentObject { type: string } +export interface InferenceCustomRequestParams { + content: string +} + +export interface InferenceCustomResponseParams { + json_parser: any +} + +export interface InferenceCustomServiceSettings { + headers?: any + input_type?: any + query_parameters?: any + request: InferenceCustomRequestParams + response: InferenceCustomResponseParams + secret_parameters: any + url?: string +} + +export type InferenceCustomServiceType = 'custom' + +export interface InferenceCustomTaskSettings { + parameters?: any +} + +export type InferenceCustomTaskType = 'text_embedding' | 'sparse_embedding' | 'rerank' | 'completion' + export interface InferenceDeepSeekServiceSettings { api_key: string model_id: string @@ -13847,6 +13872,11 @@ export interface InferenceInferenceEndpointInfoCohere extends InferenceInference task_type: InferenceTaskTypeCohere } +export interface InferenceInferenceEndpointInfoCustom extends InferenceInferenceEndpoint { + inference_id: string + task_type: InferenceTaskTypeCustom +} + export interface InferenceInferenceEndpointInfoDeepSeek extends InferenceInferenceEndpoint { inference_id: string task_type: InferenceTaskTypeDeepSeek @@ -14022,6 +14052,8 @@ export type InferenceTaskTypeAzureOpenAI = 'text_embedding' | 'completion' export type InferenceTaskTypeCohere = 'text_embedding' | 'rerank' | 'completion' +export type InferenceTaskTypeCustom = 'text_embedding' | 'sparse_embedding' | 'rerank' | 'completion' + export type InferenceTaskTypeDeepSeek = 'completion' | 'chat_completion' export type InferenceTaskTypeELSER = 'sparse_embedding' @@ -14254,6 +14286,20 @@ export interface InferencePutCohereRequest extends RequestBase { export type InferencePutCohereResponse = InferenceInferenceEndpointInfoCohere +export interface InferencePutCustomRequest extends RequestBase { + task_type: InferenceCustomTaskType + 
custom_inference_id: Id + /** @deprecated The use of the 'body' key has been deprecated, move the nested keys to the top level object. */ + body?: { + chunking_settings?: InferenceInferenceChunkingSettings + service: InferenceCustomServiceType + service_settings: InferenceCustomServiceSettings + task_settings?: InferenceCustomTaskSettings + } +} + +export type InferencePutCustomResponse = InferenceInferenceEndpointInfoCustom + export interface InferencePutDeepseekRequest extends RequestBase { task_type: InferenceTaskTypeDeepSeek deepseek_inference_id: Id