Auto-generated code for 8.19 #2922

Merged 1 commit on Jul 24, 2025
68 changes: 64 additions & 4 deletions docs/reference.asciidoc
@@ -2479,7 +2479,6 @@ A partial reduction is performed every time the coordinating node has received a
** *`ignore_unavailable` (Optional, boolean)*: Whether specified concrete indices should be ignored when unavailable (missing or closed)
** *`lenient` (Optional, boolean)*: Specify whether format-based query failures (such as providing text to a numeric field) should be ignored
** *`max_concurrent_shard_requests` (Optional, number)*: The number of concurrent shard requests per node that this search executes. Use this value to limit the impact of the search on the cluster and the number of concurrent shard requests
** *`min_compatible_shard_node` (Optional, string)*
** *`preference` (Optional, string)*: Specify the node or shard the operation should be performed on (default: random)
** *`request_cache` (Optional, boolean)*: Specify whether the request cache should be used for this request (defaults to true)
** *`routing` (Optional, string)*: A list of specific routing values
@@ -2622,9 +2621,6 @@ It supports a list of values, such as `open,hidden`.
local cluster state. If `false`, the list of selected nodes is computed
from the cluster state of the master node. In both cases the coordinating
node will send requests for further information to each selected node.
** *`master_timeout` (Optional, string | -1 | 0)*: The period to wait for a connection to the master node.
If the master node is not available before the timeout expires, the request fails and returns an error.
To indicate that the request should never time out, you can set it to `-1`.

[discrete]
==== allocation
@@ -8402,6 +8398,70 @@ These settings are specific to the `cohere` service.
These settings are specific to the task type you specified.
** *`timeout` (Optional, string | -1 | 0)*: Specifies the amount of time to wait for the inference endpoint to be created.

[discrete]
==== put_custom
Create a custom inference endpoint.

The custom service gives you more control over how to interact with external inference services that aren't explicitly supported through dedicated integrations: you can define the headers, URL, query parameters, request body, and secrets.
It also supports template replacement, which lets you define a template that is replaced with the value associated with its key.
Templates are portions of a string that start with `${` and end with `}`.
The parameters `secret_parameters` and `task_settings` are checked for keys for template replacement, which is supported in the `request`, `headers`, `url`, and `query_parameters` fields.
If the definition (key) is not found for a template, an error message is returned.
For example, given an endpoint definition like the following:
----
PUT _inference/text_embedding/test-text-embedding
{
"service": "custom",
"service_settings": {
"secret_parameters": {
"api_key": "<some api key>"
},
"url": "...endpoints.huggingface.cloud/v1/embeddings",
"headers": {
"Authorization": "Bearer ${api_key}",
"Content-Type": "application/json"
},
"request": "{\"input\": ${input}}",
"response": {
"json_parser": {
"text_embeddings":"$.data[*].embedding[*]"
}
}
}
}
----
To replace `${api_key}`, the `secret_parameters` and `task_settings` objects are checked for a key named `api_key`.

> info
> Templates should not be surrounded by quotes.

Pre-defined templates:
* `${input}` refers to the array of input strings that comes from the `input` field of the subsequent inference requests.
* `${input_type}` refers to the input type translation values.
* `${query}` refers to the query field used specifically for reranking tasks.
* `${top_n}` refers to the `top_n` field available when performing rerank requests.
* `${return_documents}` refers to the `return_documents` field available when performing rerank requests.
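As an illustration of the mechanism only (not the service's actual implementation), the replacement can be sketched as a simple keyed substitution; the `applyTemplates` helper and its throw-on-unknown-key behavior are assumptions here:

```typescript
// Hypothetical sketch of template replacement: keys are looked up in the
// merged secret_parameters / task_settings values, and an unknown key is
// treated as an error, mirroring the documented behavior.
function applyTemplates (
  template: string,
  params: Record<string, string>
): string {
  return template.replace(/\$\{(\w+)\}/g, (_match: string, key: string) => {
    if (!(key in params)) {
      throw new Error(`No replacement found for template \${${key}}`)
    }
    return params[key]
  })
}

// Example: resolving the Authorization header from the definition above.
const header = applyTemplates('Bearer ${api_key}', { api_key: 'my-secret' })
// header === 'Bearer my-secret'
```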

https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom[Endpoint documentation]
[source,ts]
----
client.inference.putCustom({ task_type, custom_inference_id, service, service_settings })
----

[discrete]
==== Arguments

* *Request (object):*
** *`task_type` (Enum("text_embedding" | "sparse_embedding" | "rerank" | "completion"))*: The type of the inference task that the model will perform.
** *`custom_inference_id` (string)*: The unique identifier of the inference endpoint.
** *`service` (Enum("custom"))*: The type of service supported for the specified task type. In this case, `custom`.
** *`service_settings` ({ headers, input_type, query_parameters, request, response, secret_parameters, url })*: Settings used to install the inference model.
These settings are specific to the `custom` service.
** *`chunking_settings` (Optional, { max_chunk_size, overlap, sentence_overlap, strategy })*: The chunking configuration object.
** *`task_settings` (Optional, { parameters })*: Settings to configure the inference task.
These settings are specific to the task type you specified.
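Putting the arguments together, a request to the new method might look like the following sketch; the URL, API key, and endpoint id are placeholders, not values taken from this PR:

```typescript
// Hypothetical putCustom request parameters, mirroring the endpoint
// definition shown earlier. The URL and API key are placeholders.
const params = {
  task_type: 'text_embedding' as const,
  custom_inference_id: 'test-text-embedding',
  service: 'custom' as const,
  service_settings: {
    secret_parameters: { api_key: '<some api key>' },
    url: 'https://example.invalid/v1/embeddings', // placeholder endpoint
    headers: {
      Authorization: 'Bearer ${api_key}', // template, resolved server-side
      'Content-Type': 'application/json'
    },
    request: '{"input": ${input}}',
    response: { json_parser: { text_embeddings: '$.data[*].embedding[*]' } }
  }
}

// With a connected client this would be sent as:
// const endpoint = await client.inference.putCustom(params)
```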

[discrete]
==== put_deepseek
Create a DeepSeek inference endpoint.
45 changes: 45 additions & 0 deletions src/api/api/inference.ts
@@ -609,6 +609,51 @@ export default class Inference {
return await this.transport.request({ path, method, querystring, body, meta }, options)
}

/**
* Create a custom inference endpoint. The custom service gives more control over how to interact with external inference services that aren't explicitly supported through dedicated integrations. The custom service gives you the ability to define the headers, url, query parameters, request body, and secrets. The custom service supports the template replacement functionality, which enables you to define a template that can be replaced with the value associated with that key. Templates are portions of a string that start with `${` and end with `}`. The parameters `secret_parameters` and `task_settings` are checked for keys for template replacement. Template replacement is supported in the `request`, `headers`, `url`, and `query_parameters`. If the definition (key) is not found for a template, an error message is returned. In case of an endpoint definition like the following: ``` PUT _inference/text_embedding/test-text-embedding { "service": "custom", "service_settings": { "secret_parameters": { "api_key": "<some api key>" }, "url": "...endpoints.huggingface.cloud/v1/embeddings", "headers": { "Authorization": "Bearer ${api_key}", "Content-Type": "application/json" }, "request": "{\"input\": ${input}}", "response": { "json_parser": { "text_embeddings":"$.data[*].embedding[*]" } } } } ``` To replace `${api_key}` the `secret_parameters` and `task_settings` are checked for a key named `api_key`. > info > Templates should not be surrounded by quotes. Pre-defined templates: * `${input}` refers to the array of input strings that comes from the `input` field of the subsequent inference requests. * `${input_type}` refers to the input type translation values. * `${query}` refers to the query field used specifically for reranking tasks. * `${top_n}` refers to the `top_n` field available when performing rerank requests. * `${return_documents}` refers to the `return_documents` field available when performing rerank requests.
* @see {@link https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom | Elasticsearch API documentation}
*/
async putCustom (this: That, params: T.InferencePutCustomRequest | TB.InferencePutCustomRequest, options?: TransportRequestOptionsWithOutMeta): Promise<T.InferencePutCustomResponse>
async putCustom (this: That, params: T.InferencePutCustomRequest | TB.InferencePutCustomRequest, options?: TransportRequestOptionsWithMeta): Promise<TransportResult<T.InferencePutCustomResponse, unknown>>
async putCustom (this: That, params: T.InferencePutCustomRequest | TB.InferencePutCustomRequest, options?: TransportRequestOptions): Promise<T.InferencePutCustomResponse>
async putCustom (this: That, params: T.InferencePutCustomRequest | TB.InferencePutCustomRequest, options?: TransportRequestOptions): Promise<any> {
const acceptedPath: string[] = ['task_type', 'custom_inference_id']
const acceptedBody: string[] = ['chunking_settings', 'service', 'service_settings', 'task_settings']
const querystring: Record<string, any> = {}
// @ts-expect-error
const userBody: any = params?.body
let body: Record<string, any> | string
if (typeof userBody === 'string') {
body = userBody
} else {
body = userBody != null ? { ...userBody } : undefined
}

for (const key in params) {
if (acceptedBody.includes(key)) {
body = body ?? {}
// @ts-expect-error
body[key] = params[key]
} else if (acceptedPath.includes(key)) {
continue
} else if (key !== 'body') {
// @ts-expect-error
querystring[key] = params[key]
}
}

const method = 'PUT'
const path = `/_inference/${encodeURIComponent(params.task_type.toString())}/${encodeURIComponent(params.custom_inference_id.toString())}`
const meta: TransportRequestMetadata = {
name: 'inference.put_custom',
pathParts: {
task_type: params.task_type,
custom_inference_id: params.custom_inference_id
}
}
return await this.transport.request({ path, method, querystring, body, meta }, options)
}
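The generated method above follows the client's usual request-building pattern: path parameters are consumed into the URL, accepted body keys are folded into the request body, and everything else becomes a query-string parameter. A standalone sketch of that partitioning (simplified, no transport involved; `partitionParams` is a hypothetical name, not a client API):

```typescript
// Simplified sketch of how generated methods split a flat params object
// into body and querystring, as seen in putCustom above.
function partitionParams (
  params: Record<string, unknown>,
  acceptedPath: string[],
  acceptedBody: string[]
): { body: Record<string, unknown>, querystring: Record<string, unknown> } {
  const body: Record<string, unknown> = {}
  const querystring: Record<string, unknown> = {}
  for (const key in params) {
    if (acceptedBody.includes(key)) {
      body[key] = params[key]          // goes into the request body
    } else if (!acceptedPath.includes(key) && key !== 'body') {
      querystring[key] = params[key]   // unknown keys become query params
    }                                   // path keys are consumed by the URL
  }
  return { body, querystring }
}

const { body, querystring } = partitionParams(
  { task_type: 'rerank', custom_inference_id: 'id', service: 'custom', pretty: true },
  ['task_type', 'custom_inference_id'],
  ['chunking_settings', 'service', 'service_settings', 'task_settings']
)
// body -> { service: 'custom' }, querystring -> { pretty: true }
```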

/**
* Create a DeepSeek inference endpoint. Create an inference endpoint to perform an inference task with the `deepseek` service.
* @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-deepseek.html | Elasticsearch API documentation}
79 changes: 61 additions & 18 deletions src/api/types.ts
@@ -3500,9 +3500,10 @@ export interface AggregationsFiltersAggregation extends AggregationsBucketAggreg
}

export interface AggregationsFiltersBucketKeys extends AggregationsMultiBucketBase {
key?: string
}
export type AggregationsFiltersBucket = AggregationsFiltersBucketKeys
& { [property: string]: AggregationsAggregate | long }
& { [property: string]: AggregationsAggregate | string | long }

export interface AggregationsFormatMetricAggregationBase extends AggregationsMetricAggregationBase {
format?: string
@@ -4672,7 +4673,7 @@ export interface AnalysisEdgeNGramTokenizer extends AnalysisTokenizerBase {
custom_token_chars?: string
max_gram?: integer
min_gram?: integer
token_chars?: string | AnalysisTokenChar[]
token_chars?: AnalysisTokenChar[]
}

export interface AnalysisElisionTokenFilter extends AnalysisTokenFilterBase {
@@ -6961,7 +6962,6 @@ export interface AsyncSearchSubmitRequest extends RequestBase {
ignore_unavailable?: boolean
lenient?: boolean
max_concurrent_shard_requests?: long
min_compatible_shard_node?: VersionString
preference?: string
request_cache?: boolean
routing?: Routing
@@ -7150,7 +7150,6 @@ export interface CatAliasesRequest extends CatCatRequestBase {
s?: Names
expand_wildcards?: ExpandWildcards
local?: boolean
master_timeout?: Duration
}

export type CatAliasesResponse = CatAliasesAliasesRecord[]
@@ -10809,7 +10808,14 @@ export interface EsqlAsyncEsqlResult extends EsqlEsqlResult {
is_running: boolean
}

export interface EsqlClusterInfo {
export interface EsqlEsqlClusterDetails {
status: EsqlEsqlClusterStatus
indices: string
took?: DurationValue<UnitMillis>
_shards?: EsqlEsqlShardInfo
}

export interface EsqlEsqlClusterInfo {
total: integer
successful: integer
running: integer
@@ -10819,27 +10825,20 @@ export interface EsqlClusterInfo {
details: Record<string, EsqlEsqlClusterDetails>
}

export interface EsqlColumnInfo {
export type EsqlEsqlClusterStatus = 'running' | 'successful' | 'partial' | 'skipped' | 'failed'

export interface EsqlEsqlColumnInfo {
name: string
type: string
}

export interface EsqlEsqlClusterDetails {
status: EsqlEsqlClusterStatus
indices: string
took?: DurationValue<UnitMillis>
_shards?: EsqlEsqlShardInfo
}

export type EsqlEsqlClusterStatus = 'running' | 'successful' | 'partial' | 'skipped' | 'failed'

export interface EsqlEsqlResult {
took?: DurationValue<UnitMillis>
is_partial?: boolean
all_columns?: EsqlColumnInfo[]
columns: EsqlColumnInfo[]
all_columns?: EsqlEsqlColumnInfo[]
columns: EsqlEsqlColumnInfo[]
values: FieldValue[][]
_clusters?: EsqlClusterInfo
_clusters?: EsqlEsqlClusterInfo
profile?: any
}

@@ -13471,6 +13470,32 @@ export interface InferenceContentObject {
type: string
}

export interface InferenceCustomRequestParams {
content: string
}

export interface InferenceCustomResponseParams {
json_parser: any
}

export interface InferenceCustomServiceSettings {
headers?: any
input_type?: any
query_parameters?: any
request: InferenceCustomRequestParams
response: InferenceCustomResponseParams
secret_parameters: any
url?: string
}

export type InferenceCustomServiceType = 'custom'

export interface InferenceCustomTaskSettings {
parameters?: any
}

export type InferenceCustomTaskType = 'text_embedding' | 'sparse_embedding' | 'rerank' | 'completion'

export interface InferenceDeepSeekServiceSettings {
api_key: string
model_id: string
@@ -13605,6 +13630,11 @@ export interface InferenceInferenceEndpointInfoCohere extends InferenceInferenceEndpoint {
task_type: InferenceTaskTypeCohere
}

export interface InferenceInferenceEndpointInfoCustom extends InferenceInferenceEndpoint {
inference_id: string
task_type: InferenceTaskTypeCustom
}

export interface InferenceInferenceEndpointInfoDeepSeek extends InferenceInferenceEndpoint {
inference_id: string
task_type: InferenceTaskTypeDeepSeek
@@ -13780,6 +13810,8 @@ export type InferenceTaskTypeAzureOpenAI = 'text_embedding' | 'completion'

export type InferenceTaskTypeCohere = 'text_embedding' | 'rerank' | 'completion'

export type InferenceTaskTypeCustom = 'text_embedding' | 'sparse_embedding' | 'rerank' | 'completion'

export type InferenceTaskTypeDeepSeek = 'completion' | 'chat_completion'

export type InferenceTaskTypeELSER = 'sparse_embedding'
@@ -13986,6 +14018,17 @@ export interface InferencePutCohereRequest extends RequestBase {

export type InferencePutCohereResponse = InferenceInferenceEndpointInfoCohere

export interface InferencePutCustomRequest extends RequestBase {
task_type: InferenceCustomTaskType
custom_inference_id: Id
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceCustomServiceType
service_settings: InferenceCustomServiceSettings
task_settings?: InferenceCustomTaskSettings
}

export type InferencePutCustomResponse = InferenceInferenceEndpointInfoCustom

export interface InferencePutDeepseekRequest extends RequestBase {
task_type: InferenceTaskTypeDeepSeek
deepseek_inference_id: Id