elastic
diff --git a/‎output/openapi/elasticsearch-openapi.json‎
Lines changed: 330 additions & 3 deletions b/‎output/openapi/elasticsearch-openapi.json‎
Lines changed: 330 additions & 3 deletions
diff --git a/‎output/openapi/elasticsearch-serverless-openapi.json‎
Lines changed: 330 additions & 3 deletions b/‎output/openapi/elasticsearch-serverless-openapi.json‎
Lines changed: 330 additions & 3 deletions
diff --git a/‎output/schema/schema.json‎
Lines changed: 566 additions & 54 deletions b/‎output/schema/schema.json‎
Lines changed: 566 additions & 54 deletions
diff --git a/‎output/typescript/types.ts‎
Lines changed: 43 additions & 0 deletions b/‎output/typescript/types.ts‎
Lines changed: 43 additions & 0 deletions
diff --git a/‎specification/_doc_ids/table.csv‎
Lines changed: 1 addition & 0 deletions b/‎specification/_doc_ids/table.csv‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎specification/_json_spec/inference.put_nvidia.json‎
Lines changed: 49 additions & 0 deletions b/‎specification/_json_spec/inference.put_nvidia.json‎
Lines changed: 49 additions & 0 deletions
diff --git a/‎specification/inference/_types/CommonTypes.ts‎
Lines changed: 85 additions & 0 deletions b/‎specification/inference/_types/CommonTypes.ts‎
Lines changed: 85 additions & 0 deletions
diff --git a/‎specification/inference/_types/Services.ts‎
Lines changed: 12 additions & 0 deletions b/‎specification/inference/_types/Services.ts‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎specification/inference/_types/TaskType.ts‎
Lines changed: 7 additions & 0 deletions b/‎specification/inference/_types/TaskType.ts‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎specification/inference/put/PutRequest.ts‎
Lines changed: 1 addition & 0 deletions b/‎specification/inference/put/PutRequest.ts‎
Lines changed: 1 addition & 0 deletions
@@ -398,6 +398,7 @@ inference-api-put-huggingface,https://www.elastic.co/docs/api/doc/elasticsearch/
 inference-api-put-jinaai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-jinaai,,
 inference-api-put-llama,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-llama,,
 inference-api-put-mistral,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-mistral,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-mistral.html,
+inference-api-put-nvidia,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-nvidia,,
 inference-api-put-openai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-openai,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-openai.html,
 inference-api-put-openshift-ai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-openshift-ai,,
 inference-api-put-voyageai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-voyageai,,
 
@@ -0,0 +1,49 @@
+{
+  "inference.put_nvidia": {
+    "documentation": {
+      "url": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-nvidia",
+      "description": "Create an Nvidia inference endpoint"
+    },
+    "stability": "stable",
+    "visibility": "public",
+    "headers": {
+      "accept": ["application/json"],
+      "content_type": ["application/json"]
+    },
+    "url": {
+      "paths": [
+        {
+          "path": "/_inference/{task_type}/{nvidia_inference_id}",
+          "methods": ["PUT"],
+          "parts": {
+            "task_type": {
+              "type": "enum",
+              "description": "The task type",
+              "options": [
+                "chat_completion",
+                "completion",
+                "rerank",
+                "text_embedding"
+              ]
+            },
+            "nvidia_inference_id": {
+              "type": "string",
+              "description": "The inference ID"
+            }
+          }
+        }
+      ]
+    },
+    "body": {
+      "description": "The inference endpoint's task and service settings",
+      "required": true
+    },
+    "params": {
+      "timeout": {
+        "type": "time",
+        "description": "Specifies the amount of time to wait for the inference endpoint to be created.",
+        "default": "30s"
+      }
+    }
+  }
+}
@@ -1810,6 +1810,90 @@ export enum MistralServiceType {
   mistral
 }
 
+export class NvidiaServiceSettings {
+  /**
+   * A valid API key for your Nvidia endpoint.
+   * You can find it in the `API Keys` section of your Nvidia account settings.
+   */
+  api_key: string
+  /**
+   * The URL of the Nvidia model endpoint. If not provided, the default endpoint URL is used depending on the task type:
+   *
+   * * For `text_embedding` task - `https://integrate.api.nvidia.com/v1/embeddings`.
+   * * For `completion` and `chat_completion` tasks - `https://integrate.api.nvidia.com/v1/chat/completions`.
+   * * For `rerank` task - `https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking`.
+   */
+  url?: string
+  /**
+   * The name of the model to use for the inference task. Refer to the model's documentation for the name if needed.
+   * The service has been tested and confirmed to work with the following models:
+   *
+   * * For `text_embedding` task - `nvidia/llama-3.2-nv-embedqa-1b-v2`.
+   * * For `completion` and `chat_completion` tasks - `microsoft/phi-3-mini-128k-instruct`.
+   * * For `rerank` task - `nv-rerank-qa-mistral-4b:1`.
+   *
+   * For the `text_embedding` task, the service doesn't support the `baai/bge-m3` and `nvidia/nvclip` models because they don't recognize the `input_type` parameter.
+   */
+  model_id: string
+  /**
+   * For a `text_embedding` task, the maximum number of tokens per input. Inputs exceeding this value are truncated prior to sending to the Nvidia API.
+   */
+  max_input_tokens?: integer
+  /**
+   * For a `text_embedding` task, the similarity measure. One of `cosine`, `dot_product`, `l2_norm`.
+   */
+  similarity?: NvidiaSimilarityType
+  /**
+   * This setting helps to minimize the number of rate limit errors returned from the Nvidia API.
+   * By default, the `nvidia` service sets the number of requests allowed per minute to 3000.
+   */
+  rate_limit?: RateLimitSetting
+}
+
+export enum NvidiaTaskType {
+  chat_completion,
+  completion,
+  rerank,
+  text_embedding
+}
+
+export enum NvidiaServiceType {
+  nvidia
+}
+
+export enum NvidiaSimilarityType {
+  cosine,
+  dot_product,
+  l2_norm
+}
+
+export class NvidiaTaskSettings {
+  /**
+   * For a `text_embedding` task, the type of input sent to the Nvidia endpoint.
+   * Valid values are:
+   *
+   * * `ingest`: Mapped to Nvidia's `passage` value in request. Used when generating embeddings during indexing.
+   * * `search`: Mapped to Nvidia's `query` value in request. Used when generating embeddings during querying.
+   *
+   * IMPORTANT: For Nvidia endpoints, if the `input_type` field is not specified, Nvidia's `query` value is used, which is the equivalent of `search`.
+   */
+  input_type?: NvidiaInputType
+  /**
+   * For a `text_embedding` task, the method used by the Nvidia model to handle inputs longer than the maximum token length.
+   * Valid values are:
+   *
+   * * `END`: When the input exceeds the maximum input token length, the end of the input is discarded.
+   * * `NONE`: When the input exceeds the maximum input token length, an error is returned.
+   * * `START`: When the input exceeds the maximum input token length, the start of the input is discarded.
+   */
+  truncate?: CohereTruncateType
+}
+
+export enum NvidiaInputType {
+  ingest,
+  search
+}
+
 export class OpenAIServiceSettings {
   /**
    * A valid API key of your OpenAI account.
@@ -1908,6 +1992,7 @@ export class OpenShiftAiServiceSettings {
   max_input_tokens?: integer
   /**
    * For a `text_embedding` task, the similarity measure. One of cosine, dot_product, l2_norm.
+   * If not specified, the default dot_product value is used.
    */
   similarity?: OpenShiftAiSimilarityType
   /**
 
@@ -41,6 +41,7 @@ import {
   TaskTypeJinaAi,
   TaskTypeLlama,
   TaskTypeMistral,
+  TaskTypeNvidia,
   TaskTypeOpenAI,
   TaskTypeOpenShiftAi,
   TaskTypeVoyageAI,
@@ -304,6 +305,17 @@ export class InferenceEndpointInfoMistral extends InferenceEndpoint {
   task_type: TaskTypeMistral
 }
 
+export class InferenceEndpointInfoNvidia extends InferenceEndpoint {
+  /**
+   * The inference ID
+   */
+  inference_id: string
+  /**
+   * The task type
+   */
+  task_type: TaskTypeNvidia
+}
+
 export class InferenceEndpointInfoOpenAI extends InferenceEndpoint {
   /**
    * The inference Id
 
@@ -141,6 +141,13 @@ export enum TaskTypeMistral {
   completion
 }
 
+export enum TaskTypeNvidia {
+  chat_completion,
+  completion,
+  rerank,
+  text_embedding
+}
+
 export enum TaskTypeOpenAI {
   text_embedding,
   chat_completion,
 
@@ -49,6 +49,7 @@ import { TaskType } from '@inference/_types/TaskType'
  * * JinaAI (`rerank`, `text_embedding`)
  * * Llama (`chat_completion`, `completion`, `text_embedding`)
  * * Mistral (`chat_completion`, `completion`, `text_embedding`)
+ * * Nvidia (`chat_completion`, `completion`, `rerank`, `text_embedding`)
  * * OpenAI (`chat_completion`, `completion`, `text_embedding`)
  * * OpenShift AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)
  * * VoyageAI (`rerank`, `text_embedding`)