From 72b1a5808477438e02ba6d04739521d82f04552b Mon Sep 17 00:00:00 2001 From: David Corvoysier Date: Fri, 16 Jan 2026 09:13:54 +0100 Subject: [PATCH 1/2] fix: update inference processor from 'inf2' to 'neuronx' The current version generates the following image tag for 0.4.1: 0.10.2-inf2-py310-sdk2.26.0-ubuntu22.04 But it should be: 0.10.2-neuronx-py310-sdk2.26.0-ubuntu22.04 --- .../core/image_uri_config/huggingface-vllm-neuronx.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sagemaker-core/src/sagemaker/core/image_uri_config/huggingface-vllm-neuronx.json b/sagemaker-core/src/sagemaker/core/image_uri_config/huggingface-vllm-neuronx.json index c2592c915a..9996908255 100644 --- a/sagemaker-core/src/sagemaker/core/image_uri_config/huggingface-vllm-neuronx.json +++ b/sagemaker-core/src/sagemaker/core/image_uri_config/huggingface-vllm-neuronx.json @@ -1,7 +1,7 @@ { "inference": { "processors": [ - "inf2" + "neuronx" ], "version_aliases": { "0.4": "0.4.1" @@ -27,7 +27,7 @@ "tag_prefix": "0.10.2", "repository": "huggingface-vllm-inference-neuronx", "container_version": { - "inf2": "ubuntu22.04" + "neuronx": "ubuntu22.04" }, "sdk_versions": [ "sdk2.26.0" @@ -35,4 +35,4 @@ } } } -} \ No newline at end of file +} From ff1c4e1c7d051caf777dc701ee1286c2f52305d5 Mon Sep 17 00:00:00 2001 From: David Corvoysier Date: Fri, 16 Jan 2026 09:50:02 +0100 Subject: [PATCH 2/2] chore: add HF vllm neuron 0.4.2 image --- .../huggingface-vllm-neuronx.json | 28 ++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/sagemaker-core/src/sagemaker/core/image_uri_config/huggingface-vllm-neuronx.json b/sagemaker-core/src/sagemaker/core/image_uri_config/huggingface-vllm-neuronx.json index 9996908255..2a2e6c8f78 100644 --- a/sagemaker-core/src/sagemaker/core/image_uri_config/huggingface-vllm-neuronx.json +++ b/sagemaker-core/src/sagemaker/core/image_uri_config/huggingface-vllm-neuronx.json @@ -4,7 +4,7 @@ "neuronx" ], "version_aliases": { - "0.4": "0.4.1" + "0.4": "0.4.2" }, "versions": { "0.4.1": { @@ -32,6 +32,32 @@ "sdk_versions": [ "sdk2.26.0" ] + }, + "0.4.2": { + "py_versions": [ + "py310" + ], + "registries": { + "ap-northeast-1": "763104351884", + "ap-south-1": "763104351884", + "ap-southeast-1": "763104351884", + "ap-southeast-2": "763104351884", + "eu-central-1": "763104351884", + "eu-west-1": "763104351884", + "eu-west-3": "763104351884", + "sa-east-1": "763104351884", + "us-east-1": "763104351884", + "us-east-2": "763104351884", + "us-west-2": "763104351884" + }, + "tag_prefix": "0.11.0-optimum0.4.2", + "repository": "huggingface-vllm-inference-neuronx", + "container_version": { + "neuronx": "ubuntu22.04" + }, + "sdk_versions": [ + "sdk2.26.0" + ] } } }