triton-inference-server · mc-nv · Jun 24, 2026 · Jun 23, 2026 · Jun 23, 2026 · Jun 24, 2026
diff --git a/Conceptual_Guide/Part_4-inference_acceleration/README.md b/Conceptual_Guide/Part_4-inference_acceleration/README.md
@@ -135,10 +135,10 @@ Before proceeding, please set up a model repository for the Text Recognition mod
 
 ```
 # Server Container
-docker run --gpus=all -it --shm-size=256m --rm -p8000:8000 -p8001:8001 -p8002:8002 -v$(pwd):/workspace/ -v/$(pwd)/model_repository:/models nvcr.io/nvidia/tritonserver:22.11-py3 bash
+docker run --gpus=all -it --shm-size=256m --rm -p8000:8000 -p8001:8001 -p8002:8002 -v$(pwd):/workspace/ -v/$(pwd)/model_repository:/models nvcr.io/nvidia/tritonserver:26.06-py3 bash
 
 # Client Container (on a different terminal)
-docker run -it --net=host -v ${PWD}:/workspace/ nvcr.io/nvidia/tritonserver:22.11-py3-sdk bash
+docker run -it --net=host -v ${PWD}:/workspace/ nvcr.io/nvidia/tritonserver:26.06-py3-sdk bash
 ```
 
 Since this is a model we converted to ONNX, and TensorRT acceleration examples are linked throughout the explanation, we will explore the ONNX pathway. There are three cases to consider with ONNX backend:

diff --git a/Conceptual_Guide/Part_5-Model_Ensembles/README.md b/Conceptual_Guide/Part_5-Model_Ensembles/README.md
@@ -315,7 +315,7 @@ We'll again be launching Triton using docker containers. This time, we'll start
 docker run --gpus=all -it --shm-size=1G --rm  \
   -p8000:8000 -p8001:8001 -p8002:8002 \
   -v ${PWD}:/workspace/ -v ${PWD}/model_repository:/models \
-  nvcr.io/nvidia/tritonserver:22.12-py3
+  nvcr.io/nvidia/tritonserver:26.06-py3
 ```
 
 We'll need to install a couple of dependencies for our Python backend scripts.

diff --git a/.../EKS_Multinode_Triton_TRTLLM/multinode_helm_chart/containers/triton_trt_llm.containerfile b/.../EKS_Multinode_Triton_TRTLLM/multinode_helm_chart/containers/triton_trt_llm.containerfile
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-ARG BASE_CONTAINER_IMAGE=nvcr.io/nvidia/tritonserver:24.07-trtllm-python-py3
+ARG BASE_CONTAINER_IMAGE=nvcr.io/nvidia/tritonserver:26.06-trtllm-python-py3
 FROM ${BASE_CONTAINER_IMAGE}
 
 ENV EFA_INSTALLER_VERSION=1.33.0

diff --git a/Deployment/Kubernetes/EKS_Multinode_Triton_TRTLLM/multinode_helm_chart/gen_ai_perf.yaml b/Deployment/Kubernetes/EKS_Multinode_Triton_TRTLLM/multinode_helm_chart/gen_ai_perf.yaml
@@ -7,7 +7,7 @@ metadata:
 spec:
   containers:
   - name: triton
-    image: nvcr.io/nvidia/tritonserver:24.07-py3-sdk
+    image: nvcr.io/nvidia/tritonserver:26.06-py3-sdk
     command: ["sleep", "infinity"]
     volumeMounts:
       - mountPath: /var/run/models

diff --git a/Deployment/Kubernetes/EKS_Multinode_Triton_TRTLLM/multinode_helm_chart/setup_ssh_efs.yaml b/Deployment/Kubernetes/EKS_Multinode_Triton_TRTLLM/multinode_helm_chart/setup_ssh_efs.yaml
@@ -7,7 +7,7 @@ metadata:
 spec:
   containers:
   - name: triton
-    image: nvcr.io/nvidia/tritonserver:24.07-trtllm-python-py3
+    image: nvcr.io/nvidia/tritonserver:26.06-trtllm-python-py3
     command: ["sleep", "infinity"]
     resources:
       limits:

diff --git a/...netes/TensorRT-LLM_Autoscaling_and_Load_Balancing/containers/triton_trt-llm.containerfile b/...netes/TensorRT-LLM_Autoscaling_and_Load_Balancing/containers/triton_trt-llm.containerfile
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-ARG BASE_CONTAINER_IMAGE=nvcr.io/nvidia/tritonserver:24.08-trtllm-python-py3
+ARG BASE_CONTAINER_IMAGE=nvcr.io/nvidia/tritonserver:26.06-trtllm-python-py3
 ARG ENGINE_DEST_PATH=/var/run/engines
 ARG HF_HOME=/var/run/cache
 

diff --git a/Deployment/Kubernetes/TensorRT-LLM_Autoscaling_and_Load_Balancing/setup_ssh-nfs.yaml b/Deployment/Kubernetes/TensorRT-LLM_Autoscaling_and_Load_Balancing/setup_ssh-nfs.yaml
@@ -7,7 +7,7 @@ metadata:
 spec:
   containers:
   - name: triton
-    image: nvcr.io/nvidia/tritonserver:24.08-trtllm-python-py3
+    image: nvcr.io/nvidia/tritonserver:26.06-trtllm-python-py3
     command: ["sleep", "infinity"]
     resources:
       limits:

diff --git a/...rnetes/TensorRT-LLM_Multi-Node_Distributed_Models/containers/triton_trt-llm.containerfile b/...rnetes/TensorRT-LLM_Multi-Node_Distributed_Models/containers/triton_trt-llm.containerfile
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-ARG BASE_CONTAINER_IMAGE=nvcr.io/nvidia/tritonserver:24.05-trtllm-python-py3
+ARG BASE_CONTAINER_IMAGE=nvcr.io/nvidia/tritonserver:26.06-trtllm-python-py3
 ARG ENGINE_DEST_PATH=/var/run/models/engine
 ARG HF_HOME=/var/run/hugging_face
 ARG MODEL_DEST_PATH=/var/run/models/model

diff --git a/Feature_Guide/Speculative_Decoding/vLLM/README.md b/Feature_Guide/Speculative_Decoding/vLLM/README.md
@@ -198,7 +198,7 @@ To run Draft Model-Based Speculative Decoding with Triton Inference Server, it i
 docker run --gpus all -it --net=host --rm -p 8001:8001 --shm-size=1G \
     --ulimit memlock=-1 --ulimit stack=67108864 \
     -v </path/to/model_repository>:/model_repository \
-    nvcr.io/nvidia/tritonserver:25.02-vllm-python-py3 \
+    nvcr.io/nvidia/tritonserver:26.06-vllm-python-py3 \
     tritonserver --model-repository /model_repository \
     --model-control-mode explicit --load-model opt_model
 ```
diff --git a/HuggingFace/README.md b/HuggingFace/README.md
@@ -115,7 +115,7 @@ Before the specifics around deploying the models can be discussed, the first ste
 
 ```
 # Pull the PyTorch Container from NGC
-docker run -it --gpus=all -v ${PWD}:/workspace nvcr.io/nvidia/pytorch:23.05-py3
+docker run -it --gpus=all -v ${PWD}:/workspace nvcr.io/nvidia/pytorch:26.05-py3
 
 # Install dependencies
 pip install transformers

diff --git a/Popular_Models_Guide/DeepSeek/README.md b/Popular_Models_Guide/DeepSeek/README.md
@@ -67,7 +67,7 @@ Then you can run the tritonserver as usual
 LOCAL_MODEL_REPOSITORY=./vllm_backend/samples/model_repository/
 docker run --rm -it --net host --shm-size=2g  --ulimit memlock=-1 \
 --ulimit stack=67108864 --gpus all -v $LOCAL_MODEL_REPOSITORY:/opt/tritonserver/model_repository  \
-nvcr.io/nvidia/tritonserver:25.01-vllm-python-py3 tritonserver --model-repository=model_repository/
+nvcr.io/nvidia/tritonserver:26.06-vllm-python-py3 tritonserver --model-repository=model_repository/
 ```
 The server has launched successfully when you see the following outputs in your console:
 
@@ -100,7 +100,7 @@ that has an example client.py to test the model.
 
 ```bash
 LOCAL_WORKSPACE=./vllm_backend/samples
-docker run -ti --gpus all --network=host --pid=host --ipc=host -v $LOCAL_WORKSPACE:/workspace nvcr.io/nvidia/tritonserver:25.01-py3-sdk
+docker run -ti --gpus all --network=host --pid=host --ipc=host -v $LOCAL_WORKSPACE:/workspace nvcr.io/nvidia/tritonserver:26.06-py3-sdk
 ```
 Then you can use client as follows:
 ```bash

diff --git a/Popular_Models_Guide/Llama2/vllm_guide.md b/Popular_Models_Guide/Llama2/vllm_guide.md
@@ -42,7 +42,7 @@ The triton vLLM container can be pulled from [NGC](https://catalog.ngc.nvidia.co
 docker run --rm -it --net host --shm-size=2g \
     --ulimit memlock=-1 --ulimit stack=67108864 --gpus all \
     -v $PWD/llama2vllm:/opt/tritonserver/model_repository/llama2vllm \
-    nvcr.io/nvidia/tritonserver:23.11-vllm-python-py3
+    nvcr.io/nvidia/tritonserver:26.06-vllm-python-py3
 ```
 This will create a `/opt/tritonserver/model_repository` folder that contains the `llama2vllm` model. The model itself will be pulled from the HuggingFace
 

diff --git a/Popular_Models_Guide/StableDiffusion/README.md b/Popular_Models_Guide/StableDiffusion/README.md
@@ -57,10 +57,10 @@ support matrix](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/i
 ## Building the Triton Inference Server Image
 
 The example is designed based on the
-`nvcr.io/nvidia/tritonserver:24.08-py3` docker image and [TensorRT OSS v10.4](https://github.com/NVIDIA/TensorRT/releases/tag/v10.4.0).
+`nvcr.io/nvidia/tritonserver:26.06-py3` docker image and [TensorRT OSS v10.4](https://github.com/NVIDIA/TensorRT/releases/tag/v10.4.0).
 
 A set of convenience scripts are provided to create a docker image
-based on the `nvcr.io/nvidia/tritonserver:24.01-py3` image with the
+based on the `nvcr.io/nvidia/tritonserver:26.06-py3` image with the
 dependencies for the TensorRT Stable Diffusion demo installed.
 
 ### Triton Inference Server + TensorRT OSS

diff --git a/Quick_Deploy/HuggingFaceTransformers/Dockerfile b/Quick_Deploy/HuggingFaceTransformers/Dockerfile
@@ -23,5 +23,7 @@
 # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-FROM nvcr.io/nvidia/tritonserver:23.10-py3
-RUN pip install transformers==4.34.0 protobuf==3.20.3 sentencepiece==0.1.99 accelerate==0.23.0 einops==0.6.1
+FROM nvcr.io/nvidia/tritonserver:26.06-py3
+# Install the pinned Python packages on top of the Triton base image.
+# --no-cache-dir keeps pip's download cache out of the resulting image layer.
+RUN pip install --no-cache-dir transformers==5.12.1 protobuf==3.20.3 sentencepiece==0.2.1 accelerate==0.23.0 einops==0.6.1
diff --git a/Quick_Deploy/HuggingFaceTransformers/README.md b/Quick_Deploy/HuggingFaceTransformers/README.md
@@ -220,7 +220,7 @@ the Triton server using the `docker run` command from above.
 Once Triton launches successfully, start a Triton SDK container by running the following in a separate window:
 
 ```bash
-docker run -it --net=host nvcr.io/nvidia/tritonserver:23.10-py3-sdk bash
+docker run -it --net=host nvcr.io/nvidia/tritonserver:26.06-py3-sdk bash
 ```
 This container comes with all of Triton's deployment analyzers pre-installed, meaning
 we can simply enter the following to get feedback on our model's inference performance:

diff --git a/Quick_Deploy/OpenVINO/README.md b/Quick_Deploy/OpenVINO/README.md
@@ -62,7 +62,7 @@ Note: This directory structure is how the Triton Inference Server can read the c
 
 ### 4. Run the Triton Inference Server
 ```
-docker run --rm -p 8000:8000 -p 8001:8001 -p 8002:8002 -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:24.04-py3 tritonserver --model-repository=/models
+docker run --rm -p 8000:8000 -p 8001:8001 -p 8002:8002 -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:26.06-py3 tritonserver --model-repository=/models
 ```
 
 ### 5. Download the Triton Client code `client.py` from GitHub to a place you want to run the Triton Client from.
@@ -73,7 +73,7 @@ wget https://raw.githubusercontent.com/triton-inference-server/tutorials/main/Qu
 ### 6. Run the Triton Client in the same location as the `client.py` file, install dependencies, and query the server
 Building a client requires three basic points. First, we setup a connection with the Triton Inference Server. Second, we specify the names of the input and output layer(s) of our model. And last, we send an inference request to the Triton Inference Server.
 ```
-docker run -it --rm --net=host -v ${PWD}:/workspace/ nvcr.io/nvidia/tritonserver:24.04-py3-sdk bash
+docker run -it --rm --net=host -v ${PWD}:/workspace/ nvcr.io/nvidia/tritonserver:26.06-py3-sdk bash
 ```
 ```
 pip install torchvision
@@ -152,7 +152,7 @@ Note: This directory structure is how the Triton Inference Server can read the c
 
 ### 4. Run the Triton Inference Server
 ```
-docker run --rm -p 8000:8000 -p 8001:8001 -p 8002:8002 -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:24.04-py3 tritonserver --model-repository=/models
+docker run --rm -p 8000:8000 -p 8001:8001 -p 8002:8002 -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:26.06-py3 tritonserver --model-repository=/models
 ```
 
 ### 5. In another terminal, download the Triton Client code `client.py` from GitHub to the place you want to run the Triton Client from.
@@ -170,7 +170,7 @@ In the `client.py` file, you’ll need to update the model input and output name
 ### 6. Run the Triton Client in the same location as the `client.py` file, install dependencies, and query the server.
 Building a client requires three basic points. First, we setup a connection with the Triton Inference Server. Second, we specify the names of the input and output layer(s) of our model. And last, we send an inference request to the Triton Inference Server.
 ```
-docker run -it --net=host -v ${PWD}:/workspace/ nvcr.io/nvidia/tritonserver:24.04-py3-sdk bash
+docker run -it --net=host -v ${PWD}:/workspace/ nvcr.io/nvidia/tritonserver:26.06-py3-sdk bash
 ```
 ```
 pip install torchvision
@@ -189,7 +189,7 @@ The output format here is `<confidence_score>:<classification_index>`. To learn
 ### 1. Download and prepare the TensorFlow model.
 Export the TensorFlow model in SavedModel format:
 ```
-docker run -it --gpus all -v ${PWD}:/workspace nvcr.io/nvidia/tensorflow:24.04-tf2-py3
+docker run -it --gpus all -v ${PWD}:/workspace nvcr.io/nvidia/tensorflow:26.05-tf2-py3
 ```
 ```
 python3 export.py
@@ -252,7 +252,7 @@ Note: This directory structure is how the Triton Inference Server can read the c
 
 ### 4. Run the Triton Inference Server
 ```
-docker run --rm -p 8000:8000 -p 8001:8001 -p 8002:8002 -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:24.04-py3 tritonserver --model-repository=/models
+docker run --rm -p 8000:8000 -p 8001:8001 -p 8002:8002 -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:26.06-py3 tritonserver --model-repository=/models
 ```
 
 ### 5. In another terminal, download the Triton Client code `client.py` from GitHub to the place you want to run the Triton Client from.
@@ -263,7 +263,7 @@ wget https://raw.githubusercontent.com/triton-inference-server/tutorials/main/Qu
 ### 6. Run the Triton Client in the same location as the `client.py` file, install dependencies, and query the server.
 Building a client requires three basic points. First, we setup a connection with the Triton Inference Server. Second, we specify the names of the input and output layer(s) of our model. And last, we send an inference request to the Triton Inference Server.
 ```
-docker run -it --net=host -v ${PWD}:/workspace/ nvcr.io/nvidia/tritonserver:24.04-py3-sdk bash
+docker run -it --net=host -v ${PWD}:/workspace/ nvcr.io/nvidia/tritonserver:26.06-py3-sdk bash
 ```
 ```
 pip install --upgrade tensorflow

diff --git a/Quick_Deploy/PyTorch/export.py b/Quick_Deploy/PyTorch/export.py
@@ -27,12 +27,6 @@
 import torch
 from torchvision import models
 
-model = (
-    models.resnet50(
-        weights=models.ResNet50_Weights.IMAGENET1K_V1
-    )
-    .eval()
-    .to("cuda")
-)
+model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1).eval().to("cuda")
 traced_model = torch.jit.trace(model, torch.randn(1, 3, 224, 224).to("cuda"))
 torch.jit.save(traced_model, "model.pt")
diff --git a/Triton_Inference_Server_Python_API/README.md b/Triton_Inference_Server_Python_API/README.md
@@ -54,30 +54,30 @@ https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html
 ## Installation
 
 The tutorial and Python API package are designed to be installed and
-run within the `nvcr.io/nvidia/tritonserver:24.08-py3` docker image.
+run within the `nvcr.io/nvidia/tritonserver:26.06-py3` docker image.
 
 A set of convenience scripts are provided to create a docker image
-based on the `nvcr.io/nvidia/tritonserver:24.08-py3` image with the
+based on the `nvcr.io/nvidia/tritonserver:26.06-py3` image with the
 Python API installed plus additional dependencies required for the
 examples.
 
-### Triton Inference Server 24.08 + Python API
+### Triton Inference Server 26.06 + Python API
 
 #### Clone Repository
 ```bash
 git clone https://github.com/triton-inference-server/tutorials.git
 cd tutorials/Triton_Inference_Server_Python_API
 ```
 
-#### Build `triton-python-api:r24.08` Image
+#### Build `triton-python-api:r26.06` Image
 ```bash
 ./build.sh
 ```
 
 #### Supported Backends
 
 The built image includes all the backends shipped by default in the
-tritonserver `nvcr.io/nvidia/tritonserver:24.08-py3` container.
+tritonserver `nvcr.io/nvidia/tritonserver:26.06-py3` container.
 
 ```
 dali  fil  identity  onnxruntime  openvino  python  pytorch  repeat  square  tensorflow  tensorrt
@@ -95,7 +95,7 @@ different data types. The `identity` model copies provided inputs of
 
 ## Hello World
 
-### Start `triton-python-api:r24.08` Container
+### Start `triton-python-api:r26.06` Container
 
 The following command starts a container and volume mounts the current
 directory as `workspace`.
@@ -163,7 +163,7 @@ This example is based on the
 tutorial.
 
 
-#### Build `triton-python-api:r24.08-diffusion` Image and Stable Diffusion Models
+#### Build `triton-python-api:r26.06-diffusion` Image and Stable Diffusion Models
 
 Please note the following command will take many minutes depending on
 your hardware configuration and network connection.
@@ -175,7 +175,7 @@ your hardware configuration and network connection.
 #### Supported Backends
 
 The built image includes all the backends shipped by default in the
-tritonserver `nvcr.io/nvidia/tritonserver:24.08-py3` container.
+tritonserver `nvcr.io/nvidia/tritonserver:26.06-py3` container.
 
 ```
 dali  fil  identity  onnxruntime  openvino  python  pytorch  repeat  square  tensorflow  tensorrt

diff --git a/Triton_Inference_Server_Python_API/build.sh b/Triton_Inference_Server_Python_API/build.sh
@@ -39,8 +39,8 @@ DOCKERFILE=${SOURCE_DIR}/docker/Dockerfile
 
 # Base Images
 BASE_IMAGE=nvcr.io/nvidia/tritonserver
-BASE_IMAGE_TAG_IDENTITY=24.08-py3
-BASE_IMAGE_TAG_DIFFUSION=24.08-py3
+BASE_IMAGE_TAG_IDENTITY=26.06-py3
+BASE_IMAGE_TAG_DIFFUSION=26.06-py3
 
 get_options() {
     while :; do
@@ -137,7 +137,7 @@ get_options() {
     fi
 
     if [ -z "$TAG" ]; then
-        TAG="triton-python-api:r24.08"
+        TAG="triton-python-api:r26.06"
 
 	if [[ $FRAMEWORK == "DIFFUSION" ]]; then
 	    TAG+="-diffusion"
@@ -181,7 +181,7 @@ get_options "$@"
 
 if [[ $FRAMEWORK == DIFFUSION ]]; then
     BASE_IMAGE="tritonserver"
-    BASE_IMAGE_TAG="r24.08-diffusion"
+    BASE_IMAGE_TAG="r26.06-diffusion"
 fi
 
 # BUILD RUN TIME IMAGE
@@ -203,8 +203,8 @@ if [[ $FRAMEWORK == DIFFUSION ]]; then
 	set -x
     fi
     $RUN_PREFIX mkdir -p ${SOURCE_DIR}/backend/diffusion
-    $RUN_PREFIX $SOURCE_DIR/../Popular_Models_Guide/StableDiffusion/build.sh --framework diffusion --tag tritonserver:r24.08-diffusion
-    $RUN_PREFIX docker run --rm -it -v ${SOURCE_DIR}:/workspace tritonserver:r24.08-diffusion /bin/bash -c "cp -rf /tmp/TensorRT/demo/Diffusion /workspace/backend/diffusion"
+    $RUN_PREFIX $SOURCE_DIR/../Popular_Models_Guide/StableDiffusion/build.sh --framework diffusion --tag tritonserver:r26.06-diffusion
+    $RUN_PREFIX docker run --rm -it -v ${SOURCE_DIR}:/workspace tritonserver:r26.06-diffusion /bin/bash -c "cp -rf /tmp/TensorRT/demo/Diffusion /workspace/backend/diffusion"
     $RUN_PREFIX cp $SOURCE_DIR/../Popular_Models_Guide/StableDiffusion/backend/diffusion/model.py ${SOURCE_DIR}/backend/diffusion/model.py
     $RUN_PREFIX mkdir -p ${SOURCE_DIR}/diffusion-models/stable_diffusion_1_5/1
     $RUN_PREFIX cp $SOURCE_DIR/../Popular_Models_Guide/StableDiffusion/diffusion-models/stable_diffusion_1_5/config.pbtxt  ${SOURCE_DIR}/diffusion-models/stable_diffusion_1_5/config.pbtxt

diff --git a/Triton_Inference_Server_Python_API/deps/requirements.txt b/Triton_Inference_Server_Python_API/deps/requirements.txt
@@ -26,4 +26,4 @@
 
 pyright
 pytest
-ray[all]==2.36.0
+ray[all]==2.55.1
diff --git a/Triton_Inference_Server_Python_API/docker/Dockerfile b/Triton_Inference_Server_Python_API/docker/Dockerfile
@@ -25,7 +25,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver
-ARG BASE_IMAGE_TAG=24.08-py3
+ARG BASE_IMAGE_TAG=26.06-py3
 
 FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} as triton-python-api
 

diff --git a/Triton_Inference_Server_Python_API/examples/kafka-io/README.md b/Triton_Inference_Server_Python_API/examples/kafka-io/README.md
@@ -50,7 +50,7 @@ In this Kafka I/O pipeline we deploy a pre-processing stage of tokenization base
 Once you have the docker service up and running, launch a container by executing the following command:
 
 ```bash
-docker run --rm -it --gpus all -v <path>/<to>/tutorials/Triton_Inference_Server_Python_API/examples/kafka-io/:/opt/tritonserver/kafka-io -w /opt/tritonserver/kafka-io  --entrypoint bash nvcr.io/nvidia/tritonserver:24.06-py3
+docker run --rm -it --gpus all -v <path>/<to>/tutorials/Triton_Inference_Server_Python_API/examples/kafka-io/:/opt/tritonserver/kafka-io -w /opt/tritonserver/kafka-io  --entrypoint bash nvcr.io/nvidia/tritonserver:26.06-py3
 ```
 
 ### Clone Repository

diff --git a/Triton_Inference_Server_Python_API/examples/kafka-io/requirements.txt b/Triton_Inference_Server_Python_API/examples/kafka-io/requirements.txt
@@ -3,8 +3,8 @@ confluent_kafka==2.5.0
 gcn-kafka==0.3.3
 jsonschema==4.23.0
 pandas==2.2.2
-ray==2.32.0
-ray[serve]==2.32.0
-torch==2.3.1
-transformers==4.42.4
-tritonclient==2.47.0
+ray==2.55.1
+ray[serve]==2.55.1
+torch==2.12.1
+transformers==5.12.1
+tritonclient==2.69.0
-Original file line number
+Diff line change
@@ Expand Up / @@ -26,4 +26,4 @@ @@
     pyright
     pytest
-    ray[all]==2.36.0
+    ray[all]==2.55.1