GoogleCloudPlatform · shamika · Feb 2, 2026 · Feb 2, 2026 · Feb 2, 2026 · Feb 3, 2026
@@ -69,4 +69,6 @@
 /prediction/vertexai_serving_vllm/vertexai_serving_vllm_gpu_llama3_2_3B.ipynb @ravi-dalal
 /prediction/vertexai_serving_vllm/vertexai_serving_vllm_tpu_llama3_2_3B.ipynb @ravi-dalal
 /prediction/vertexai_serving_vllm/vertexai_serving_vllm_tpu_gcs_llama3_2_3B.ipynb @ravi-dalal
-/generative_ai/csm_intro.ipynb @elaidlaw
+/generative_ai/csm_intro.ipynb @elaidlaw
+/prediction/vertexai_serving_vllm/vertexai_serving_vllm_mistral_7b_with_lora_adopters_custom_container.ipynb @shamikaa
+/prediction/vertexai_serving_vllm/vertexai_serving_vllm_mistral_7b_with_lora_adopters_prebuilt_container.ipynb @shamikaa
@@ -0,0 +1,33 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# BASE_IMAGE is the vLLM image to extend. It has no default and must be
+# supplied with --build-arg BASE_IMAGE=<image>.
+ARG BASE_IMAGE
+FROM ${BASE_IMAGE}
+
+# Run the RUN steps under bash with pipefail so that a failed download in the
+# `curl | gpg` pipeline below aborts the build instead of being masked by the
+# exit status of gpg.
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+# Build-time only: keeps apt non-interactive without leaking the setting into
+# the environment of the running container.
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Install gcloud SDK. `curl -fsSL` makes HTTP errors fatal, so an error page is
+# never dearmored into the keyring.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends apt-utils git apt-transport-https gnupg ca-certificates curl \
+    && echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list \
+    && curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg \
+    && apt-get update -y && apt-get install google-cloud-cli -y \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /workspace/vllm
+
+# Copy entrypoint.sh to the container
+COPY ./entrypoint.sh /workspace/vllm/vertexai/entrypoint.sh
+RUN chmod +x /workspace/vllm/vertexai/entrypoint.sh
+
+# Exec form, so the entrypoint script receives the container arguments directly.
+ENTRYPOINT ["/workspace/vllm/vertexai/entrypoint.sh"]
diff --git a/notebooks/official/prediction/vertexai_serving_vllm/custom_container/ReadMe.md b/notebooks/official/prediction/vertexai_serving_vllm/custom_container/ReadMe.md
@@ -0,0 +1,63 @@
+Architecture Overview
+
+  The custom container consists of three main components:
+
+  1. Dockerfile (Dockerfile)
+
+  Builds a custom container image that:
+  - Takes a base image (vLLM) via build argument
+  - Installs Google Cloud SDK to enable GCS (Google Cloud Storage) access
+  - Copies and configures the entrypoint script
+  - Sets the entrypoint to /workspace/vllm/vertexai/entrypoint.sh
+
+  2. Entrypoint Script (entrypoint.sh)
+
+  This is the key component that provides smart model loading:
+
+  What it does:
+  - Intercepts all command-line arguments passed to the container
+  - Detects if the --model= argument points to a GCS path (starts with gs://)
+  - If it's a GCS path:
+    - Downloads the model from GCS to /tmp/model_dir using gcloud storage cp
+    - Rewrites the --model= argument to point to the local directory
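+  - If it's a local path, passes it through unchanged
+  - Handles each --lora-modules= argument the same way: a gs:// adapter path is downloaded to /tmp/lora_adapters/<name> and the argument is rewritten to <name>=<local path>
+  - Executes the original command with updated arguments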
+
+  Key logic (simplified excerpt of the argument loop in entrypoint.sh; the --lora-modules= branch is omitted):
+  for a in "$@"; do
+      if [[ $a == $model_arg* ]]; then  # Detects --model=
+          model_path=${a#*=}
+          if [[ $model_path == $gcs_protocol* ]]; then  # Is it gs://?
+              download_model_from_gcs $model_path
+              updated_args+=("--model=${LOCAL_MODEL_DIR}")  # Use local path
+          else
+              updated_args+=("--model=${model_path}")
+          fi
+      else
+          updated_args+=("$a")
+      fi
+  done
+
+  3. Cloud Build Configuration (cloudbuild.yaml:15-37)
+
+  Automates the container build and push process:
+
+  Build steps:
+  1. If _DEVICE_TYPE=cpu: Clones vLLM repo and builds the CPU base image
+  2. Builds the custom container with the entrypoint on top of the base image
+  3. Pushes to Artifact Registry at $LOCATION-docker.pkg.dev/$PROJECT_ID/${_REPOSITORY}/vllm-${_DEVICE_TYPE}
+
+  Configurable substitutions:
+  - _DEVICE_TYPE: gpu (default) or cpu
+  - _BASE_IMAGE: vllm/vllm-openai (default)
+  - _REPOSITORY: my-docker-repo (default)
+
+  How It Works End-to-End
+
+  1. Build: Cloud Build creates a container with gcloud SDK + entrypoint script
+  2. Deploy: Container deployed to Vertex AI
+  3. Runtime: Vertex AI starts the container with arguments such as --model=gs://my-bucket/my-model
+  4. Entrypoint intercepts: The entrypoint script downloads the model from GCS to local disk
+  5. vLLM starts: With the model loaded from local path instead of GCS
+
+  This design allows you to store models in GCS and have them automatically downloaded at container startup, rather than baking them into the container image.
@@ -0,0 +1,37 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Single build step: optionally builds the open source vLLM CPU base image,
+# then builds the custom serving image on top of the base image and pushes it.
+steps:
+- name: 'gcr.io/cloud-builders/docker'
+  automapSubstitutions: true
+  script: |
+      #!/usr/bin/env bash
+      set -euo pipefail
+      device_type_param=${_DEVICE_TYPE}
+      # Normalize the case once; the CPU check and the image name both use it.
+      device_type=${device_type_param,,}
+      base_image=${_BASE_IMAGE}
+      # Docker repository names must be lowercase, so the image name is derived
+      # from the normalized value rather than from the raw substitution.
+      image_name="vllm-${device_type}"
+      if [[ $device_type == "cpu" ]]; then
+        echo "Quietly building open source vLLM CPU container image"
+        git clone https://github.com/vllm-project/vllm.git
+        cd vllm && DOCKER_BUILDKIT=1 docker build -t "$base_image" -f docker/Dockerfile.cpu . -q
+        cd ..
+      fi
+      echo "Quietly building container image for: $device_type"
+      docker build -t "$LOCATION-docker.pkg.dev/$PROJECT_ID/${_REPOSITORY}/$image_name" --build-arg BASE_IMAGE="$base_image" . -q
+      docker push "$LOCATION-docker.pkg.dev/$PROJECT_ID/${_REPOSITORY}/$image_name"
+substitutions:
+    _DEVICE_TYPE: gpu
+    _BASE_IMAGE: vllm/vllm-openai
+    _REPOSITORY: my-docker-repo
@@ -0,0 +1,91 @@
+#!/bin/bash
+
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -euo pipefail
+
+readonly LOCAL_MODEL_DIR=${LOCAL_MODEL_DIR:-"/tmp/model_dir"}
+readonly LOCAL_LORA_DIR=${LOCAL_LORA_DIR:-"/tmp/lora_adapters"}
+
+# Download every object under a Cloud Storage prefix into LOCAL_MODEL_DIR.
+# Arguments:
+#   $1 - gs:// URI of the model directory; one trailing slash is tolerated.
+# Exits the script with status 1 if the copy fails.
+download_model_from_gcs() {
+    # Strip a trailing slash so the wildcard below never becomes "//*".
+    local gcs_uri=${1%/}
+    mkdir -p "$LOCAL_MODEL_DIR"
+    echo "Downloading model from $gcs_uri to local directory..."
+    if gcloud storage cp -r "$gcs_uri/*" "$LOCAL_MODEL_DIR"; then
+      echo "Model downloaded successfully to ${LOCAL_MODEL_DIR}."
+    else
+      echo "Failed to download model from Cloud Storage: $gcs_uri." >&2
+      exit 1
+    fi
+}
+
+
+download_lora_adapters_from_gcs() {
+    gcs_uri=$1
+    local_dir=$2
+    mkdir -p "$local_dir"
+    echo "Downloading LoRA adapters from $gcs_uri to $local_dir..."
+    if gcloud storage cp -r "$gcs_uri/*" "$local_dir"; then
+      echo "LoRA adapters downloaded successfully to ${local_dir}."
+    else
+      echo "Failed to download LoRA adapters from Cloud Storage: $gcs_uri." >&2
+      exit 1
+    fi
+}
+
+updated_args=()
+model_arg="--model="
+lora_modules_arg="--lora-modules="
+gcs_protocol="gs://"
+lora_counter=0
+
+for a in "$@"; do
+    if [[ $a == $model_arg* ]]; then
+        model_path=${a#*=}
+        echo "Found model: $model_path"
+        if [[ $model_path == $gcs_protocol* ]]; then
+            download_model_from_gcs $model_path
+            updated_args+=("--model=${LOCAL_MODEL_DIR}")
+        else
+            updated_args+=("--model=${model_path}")
+        fi
+    elif [[ $a == $lora_modules_arg* ]]; then
+        lora_spec=${a#*=}
+        echo "Found LoRA module: $lora_spec"
+
+        # LoRA modules can be in format "name=path" or just "path"
+        if [[ $lora_spec == *"="* ]]; then
+            lora_name=${lora_spec%%=*}
+            lora_path=${lora_spec#*=}
+        else
+            lora_name="lora_$lora_counter"
+            lora_path=$lora_spec
+            ((lora_counter++))
+        fi
+
+        if [[ $lora_path == $gcs_protocol* ]]; then
+            local_lora_path="${LOCAL_LORA_DIR}/${lora_name}"
+            download_lora_adapters_from_gcs "$lora_path" "$local_lora_path"
+            updated_args+=("--lora-modules=${lora_name}=${local_lora_path}")
+        else
+            updated_args+=("$a")
+        fi
+    else
+        updated_args+=("$a")
+    fi
+done
+
+echo "Launch command: " "${updated_args[@]}"
+exec "${updated_args[@]}"
@@ -0,0 +1,142 @@
+#!/usr/bin/env bash
+
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Abort on command errors, unset variables and failed pipeline stages.
+set -o errexit -o nounset -o pipefail
+
+# Defaults mirror the substitutions in cloudbuild.yaml; a value already present
+# in the environment wins. PROJECT_ID defaults to empty.
+: "${DEVICE_TYPE:=gpu}"
+: "${BASE_IMAGE:=vllm/vllm-openai}"
+: "${REPOSITORY:=my-docker-repo}"
+: "${PROJECT_ID:=}"
+: "${LOCATION:=us-central1}"
+
+# Parse command line arguments
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --device-type)
+            DEVICE_TYPE="$2"
+            shift 2
+            ;;
+        --base-image)
+            BASE_IMAGE="$2"
+            shift 2
+            ;;
+        --repository)
+            REPOSITORY="$2"
+            shift 2
+            ;;
+        --project-id)
+            PROJECT_ID="$2"
+            shift 2
+            ;;
+        --location)
+            LOCATION="$2"
+            shift 2
+            ;;
+        --help)
+            echo "Usage: $0 [OPTIONS]"
+            echo ""
+            echo "Options:"
+            echo "  --device-type    Device type: gpu or cpu (default: gpu)"
+            echo "  --base-image     Base Docker image (default: vllm/vllm-openai)"
+            echo "  --repository     Artifact Registry repository (default: my-docker-repo)"
+            echo "  --project-id     GCP Project ID (required for push)"
+            echo "  --location       GCP location (default: us-central1)"
+            echo "  --help           Show this help message"
+            echo ""
+            echo "Examples:"
+            echo "  # Build GPU container locally"
+            echo "  $0 --device-type gpu"
+            echo ""
+            echo "  # Build CPU container locally"
+            echo "  $0 --device-type cpu"
+            echo ""
+            echo "  # Build and push to Artifact Registry"
+            echo "  $0 --device-type gpu --project-id my-project --location us-central1"
+            exit 0
+            ;;
+        *)
+            echo "Unknown option: $1"
+            echo "Use --help for usage information"
+            exit 1
+            ;;
+    esac
+done
+
+# Convert device type to lowercase
+device_type=${DEVICE_TYPE,,}
+image_name="vllm-${DEVICE_TYPE}"
+
+echo "========================================="
+echo "Local Docker Build Configuration"
+echo "========================================="
+echo "Device Type: $device_type"
+echo "Base Image: $BASE_IMAGE"
+echo "Image Name: $image_name"
+if [[ -n "$PROJECT_ID" ]]; then
+    echo "Target: $LOCATION-docker.pkg.dev/$PROJECT_ID/$REPOSITORY/$image_name"
+else
+    echo "Target: $image_name (local only)"
+fi
+echo "========================================="
+echo ""
+
+# CPU builds need a locally built base image: vLLM is built from source,
+# cloning the repository first unless a ./vllm checkout is already present.
+if [[ "$device_type" == "cpu" ]]; then
+    echo "Building open source vLLM CPU container image..."
+    if [[ -d "vllm" ]]; then
+        echo "vLLM directory already exists, using existing clone"
+    else
+        echo "Cloning vLLM repository..."
+        git clone https://github.com/vllm-project/vllm.git
+    fi
+
+    # Build from inside the checkout, then return to the starting directory.
+    pushd vllm > /dev/null
+    echo "Building vLLM CPU base image..."
+    DOCKER_BUILDKIT=1 docker build -t "$BASE_IMAGE" -f docker/Dockerfile.cpu .
+    popd > /dev/null
+    printf '%s\n' "vLLM CPU base image built successfully" ""
+fi
+
+# Build the custom image on top of the selected base image.
+echo "Building custom container image for: $device_type"
+docker build -t "$image_name" --build-arg BASE_IMAGE="$BASE_IMAGE" .
+
+printf '%s\n' "" "Build completed successfully!" "Local image tag: $image_name"
+
+# Pushing is optional and only happens when a project ID was supplied.
+if [[ -z "$PROJECT_ID" ]]; then
+    printf '%s\n' "" "Skipping push (no PROJECT_ID provided)"
+    echo "To push to Artifact Registry, run with --project-id option"
+else
+    remote_tag="$LOCATION-docker.pkg.dev/$PROJECT_ID/$REPOSITORY/$image_name"
+    printf '%s\n' "" "Tagging image for Artifact Registry..."
+    docker tag "$image_name" "$remote_tag"
+
+    echo "Pushing to Artifact Registry: $remote_tag"
+    docker push "$remote_tag"
+    printf '%s\n' "" "Push completed successfully!" "Remote image: $remote_tag"
+fi
+
+printf '%s\n' "" "Done!"