Skip to content
4 changes: 3 additions & 1 deletion notebooks/official/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,6 @@
/prediction/vertexai_serving_vllm/vertexai_serving_vllm_gpu_llama3_2_3B.ipynb @ravi-dalal
/prediction/vertexai_serving_vllm/vertexai_serving_vllm_tpu_llama3_2_3B.ipynb @ravi-dalal
/prediction/vertexai_serving_vllm/vertexai_serving_vllm_tpu_gcs_llama3_2_3B.ipynb @ravi-dalal
/generative_ai/csm_intro.ipynb @elaidlaw
/generative_ai/csm_intro.ipynb @elaidlaw
/prediction/vertexai_serving_vllm/vertexai_serving_vllm_mistral_7b_with_lora_adopters_custom_container.ipynb @shamikaa
/prediction/vertexai_serving_vllm/vertexai_serving_vllm_mistral_7b_with_lora_adopters_prebuilt_container.ipynb @shamikaa
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Build a Vertex AI-compatible vLLM serving image on top of a caller-supplied
# base (GPU: vllm/vllm-openai; CPU: a locally built vLLM CPU image).
ARG BASE_IMAGE
FROM ${BASE_IMAGE}

# Build-time only: ARG (unlike ENV) does not leak DEBIAN_FRONTEND into the
# runtime environment of the final image.
ARG DEBIAN_FRONTEND=noninteractive

# Install git and the Google Cloud CLI. The gcloud CLI is required by
# entrypoint.sh to download models / LoRA adapters from Cloud Storage at
# container startup.
# - --no-install-recommends keeps the layer small.
# - curl -fsSL makes the key fetch fail loudly instead of piping an HTML
#   error page into gpg.
# - The apt cache is removed in the same layer that populated it.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        apt-transport-https \
        apt-utils \
        ca-certificates \
        curl \
        git \
        gnupg \
    && echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" | tee /etc/apt/sources.list.d/google-cloud-sdk.list \
    && curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg \
    && apt-get update -y \
    && apt-get install -y --no-install-recommends google-cloud-cli \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /workspace/vllm

# Copy entrypoint.sh to the container and make it executable.
COPY ./entrypoint.sh /workspace/vllm/vertexai/entrypoint.sh
RUN chmod +x /workspace/vllm/vertexai/entrypoint.sh

ENTRYPOINT ["/workspace/vllm/vertexai/entrypoint.sh"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Builds (and pushes) the Vertex AI vLLM serving image.
# For CPU, the open-source vLLM CPU base image is built first from source.
steps:
- name: 'gcr.io/cloud-builders/docker'
  automapSubstitutions: true
  script: |
    #!/usr/bin/env bash
    set -euo pipefail
    device_type_param=${_DEVICE_TYPE}
    # Normalize to lowercase once and use it everywhere: Docker repository
    # names must be lowercase, so deriving the tag from the raw substitution
    # would fail for e.g. _DEVICE_TYPE=GPU.
    device_type=${device_type_param,,}
    base_image=${_BASE_IMAGE}
    image_name="vllm-${device_type}"
    if [[ $device_type == "cpu" ]]; then
      echo "Quietly building open source vLLM CPU container image"
      # Shallow clone of the pinned release tag; only the Dockerfile tree is
      # needed for the build.
      git clone --depth 1 -b v0.15.0 https://github.com/vllm-project/vllm.git
      cd vllm && DOCKER_BUILDKIT=1 docker build -t $base_image -f docker/Dockerfile.cpu . -q
      cd ..
    fi
    echo "Quietly building container image for: $device_type"
    docker build -t $LOCATION-docker.pkg.dev/$PROJECT_ID/${_REPOSITORY}/$image_name --build-arg BASE_IMAGE=$base_image . -q
    docker push $LOCATION-docker.pkg.dev/$PROJECT_ID/${_REPOSITORY}/$image_name
substitutions:
  _DEVICE_TYPE: gpu
  _BASE_IMAGE: vllm/vllm-openai
  _REPOSITORY: my-docker-repo
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#!/bin/bash

# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -euo pipefail

# Local staging directories for artifacts downloaded from Cloud Storage.
# Both can be overridden via environment variables of the same name.
readonly LOCAL_MODEL_DIR=${LOCAL_MODEL_DIR:-"/tmp/model_dir"}
readonly LOCAL_LORA_DIR=${LOCAL_LORA_DIR:-"/tmp/lora_adapters"}

# Download model weights from Cloud Storage into a local directory.
# Arguments:
#   $1 - gs:// URI of the model directory.
#   $2 - (optional) destination directory; defaults to LOCAL_MODEL_DIR.
#        The default keeps existing call sites working unchanged.
# Exits non-zero with a message on stderr if the download fails.
download_model_from_gcs() {
  local gcs_uri="$1"
  local target_dir="${2:-$LOCAL_MODEL_DIR}"
  # Quote the path so values containing spaces do not word-split.
  mkdir -p "$target_dir"
  echo "Downloading model from $gcs_uri to local directory..."
  if gcloud storage cp -r "$gcs_uri/*" "$target_dir"; then
    echo "Model downloaded successfully to ${target_dir}."
  else
    echo "Failed to download model from Cloud Storage: $gcs_uri." >&2
    exit 1
  fi
}


# Download LoRA adapter files from Cloud Storage into a local directory.
# Arguments:
#   $1 - gs:// URI of the adapter directory.
#   $2 - local destination directory (created if missing).
# Exits non-zero with a message on stderr if the download fails.
download_lora_adapters_from_gcs() {
  local source_uri="$1"
  local dest_dir="$2"
  mkdir -p "$dest_dir"
  echo "Downloading LoRA adapters from $source_uri to $dest_dir..."
  # Guard-clause form: bail out immediately on failure, then report success.
  if ! gcloud storage cp -r "$source_uri/*" "$dest_dir"; then
    echo "Failed to download LoRA adapters from Cloud Storage: $source_uri." >&2
    exit 1
  fi
  echo "LoRA adapters downloaded successfully to ${dest_dir}."
}

# Rewrite the vLLM launch arguments: any --model= or --lora-modules= value
# that points at gs:// is downloaded locally first and the argument is
# repointed at the local copy. All other arguments pass through untouched.
updated_args=()
model_arg="--model="
lora_modules_arg="--lora-modules="
gcs_protocol="gs://"
lora_counter=0

for a in "$@"; do
  if [[ $a == $model_arg* ]]; then
    model_path=${a#*=}
    echo "Found model: $model_path"
    if [[ $model_path == $gcs_protocol* ]]; then
      # Quoted so a path containing spaces survives the function call.
      download_model_from_gcs "$model_path"
      updated_args+=("--model=${LOCAL_MODEL_DIR}")
    else
      updated_args+=("--model=${model_path}")
    fi
  elif [[ $a == $lora_modules_arg* ]]; then
    lora_spec=${a#*=}
    echo "Found LoRA module: $lora_spec"

    # LoRA modules can be in format "name=path" or just "path"
    if [[ $lora_spec == *"="* ]]; then
      lora_name=${lora_spec%%=*}
      lora_path=${lora_spec#*=}
    else
      lora_name="lora_$lora_counter"
      lora_path=$lora_spec
      # Use an assignment, not ((lora_counter++)): the post-increment
      # expression evaluates to 0 on the first adapter, which returns exit
      # status 1 and would abort the script under `set -e`.
      lora_counter=$((lora_counter + 1))
    fi

    if [[ $lora_path == $gcs_protocol* ]]; then
      local_lora_path="${LOCAL_LORA_DIR}/${lora_name}"
      download_lora_adapters_from_gcs "$lora_path" "$local_lora_path"
      updated_args+=("--lora-modules=${lora_name}=${local_lora_path}")
    else
      updated_args+=("$a")
    fi
  else
    updated_args+=("$a")
  fi
done

echo "Launch command: " "${updated_args[@]}"
# Replace this shell so the server becomes PID 1 and receives signals.
exec "${updated_args[@]}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
#!/usr/bin/env bash

# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -euo pipefail

# Default configurations (same as cloudbuild.yaml substitutions).
# Each may be pre-seeded via the environment and overridden by CLI flags.
DEVICE_TYPE=${DEVICE_TYPE:-gpu}
BASE_IMAGE=${BASE_IMAGE:-vllm/vllm-openai}
REPOSITORY=${REPOSITORY:-my-docker-repo}
PROJECT_ID=${PROJECT_ID:-}
LOCATION=${LOCATION:-us-central1}

# Parse command line arguments.
# ${2:?...} gives a clear error and non-zero exit when a flag's value is
# missing; under `set -u` a bare "$2" would die with an opaque
# "unbound variable" message instead.
while [[ $# -gt 0 ]]; do
  case $1 in
    --device-type)
      DEVICE_TYPE="${2:?--device-type requires a value}"
      shift 2
      ;;
    --base-image)
      BASE_IMAGE="${2:?--base-image requires a value}"
      shift 2
      ;;
    --repository)
      REPOSITORY="${2:?--repository requires a value}"
      shift 2
      ;;
    --project-id)
      PROJECT_ID="${2:?--project-id requires a value}"
      shift 2
      ;;
    --location)
      LOCATION="${2:?--location requires a value}"
      shift 2
      ;;
    --help)
      echo "Usage: $0 [OPTIONS]"
      echo ""
      echo "Options:"
      echo "  --device-type    Device type: gpu or cpu (default: gpu)"
      echo "  --base-image     Base Docker image (default: vllm/vllm-openai)"
      echo "  --repository     Artifact Registry repository (default: my-docker-repo)"
      echo "  --project-id     GCP Project ID (required for push)"
      echo "  --location       GCP location (default: us-central1)"
      echo "  --help           Show this help message"
      echo ""
      echo "Examples:"
      echo "  # Build GPU container locally"
      echo "  $0 --device-type gpu"
      echo ""
      echo "  # Build CPU container locally"
      echo "  $0 --device-type cpu"
      echo ""
      echo "  # Build and push to Artifact Registry"
      echo "  $0 --device-type gpu --project-id my-project --location us-central1"
      exit 0
      ;;
    *)
      # Diagnostics belong on stderr so they don't pollute captured output.
      echo "Unknown option: $1" >&2
      echo "Use --help for usage information" >&2
      exit 1
      ;;
  esac
done

# Convert device type to lowercase and derive the image tag from the
# lowercased value: Docker repository names must be lowercase, so
# "vllm-${DEVICE_TYPE}" would make `docker build -t` fail for
# e.g. --device-type GPU.
device_type=${DEVICE_TYPE,,}
image_name="vllm-${device_type}"

echo "========================================="
echo "Local Docker Build Configuration"
echo "========================================="
echo "Device Type: $device_type"
echo "Base Image: $BASE_IMAGE"
echo "Image Name: $image_name"
if [[ -n "$PROJECT_ID" ]]; then
  echo "Target: $LOCATION-docker.pkg.dev/$PROJECT_ID/$REPOSITORY/$image_name"
else
  echo "Target: $image_name (local only)"
fi
echo "========================================="
echo ""

# Handle CPU build - clone and build vLLM base image if needed
if [[ $device_type == "cpu" ]]; then
  echo "Building open source vLLM CPU container image..."
  if [[ ! -d "vllm" ]]; then
    echo "Cloning vLLM repository..."
    # NOTE(review): pinned tag v0.5.1 differs from cloudbuild.yaml's
    # v0.15.0 — confirm which release is intended and keep them in sync.
    git clone --branch v0.5.1 https://github.com/vllm-project/vllm.git --depth 1
  else
    echo "vLLM directory already exists, using existing clone"
  fi

  cd vllm
  echo "Building vLLM CPU base image..."
  DOCKER_BUILDKIT=1 docker build -t "$BASE_IMAGE" -f docker/Dockerfile.cpu .
  cd ..
  echo "vLLM CPU base image built successfully"
  echo ""
fi

# Build the custom container
echo "Building custom container image for: $device_type"
docker build -t "$image_name" --build-arg BASE_IMAGE="$BASE_IMAGE" .

echo ""
echo "Build completed successfully!"
echo "Local image tag: $image_name"

# Optionally push to Artifact Registry
if [[ -n "$PROJECT_ID" ]]; then
  remote_tag="$LOCATION-docker.pkg.dev/$PROJECT_ID/$REPOSITORY/$image_name"
  echo ""
  echo "Tagging image for Artifact Registry..."
  docker tag "$image_name" "$remote_tag"

  echo "Pushing to Artifact Registry: $remote_tag"
  docker push "$remote_tag"
  echo ""
  echo "Push completed successfully!"
  echo "Remote image: $remote_tag"
else
  echo ""
  echo "Skipping push (no PROJECT_ID provided)"
  echo "To push to Artifact Registry, run with --project-id option"
fi

echo ""
echo "Done!"
Loading
Loading