Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,18 @@ The bootstrap script resolves cache sources with a fallback chain:

Cache is stored as registry refs in private ECR repositories with 14-day expiration.

CUDA release image builds export BuildKit registry cache to a separate private
ECR cache repository:

- **Release image cache**:
`936637512419.dkr.ecr.us-east-1.amazonaws.com/vllm-release-cache:{variant}`

`{variant}` is the cache tag: use a stable architecture/CUDA/Ubuntu key, for
example `x86_64-cu130-ubuntu2204`. Release jobs may also import from the ordinary
CI cache fallback chain via additional `--cache-from` flags, but should export
(`--cache-to`) release-specific layers only to `vllm-release-cache`, so that they
do not churn the CI test or postmerge cache repositories.

### Warm-Cache AMI

A daily Buildkite pipeline (`.buildkite/pipelines/rebuild-cpu-ami.yml`, scheduled at 3 AM PST) builds custom EC2 AMIs with pre-warmed Docker layer caches:
Expand Down
8 changes: 7 additions & 1 deletion docker/ci.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,16 @@ variable "CACHE_FROM" {
default = ""
}

# Compatibility alias for older callers. Prefer CACHE_FROM_BASE_BRANCH, which
# matches the value resolved by vLLM's image build wrapper.
# This value is used as the default of CACHE_FROM_BASE_BRANCH, so callers that
# only set CACHE_FROM_BASE keep working. Empty means "no base cache ref".
variable "CACHE_FROM_BASE" {
default = ""
}

# Registry cache ref for the base branch. When non-empty, get_cache_from adds
# it as a `type=registry` cache source.
# Defaults to the legacy CACHE_FROM_BASE alias so older callers that only set
# that variable still get a base-branch cache source.
variable "CACHE_FROM_BASE_BRANCH" {
default = CACHE_FROM_BASE
}

# Optional registry cache ref. When non-empty, get_cache_from adds it as a
# `type=registry` cache source, listed after CACHE_FROM_BASE_BRANCH.
# Empty (the default) disables this source.
variable "CACHE_FROM_MAIN" {
default = ""
}
Expand All @@ -105,7 +111,7 @@ function "get_cache_from" {
PARENT_COMMIT != "" ? "type=registry,ref=${REGISTRY}/vllm-ci-test-cache:${PARENT_COMMIT},mode=max" : "",
VLLM_MERGE_BASE_COMMIT != "" ? "type=registry,ref=${REGISTRY}/vllm-ci-test-cache:${VLLM_MERGE_BASE_COMMIT},mode=max" : "",
CACHE_FROM != "" ? "type=registry,ref=${CACHE_FROM},mode=max" : "",
CACHE_FROM_BASE != "" ? "type=registry,ref=${CACHE_FROM_BASE},mode=max" : "",
CACHE_FROM_BASE_BRANCH != "" ? "type=registry,ref=${CACHE_FROM_BASE_BRANCH},mode=max" : "",
CACHE_FROM_MAIN != "" ? "type=registry,ref=${CACHE_FROM_MAIN},mode=max" : "",
])
}
Expand Down
11 changes: 11 additions & 0 deletions terraform/aws/ecr.tf
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ resource "aws_ecr_repository" "vllm_ci_postmerge_cache" {
provider = aws.us_east_1
}

# Private ECR repository holding the BuildKit registry cache for release image
# builds. It is kept separate from the CI test and postmerge cache
# repositories so release layers do not churn those caches.
resource "aws_ecr_repository" "vllm_release_cache" {
name = "vllm-release-cache"
provider = aws.us_east_1
}

# Lifecycle policies for cache repositories
resource "aws_ecr_lifecycle_policy" "vllm_ci_test_cache" {
repository = aws_ecr_repository.vllm_ci_test_cache.name
Expand All @@ -38,3 +43,9 @@ resource "aws_ecr_lifecycle_policy" "vllm_ci_postmerge_cache" {
provider = aws.us_east_1
policy = local.ecr_cache_lifecycle_policy
}

# Apply the shared cache lifecycle policy (local.ecr_cache_lifecycle_policy,
# also used by the other cache repositories) to the release cache repository.
resource "aws_ecr_lifecycle_policy" "vllm_release_cache" {
repository = aws_ecr_repository.vllm_release_cache.name
provider = aws.us_east_1
policy = local.ecr_cache_lifecycle_policy
}
46 changes: 46 additions & 0 deletions terraform/aws/iam.tf
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,42 @@ resource "aws_iam_policy" "postmerge_ecr_cache_read_write_access_policy" {
})
}

# IAM policy that lets build agents pull from and push to the
# vllm-release-cache ECR repository (the BuildKit registry cache used by
# release image builds).
resource "aws_iam_policy" "release_ecr_cache_read_write_access_policy" {
  name        = "release_ecr_cache_read_write_access_policy"
  description = "Policy to read and write cache to release cache repo"

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        # Repository-scoped actions: layer/manifest reads (pull) and layer
        # uploads plus PutImage (push), limited to the release cache repo.
        Effect = "Allow"
        Action = [
          "ecr:BatchCheckLayerAvailability",
          "ecr:BatchGetImage",
          "ecr:GetDownloadUrlForLayer",
          "ecr:CompleteLayerUpload",
          "ecr:UploadLayerPart",
          "ecr:InitiateLayerUpload",
          "ecr:PutImage"
        ]
        Resource = [
          "arn:aws:ecr:us-east-1:936637512419:repository/vllm-release-cache"
        ]
      },
      {
        # Token actions do not support resource-level permissions, so they
        # only take effect when granted on "*". They are therefore listed
        # here and not in the repository-scoped statement above.
        Effect = "Allow"
        Action = [
          "ecr:GetAuthorizationToken",
          "sts:GetServiceBearerToken"
        ]
        Resource = "*"
      }
    ]
  })
}

resource "aws_iam_policy" "release_ecr_public_read_write_access_policy" {
name = "release-ecr-public-read-write-access-policy"
description = "Policy to push and pull images from release ECR"
Expand Down Expand Up @@ -429,6 +465,16 @@ resource "aws_iam_role_policy_attachment" "postmerge_ecr_cache_read_write_access
policy_arn = aws_iam_policy.postmerge_ecr_cache_read_write_access_policy.arn
}

# Attach the release cache read/write policy to the instance role of every
# stack in the bk_queue_postmerge, bk_queue_postmerge_us_east_1 and
# bk_queue_release CloudFormation stack maps (one attachment per stack).
# NOTE(review): postmerge queues also receive write access to the release
# cache here — confirm this is intended and not release-only.
resource "aws_iam_role_policy_attachment" "release_ecr_cache_read_write_access" {
for_each = merge(
aws_cloudformation_stack.bk_queue_postmerge,
aws_cloudformation_stack.bk_queue_postmerge_us_east_1,
aws_cloudformation_stack.bk_queue_release,
)
role = each.value.outputs.InstanceRoleName
policy_arn = aws_iam_policy.release_ecr_cache_read_write_access_policy.arn
}

resource "aws_iam_role_policy_attachment" "release_ecr_public_read_write_access" {
for_each = merge(
aws_cloudformation_stack.bk_queue_postmerge,
Expand Down