Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ const (
autoDeprecationWarning = `UpdateMode "Auto" is deprecated and will be removed in a future API version. ` +
`Use explicit update modes like "Recreate", "Initial", or "InPlaceOrRecreate" instead. ` +
`See https://github.com/kubernetes/autoscaler/issues/8424 for more details.`
// maxAdmissionPayloadSize limits the size of the incoming admission request payload
// to prevent OOM (Denial of Service) attacks. A typical AdmissionReview is well under 100KB,
// and etcd limits objects to 1.5MB. With updates including both the new and old object,
// 5MB is an extremely safe upper bound that leaves a comfortable margin.
maxAdmissionPayloadSize = 1024 * 1024 * 5 // 5MB
)

// AdmissionServer is an admission webhook server that modifies pod resources request based on VPA recommendation
Expand Down Expand Up @@ -192,8 +197,10 @@ func (s *AdmissionServer) Serve(w http.ResponseWriter, r *http.Request) {

var body []byte
if r.Body != nil {
if data, err := io.ReadAll(r.Body); err == nil {
if data, err := io.ReadAll(http.MaxBytesReader(w, r.Body, maxAdmissionPayloadSize)); err == nil {
body = data
} else {
klog.ErrorS(err, "Failed to read admission request body (payload may exceed 5MB limit)")
Comment on lines +200 to +203
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How can we be sure that the error here is because of that reason?
Also, printing out 5MB without a reference to the const value can cause errors.
I would expect something like this:

diff --git a/vertical-pod-autoscaler/pkg/admission-controller/logic/server.go b/vertical-pod-autoscaler/pkg/admission-controller/logic/server.go
index d4ee79e64..47c74d431 100644
--- a/vertical-pod-autoscaler/pkg/admission-controller/logic/server.go
+++ b/vertical-pod-autoscaler/pkg/admission-controller/logic/server.go
@@ -19,6 +19,7 @@ package logic
 import (
        "context"
        "encoding/json"
+       "errors"
        "fmt"
        "io"
        "net/http"
@@ -200,7 +201,12 @@ func (s *AdmissionServer) Serve(w http.ResponseWriter, r *http.Request) {
                if data, err := io.ReadAll(http.MaxBytesReader(w, r.Body, maxAdmissionPayloadSize)); err == nil {
                        body = data
                } else {
-                       klog.ErrorS(err, "Failed to read admission request body (payload may exceed 5MB limit)")
+                       var maxBytesErr *http.MaxBytesError
+                       if errors.As(err, &maxBytesErr) {
+                               klog.ErrorS(err, "Admission request body exceeds size limit", "limit", maxAdmissionPayloadSize)
+                       } else {
+                               klog.ErrorS(err, "Failed to read admission request body")
+                       }
                }
        }

}
}
// verify the content type is accurate
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
Copyright The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package logic

import (
"bytes"
"net/http"
"net/http/httptest"
"testing"

"github.com/stretchr/testify/assert"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource"
)

// InfiniteReader simulates an endless stream of bytes to verify LimitReader behavior.
type InfiniteReader struct{}

func (r *InfiniteReader) Read(p []byte) (n int, err error) {
for i := range p {
p[i] = 'a'
}
return len(p), nil
}

func (r *InfiniteReader) Close() error {
return nil
}

func TestServePayloadLimit(t *testing.T) {
tests := []struct {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add more tests here for my above comment?

name string
requestBody *bytes.Buffer
isEndless bool
expectedStatus int
}{
{
name: "Small valid JSON payload",
requestBody: bytes.NewBufferString(`{"kind":"AdmissionReview","apiVersion":"admission.k8s.io/v1","request":{"uid":"123","resource":{"group":"autoscaling.k8s.io","version":"v1","resource":"verticalpodautoscalers"},"requestKind":{"group":"autoscaling.k8s.io","version":"v1","kind":"VerticalPodAutoscaler"}}}`),
expectedStatus: http.StatusOK,
},
{
name: "Oversized payload exceeding 5MB limit",
isEndless: true,
expectedStatus: http.StatusOK, // Fails open, returning 200 OK with unmarshal error rather than crashing
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we rather be expecting a 413 from the server here?

Copy link
Copy Markdown
Author

@sophieliu15 sophieliu15 May 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question! I chose to let the read error "fail open" (proceeding with a 200 OK rather than a 413) for two reasons:

  • Preserving Webhook Behavior: This design represents a minimal change that strictly preserves the VPA webhook's existing, permissive "fail-open" philosophy for JSON parsing/unmarshaling errors (which defaults to Allowed: true). It guarantees the webhook is protected from OOM crashes without changing VPA's business logic or breaking backward compatibility.

  • Preventing Cluster Scheduling Outages Under DoS: Mutating webhooks are inline to all Pod creations/updates. If the webhook is under a heavy DoS flood and we return a 413 HTTP error:

    • The API Server treats non-200 responses as webhook infrastructure failures.
    • If the cluster operator has VPA configured with failurePolicy: Fail, the API Server will immediately block and reject all legitimate user Pod scheduling requests.
    • By using http.MaxBytesReader combined with 200 OK, we discard oversized payloads with minimal resource consumption and do not report this as infrastructure failure to the API Server, ensuring minimal impact on cluster-wide scheduling under DoS.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure that's true. This changes the logic to respond with a 200, without the expected payload, see https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#response
api-server will treat this as a failure, no matter the HTTP code.

},
}

for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
server := &AdmissionServer{
resourceHandlers: make(map[metav1.GroupResource]resource.Handler), // Unused in basic HTTP parse step
}

var req *http.Request
if tc.isEndless {
req = httptest.NewRequest(http.MethodPost, "/admit", &InfiniteReader{})
} else {
req = httptest.NewRequest(http.MethodPost, "/admit", tc.requestBody)
}
req.Header.Set("Content-Type", "application/json")

w := httptest.NewRecorder()
server.Serve(w, req)

assert.Equal(t, tc.expectedStatus, w.Code)
})
}
}