Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions litellm/callbacks/message_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
"""
Production message filter callback - removes cache-busting content.
"""

from typing import Optional, Union
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm._logging import verbose_proxy_logger
from litellm.types.utils import CallTypesLiteral


class MessageFilterProd(CustomGuardrail):
    """
    Production message filter that removes dynamic cache-busting content.

    Removes billing headers with unique identifiers (like cch=xxxxx) that
    prevent KV cache utilization. Logs activity via verbose_proxy_logger only.

    Matching is prefix based: a piece of text is considered cache-busting
    when it starts with one of ``filter_keywords``. Three content shapes are
    handled:

    * list of content blocks: ``{"type": "text"}`` blocks whose text starts
      with a keyword are dropped;
    * plain string: leading lines that start with a keyword are stripped and
      the rest of the string is kept;
    * anything else is passed through untouched.

    A message (or the top-level ``system`` field) that is left empty by the
    filter is removed from the request instead of being forwarded with an
    empty ``content`` value.
    """

    # Used when the caller does not supply ``filter_keywords``.
    _DEFAULT_FILTER_KEYWORDS = ("x-anthropic-billing-header",)

    def __init__(self, **kwargs):
        """
        Create the filter.

        Args:
            **kwargs: Forwarded unchanged to ``CustomGuardrail``. The optional
                ``filter_keywords`` entry (a list of prefixes, or a single
                prefix string) selects what gets removed.
        """
        super().__init__(**kwargs)

        keywords = kwargs.get("filter_keywords")
        if keywords is None:
            keywords = self._DEFAULT_FILTER_KEYWORDS
        elif isinstance(keywords, str):
            # A bare string would otherwise be iterated character by
            # character and match almost any text.
            keywords = [keywords]

        # An empty keyword is a prefix of every string, so it is discarded
        # rather than allowed to wipe out all content.
        self.filter_keywords = [
            keyword for keyword in keywords if isinstance(keyword, str) and keyword
        ]

    def _matching_keyword(self, text: str) -> Optional[str]:
        """Return the first filter keyword that ``text`` starts with, if any."""
        for keyword in self.filter_keywords:
            if keyword and text.startswith(keyword):
                return keyword
        return None

    def _should_remove_content_block(self, content_block: dict) -> bool:
        """
        Check if a content block is a text block starting with a filter keyword.

        Args:
            content_block: One entry of a list-style ``content`` value.

        Returns:
            True when the block is a dict of type ``"text"`` whose ``text``
            is a string starting with one of ``filter_keywords``.
        """
        if not isinstance(content_block, dict):
            return False

        if content_block.get("type") != "text":
            return False

        text = content_block.get("text", "")
        if not isinstance(text, str):
            return False

        keyword = self._matching_keyword(text)
        if keyword is None:
            return False

        verbose_proxy_logger.debug(
            "Removing content block starting with '%s': %s...", keyword, text[:80]
        )
        return True

    def _filter_text(self, text: str) -> str:
        """
        Strip leading lines that start with a filter keyword from ``text``.

        Only the matching line(s) at the very start are removed so that any
        real prompt text following the header on later lines is preserved.

        Returns:
            The original string when nothing matched; otherwise the remainder,
            or ``""`` when nothing but whitespace is left.
        """
        remaining = text
        keyword = self._matching_keyword(remaining)
        # ``remaining`` gets strictly shorter on every pass, so this ends.
        while remaining and keyword is not None:
            verbose_proxy_logger.debug(
                "Removing leading line starting with '%s': %s...",
                keyword,
                remaining[:80],
            )
            _, _, remaining = remaining.partition("\n")
            keyword = self._matching_keyword(remaining)

        if remaining is text:
            return text
        return remaining if remaining.strip() else ""

    def _filter_content(self, content):
        """
        Filter content (string or list of blocks).

        Args:
            content: A message ``content`` / request ``system`` value.

        Returns:
            A value of the same kind with cache-busting text removed. The
            result may be empty (``""`` or ``[]``); callers decide whether to
            drop the enclosing field. Unsupported types are returned as is.
        """
        if isinstance(content, str):
            return self._filter_text(content)

        if isinstance(content, list):
            return [
                block
                for block in content
                if not self._should_remove_content_block(block)
            ]

        return content

    async def async_pre_call_hook(
        self,
        user_api_key_dict: UserAPIKeyAuth,
        cache: DualCache,
        data: dict,
        call_type: CallTypesLiteral,
    ) -> Optional[Union[Exception, str, dict]]:
        """
        Filter cache-busting content from messages and system fields.

        Args:
            user_api_key_dict: Unused by this hook.
            cache: Unused by this hook.
            data: Request payload; its ``messages`` and ``system`` entries are
                filtered in place.
            call_type: Unused by this hook.

        Returns:
            The same ``data`` dict after filtering.
        """
        filtered_count = 0

        # Filter messages array
        messages = data.get("messages")
        if messages:
            kept_messages = []
            for message in messages:
                content = message.get("content") if isinstance(message, dict) else None
                if content is None:
                    kept_messages.append(message)
                    continue

                filtered_content = self._filter_content(content)
                if filtered_content == content:
                    kept_messages.append(message)
                    continue

                filtered_count += 1
                role = message.get("role", "unknown")

                if filtered_content:
                    message["content"] = filtered_content
                    kept_messages.append(message)
                elif message.get("tool_calls") or message.get("function_call"):
                    # The message still carries a tool/function call, so keep
                    # it and only clear the now-empty content.
                    message["content"] = None
                    kept_messages.append(message)
                else:
                    # Forwarding ``content=[]`` / ``""`` can be rejected
                    # downstream, so the emptied message is dropped entirely.
                    verbose_proxy_logger.info(
                        "Dropped %s message left empty after filtering", role
                    )
                    continue

                verbose_proxy_logger.info(
                    "Filtered cache-busting content from %s message", role
                )

            if len(kept_messages) != len(messages):
                data["messages"] = kept_messages

        # Filter system field
        system = data.get("system")
        if system:
            filtered_system = self._filter_content(system)
            if filtered_system != system:
                if filtered_system:
                    data["system"] = filtered_system
                else:
                    # Nothing left: remove the field rather than send it empty.
                    data.pop("system", None)
                filtered_count += 1
                verbose_proxy_logger.info(
                    "Filtered cache-busting content from system field"
                )

        if filtered_count > 0:
            verbose_proxy_logger.info(
                "Message filter: removed cache-busting content from %d location(s)",
                filtered_count,
            )

        return data


# Production instance: module-level filter created at import time and
# configured with the billing-header prefix as its only keyword.
# NOTE(review): keywords are matched as plain prefixes (str.startswith), so
# this value also matches ordinary text that merely begins with the phrase.
# A tighter prefix such as "x-anthropic-billing-header:" may be preferable;
# confirm the exact header format before changing it.
message_filter_prod = MessageFilterProd(filter_keywords=["x-anthropic-billing-header"])

Copilot AI Mar 11, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The default/production filter_keywords uses the prefix "x-anthropic-billing-header" without the trailing :. This will also match and remove any user content that happens to begin with that phrase (not necessarily the metadata line). Consider using the more specific prefix used elsewhere in the codebase ("x-anthropic-billing-header:") or otherwise tightening the match to reduce false positives.

Copilot uses AI. Check for mistakes.
Loading