Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
5dfd0cd
docs(reborn): add Hermes-style context-management design (tool disclo…
serrrfirat Jun 22, 2026
8f56d75
feat(reborn): Phase 0 shadow measurement for context management
serrrfirat Jun 22, 2026
db9d099
feat(reborn): Phase 1.1 tool-disclosure catalog + selector + token be…
serrrfirat Jun 22, 2026
dda1e59
feat(reborn): Phase 1.2 wire tool disclosure via capability-port deco…
serrrfirat Jun 23, 2026
3c3cc9d
docs(reborn): record implementation status + codebase grounding for c…
serrrfirat Jun 23, 2026
2dc0127
test(reborn): negative-path + flag-off coverage for tool-disclosure b…
serrrfirat Jun 23, 2026
336aa16
docs(reborn): mark negative-path bridge tests done; only canary remains
serrrfirat Jun 23, 2026
7aff964
chore(reborn)!: TEMPORARY default tool disclosure ON for remote bench…
serrrfirat Jun 23, 2026
5d8f068
fix(reborn): address Gemini + CodeRabbit review feedback on PR #5149
serrrfirat Jun 23, 2026
41bb5ea
fix(reborn): CI fix (pin QA replays to disclosure-off) + 2 newer Code…
serrrfirat Jun 23, 2026
aa12134
fix(reborn): correct+widen disclosure core (B) and forgive deferred t…
serrrfirat Jun 23, 2026
ecaeb06
feat(reborn): log full-vs-disclosed tool-surface savings per turn (sh…
serrrfirat Jun 23, 2026
58a5140
feat(reborn): Hermes-shaped disclosure core + count-aware tool_search
serrrfirat Jun 23, 2026
4aebf24
fix(reborn): forgiving bridge actually dispatches direct deferred too…
serrrfirat Jun 23, 2026
12a1861
test(reborn): prove forgiving direct-deferred dispatch is provider-ag…
serrrfirat Jun 23, 2026
e719896
feat(reborn): teach the model the tool-disclosure protocol in the sys…
serrrfirat Jun 23, 2026
9908836
fix(reborn): address CodeRabbit review — preserve authority surface +…
serrrfirat Jun 23, 2026
2e1e1ac
debug(reborn): temporary diagnostics for direct-deferred resolution miss
serrrfirat Jun 23, 2026
04e8bc7
fix(reborn): make the full extension lifecycle core so onboarding works
serrrfirat Jun 23, 2026
ca69d08
fix(reborn): resolve deferred tools by any name form + revert step-1 …
serrrfirat Jun 24, 2026
44c6183
debug(reborn): startup marker for the tool-disclosure build
serrrfirat Jun 24, 2026
ba77135
style(reborn): rustfmt wrap in matcher test
serrrfirat Jun 24, 2026
2cf4f00
debug(reborn): bump disclosure marker to v2 + matcher self-test
serrrfirat Jun 24, 2026
f52546c
debug(reborn): unconditional entry probe on context_shadow target
serrrfirat Jun 24, 2026
aaadd04
fix(reborn): surface filters must delegate provider-tool-call resolut…
serrrfirat Jun 24, 2026
116f226
debug(reborn): marker v3 echoes loop_support filter build constant
serrrfirat Jun 24, 2026
f9a436a
fix(reborn): SurfaceTrackingLoopCapabilityPort must delegate tool-cal…
serrrfirat Jun 24, 2026
0c34188
fix(reborn): record canonical wire name for forgiving direct-deferred…
serrrfirat Jun 24, 2026
d167e71
fix(reborn): make tool_call bridge resolve undisclosed catalog tools …
serrrfirat Jun 24, 2026
c701c33
fix(reborn): collapse repeated identical error observations in replay…
serrrfirat Jun 24, 2026
83ae258
fix(reborn): log provider tool-call validate/register rejections in t…
serrrfirat Jun 24, 2026
519dbc7
fix(reborn): make forgiving tool_call failures recoverable, not run-b…
serrrfirat Jun 25, 2026
48fd811
Merge origin/main into firat/reborn-context-management
serrrfirat Jun 25, 2026
7086bf6
fix(reborn): log the rejecting layer's reason on gateway tool-call re…
serrrfirat Jun 25, 2026
7ae4fef
fix(reborn): promote gate-suspended tools so they survive a BlockedAu…
serrrfirat Jun 25, 2026
5b4e9be
fix(reborn): describe-first schema-on-blind-call for deferred tools
serrrfirat Jun 25, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 128 additions & 2 deletions crates/ironclaw_llm/src/nearai_chat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -581,14 +581,16 @@ impl LlmProvider for NearAiChatProvider {
};

let (input_tokens, output_tokens) = parse_usage(response.usage.as_ref());
let cached_tokens = parse_cached_tokens(response.usage.as_ref());
emit_context_shadow_usage(input_tokens, output_tokens, cached_tokens);

Ok(CompletionResponse {
content,
finish_reason,
input_tokens,
output_tokens,
reasoning: provider_reasoning,
cache_read_input_tokens: 0,
cache_read_input_tokens: cached_tokens.unwrap_or(0),
cache_creation_input_tokens: 0,
Comment thread
coderabbitai[bot] marked this conversation as resolved.
})
}
Expand Down Expand Up @@ -692,14 +694,16 @@ impl LlmProvider for NearAiChatProvider {
};

let (input_tokens, output_tokens) = parse_usage(response.usage.as_ref());
let cached_tokens = parse_cached_tokens(response.usage.as_ref());
emit_context_shadow_usage(input_tokens, output_tokens, cached_tokens);

Ok(ToolCompletionResponse {
content,
tool_calls,
finish_reason,
input_tokens,
output_tokens,
cache_read_input_tokens: 0,
cache_read_input_tokens: cached_tokens.unwrap_or(0),
cache_creation_input_tokens: 0,
reasoning: provider_reasoning,
})
Expand Down Expand Up @@ -1139,6 +1143,16 @@ struct ChatCompletionUsage {
completion_tokens: Option<u64>,
#[serde(default)]
total_tokens: Option<u64>,
#[serde(default)]
prompt_tokens_details: Option<PromptTokensDetails>,
#[serde(default)]
cached_tokens: Option<u64>,
}

/// Nested `prompt_tokens_details` object of a chat-completion usage payload.
#[derive(Debug, Deserialize, Default)]
struct PromptTokensDetails {
/// Value of the nested `cached_tokens` key; `None` when the key is absent.
#[serde(default)]
cached_tokens: Option<u64>,
}

fn saturate_u32(val: u64) -> u32 {
Expand All @@ -1161,6 +1175,33 @@ fn emit_reasoning_trace(reasoning: Option<&str>) {
}
}

/// Emits a debug-level usage event on the context-shadow tracing target.
///
/// Every event carries `prompt_tokens`, `completion_tokens`, and
/// `cached_tokens` (recorded as `-1` when no cached count was supplied).
/// A `cache_hit_ratio` field is attached only when a cached count is present
/// and `prompt_tokens` is non-zero, so the division is always well defined.
fn emit_context_shadow_usage(
    prompt_tokens: u32,
    completion_tokens: u32,
    cached_tokens: Option<u32>,
) {
    const CONTEXT_SHADOW_TARGET: &str = "ironclaw::reborn::context_shadow";

    // `-1` marks "not reported" so the field exists on every emitted event.
    let cached_tokens_field = match cached_tokens {
        Some(count) => i64::from(count),
        None => -1,
    };

    match cached_tokens {
        Some(cached) if prompt_tokens > 0 => {
            let cache_hit_ratio = f64::from(cached) / f64::from(prompt_tokens);
            tracing::debug!(
                target: CONTEXT_SHADOW_TARGET,
                prompt_tokens,
                completion_tokens,
                cached_tokens = cached_tokens_field,
                cache_hit_ratio,
                "nearai chat usage shadow measurement"
            );
        }
        _ => {
            tracing::debug!(
                target: CONTEXT_SHADOW_TARGET,
                prompt_tokens,
                completion_tokens,
                cached_tokens = cached_tokens_field,
                "nearai chat usage shadow measurement"
            );
        }
    }
}

fn parse_usage(usage: Option<&ChatCompletionUsage>) -> (u32, u32) {
let Some(u) = usage else {
return (0, 0);
Expand All @@ -1177,6 +1218,16 @@ fn parse_usage(usage: Option<&ChatCompletionUsage>) -> (u32, u32) {
(input, output)
}

/// Extracts the cached-token count from an optional usage payload.
///
/// The nested `prompt_tokens_details.cached_tokens` value wins when present;
/// otherwise the top-level `cached_tokens` value is used. Returns `None` when
/// there is no usage payload or neither field is set. The result is narrowed
/// to `u32` through `saturate_u32`.
fn parse_cached_tokens(usage: Option<&ChatCompletionUsage>) -> Option<u32> {
    let usage = usage?;

    let nested = match usage.prompt_tokens_details.as_ref() {
        Some(details) => details.cached_tokens,
        None => None,
    };
    // Fall back to the top-level field only when the nested one is missing.
    let raw = match nested {
        Some(count) => Some(count),
        None => usage.cached_tokens,
    };

    raw.map(saturate_u32)
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -2740,6 +2791,8 @@ mod tests {
prompt_tokens: Some(100),
completion_tokens: Some(50),
total_tokens: Some(150),
prompt_tokens_details: None,
cached_tokens: None,
};
assert_eq!(parse_usage(Some(&usage)), (100, 50));
}
Expand All @@ -2755,6 +2808,8 @@ mod tests {
prompt_tokens: Some(100),
completion_tokens: None,
total_tokens: Some(180),
prompt_tokens_details: None,
cached_tokens: None,
};
// output = total - prompt = 80
assert_eq!(parse_usage(Some(&usage)), (100, 80));
Expand All @@ -2766,6 +2821,8 @@ mod tests {
prompt_tokens: None,
completion_tokens: None,
total_tokens: Some(200),
prompt_tokens_details: None,
cached_tokens: None,
};
// input = 0 (no prompt), output = total = 200
assert_eq!(parse_usage(Some(&usage)), (0, 200));
Expand All @@ -2777,6 +2834,8 @@ mod tests {
prompt_tokens: None,
completion_tokens: None,
total_tokens: None,
prompt_tokens_details: None,
cached_tokens: None,
};
assert_eq!(parse_usage(Some(&usage)), (0, 0));
}
Expand Down Expand Up @@ -2931,13 +2990,80 @@ mod tests {
assert_eq!(usage.total_tokens, Some(500));
}

// A `cached_tokens` value nested under `prompt_tokens_details` is returned by
// `parse_cached_tokens`, and `parse_usage` still yields the prompt/completion
// counts from the same payload.
#[test]
fn test_usage_deserialize_nested_cached_tokens() {
let json = r#"{
"prompt_tokens": 100,
"completion_tokens": 25,
"total_tokens": 125,
"prompt_tokens_details": {
"cached_tokens": 80
}
}"#;
let usage: ChatCompletionUsage = serde_json::from_str(json).unwrap();
assert_eq!(parse_usage(Some(&usage)), (100, 25));
assert_eq!(parse_cached_tokens(Some(&usage)), Some(80));
}

// With no `prompt_tokens_details` object, the top-level `cached_tokens` value
// is the one returned by `parse_cached_tokens`.
#[test]
fn test_usage_deserialize_top_level_cached_tokens() {
let json = r#"{
"prompt_tokens": 100,
"completion_tokens": 25,
"total_tokens": 125,
"cached_tokens": 40
}"#;
let usage: ChatCompletionUsage = serde_json::from_str(json).unwrap();
assert_eq!(parse_cached_tokens(Some(&usage)), Some(40));
}

// When both the nested and the top-level `cached_tokens` are present, the
// nested value (80) takes precedence over the top-level one (40).
#[test]
fn test_usage_deserialize_prefers_nested_cached_tokens() {
let json = r#"{
"prompt_tokens": 100,
"completion_tokens": 25,
"total_tokens": 125,
"prompt_tokens_details": {
"cached_tokens": 80
},
"cached_tokens": 40
}"#;
let usage: ChatCompletionUsage = serde_json::from_str(json).unwrap();
assert_eq!(parse_cached_tokens(Some(&usage)), Some(80));
}

// A usage payload carrying neither cached-token field yields `None`.
#[test]
fn test_usage_deserialize_cached_tokens_absent() {
let json = r#"{
"prompt_tokens": 100,
"completion_tokens": 25,
"total_tokens": 125
}"#;
let usage: ChatCompletionUsage = serde_json::from_str(json).unwrap();
assert_eq!(parse_cached_tokens(Some(&usage)), None);
}

// A payload without any cached-token fields still deserializes and
// `parse_usage` returns its prompt/completion counts, while
// `parse_cached_tokens` reports `None`.
#[test]
fn test_usage_without_details_still_parses_token_counts() {
let json = r#"{
"prompt_tokens": 10,
"completion_tokens": 5,
"total_tokens": 15
}"#;
let usage: ChatCompletionUsage = serde_json::from_str(json).unwrap();
assert_eq!(parse_usage(Some(&usage)), (10, 5));
assert_eq!(parse_cached_tokens(Some(&usage)), None);
}

// An empty JSON object deserializes successfully with every optional usage
// field, including both cached-token fields, left unset.
#[test]
fn test_usage_deserialize_empty_object() {
let json = "{}";
let usage: ChatCompletionUsage = serde_json::from_str(json).unwrap();
assert!(usage.prompt_tokens.is_none());
assert!(usage.completion_tokens.is_none());
assert!(usage.total_tokens.is_none());
assert!(usage.prompt_tokens_details.is_none());
assert!(usage.cached_tokens.is_none());
}

// -- ChatCompletionToolCall serde roundtrip --------------------------------
Expand Down
24 changes: 24 additions & 0 deletions crates/ironclaw_reborn/src/context_shadow.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/// Deterministic shadow token estimate used for prompt-surface diagnostics.
///
/// Counts Unicode scalar values (not bytes) in `s` and returns that count
/// divided by four, rounded up. The result is clamped to `u32::MAX`.
#[allow(clippy::manual_div_ceil)]
pub(crate) fn estimate_tokens(s: &str) -> u32 {
    let chars = s.chars().count();
    let rounded_up = (chars + 3) / 4;
    // A failed conversion means the estimate exceeds `u32::MAX`; clamp instead.
    u32::try_from(rounded_up).unwrap_or(u32::MAX)
}

#[cfg(test)]
mod tests {
    use super::estimate_tokens;

    /// The estimate is the character count divided by four, rounded up.
    #[test]
    fn estimate_tokens_uses_ceiling_char_count_over_four() {
        let cases = [("", 0), ("a", 1), ("abcd", 1), ("abcde", 2)];
        for (input, expected) in cases {
            assert_eq!(estimate_tokens(input), expected, "input: {input:?}");
        }
    }

    /// Multi-byte characters count once each: five `é` are five chars, not ten bytes.
    #[test]
    fn estimate_tokens_counts_chars_not_bytes() {
        let five_two_byte_chars = "ééééé";
        assert_eq!(estimate_tokens(five_two_byte_chars), 2);
    }
}
3 changes: 3 additions & 0 deletions crates/ironclaw_reborn/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
//! the boundary tests are designed to prevent.

pub mod app_loop_family;
mod context_shadow;
pub mod driver_registry;
pub mod failure_categories;
pub mod hook_gate_refs;
Expand All @@ -34,6 +35,8 @@ pub mod runtime;
pub mod subagent;
pub mod text_loop_driver;
pub mod thread_scope;
mod tool_disclosure;
mod tool_disclosure_port;
pub mod turn_run_executor;
pub mod turn_runner;

Expand Down
21 changes: 21 additions & 0 deletions crates/ironclaw_reborn/src/model_gateway.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ use crate::{
const MODEL_CREDITS_EXHAUSTED_SUMMARY: &str = "model provider account is out of credits";
const PROVIDER_TOOL_ARGUMENTS_OMITTED_MARKER: &str =
"arguments omitted because they exceeded the host provider-tool limit";
/// Tracing target used for the gateway's context-shadow measurement events.
const CONTEXT_SHADOW_TARGET: &str = "ironclaw::reborn::context_shadow";

/// Fail-closed routing policy from resolved Reborn model profile ids to the
/// host-selected provider/model envelope.
Expand Down Expand Up @@ -826,6 +827,15 @@ where
"reborn model gateway resolved provider tool definitions"
);
}
if tracing::enabled!(target: CONTEXT_SHADOW_TARGET, tracing::Level::DEBUG) {
let est_tool_schema_tokens = estimate_tool_schema_tokens(&tool_definitions);
debug!(
target: CONTEXT_SHADOW_TARGET,
tool_definition_count = tool_definitions.len(),
est_tool_schema_tokens,
"reborn tool surface shadow measurement"
);
}
if !tool_definitions.is_empty() {
let mut recovery_tool_names = Vec::with_capacity(tool_definitions.len());
let llm_tool_definitions = tool_definitions
Expand Down Expand Up @@ -975,6 +985,17 @@ fn provider_tool_definition_to_llm(definition: ProviderToolDefinition) -> ToolDe
}
}

/// Estimates the combined token cost of a set of provider tool definitions.
///
/// Each definition is rendered as a JSON object with `name`, `description`,
/// and `parameters` keys, and the rendered text is passed to
/// `context_shadow::estimate_tokens`. Per-definition estimates are summed
/// with saturating addition; an empty slice yields `0`.
fn estimate_tool_schema_tokens(definitions: &[ProviderToolDefinition]) -> u32 {
    let mut total_tokens = 0_u32;
    for definition in definitions {
        let rendered = serde_json::json!({
            "name": definition.name.as_str(),
            "description": definition.description.as_str(),
            "parameters": &definition.parameters,
        })
        .to_string();
        let tokens = crate::context_shadow::estimate_tokens(&rendered);
        total_tokens = total_tokens.saturating_add(tokens);
    }
    total_tokens
}

#[tracing::instrument(
level = "debug",
skip(response, capabilities, replay_identity),
Expand Down
Loading
Loading