Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
5dfd0cd
docs(reborn): add Hermes-style context-management design (tool disclo…
serrrfirat Jun 22, 2026
8f56d75
feat(reborn): Phase 0 shadow measurement for context management
serrrfirat Jun 22, 2026
db9d099
feat(reborn): Phase 1.1 tool-disclosure catalog + selector + token be…
serrrfirat Jun 22, 2026
dda1e59
feat(reborn): Phase 1.2 wire tool disclosure via capability-port deco…
serrrfirat Jun 23, 2026
3c3cc9d
docs(reborn): record implementation status + codebase grounding for c…
serrrfirat Jun 23, 2026
2dc0127
test(reborn): negative-path + flag-off coverage for tool-disclosure b…
serrrfirat Jun 23, 2026
336aa16
docs(reborn): mark negative-path bridge tests done; only canary remains
serrrfirat Jun 23, 2026
7aff964
chore(reborn)!: TEMPORARY default tool disclosure ON for remote bench…
serrrfirat Jun 23, 2026
5d8f068
fix(reborn): address Gemini + CodeRabbit review feedback on PR #5149
serrrfirat Jun 23, 2026
41bb5ea
fix(reborn): CI fix (pin QA replays to disclosure-off) + 2 newer Code…
serrrfirat Jun 23, 2026
aa12134
fix(reborn): correct+widen disclosure core (B) and forgive deferred t…
serrrfirat Jun 23, 2026
ecaeb06
feat(reborn): log full-vs-disclosed tool-surface savings per turn (sh…
serrrfirat Jun 23, 2026
58a5140
feat(reborn): Hermes-shaped disclosure core + count-aware tool_search
serrrfirat Jun 23, 2026
4aebf24
fix(reborn): forgiving bridge actually dispatches direct deferred too…
serrrfirat Jun 23, 2026
12a1861
test(reborn): prove forgiving direct-deferred dispatch is provider-ag…
serrrfirat Jun 23, 2026
e719896
feat(reborn): teach the model the tool-disclosure protocol in the sys…
serrrfirat Jun 23, 2026
9908836
fix(reborn): address CodeRabbit review — preserve authority surface +…
serrrfirat Jun 23, 2026
2e1e1ac
debug(reborn): temporary diagnostics for direct-deferred resolution miss
serrrfirat Jun 23, 2026
04e8bc7
fix(reborn): make the full extension lifecycle core so onboarding works
serrrfirat Jun 23, 2026
ca69d08
fix(reborn): resolve deferred tools by any name form + revert step-1 …
serrrfirat Jun 24, 2026
44c6183
debug(reborn): startup marker for the tool-disclosure build
serrrfirat Jun 24, 2026
ba77135
style(reborn): rustfmt wrap in matcher test
serrrfirat Jun 24, 2026
2cf4f00
debug(reborn): bump disclosure marker to v2 + matcher self-test
serrrfirat Jun 24, 2026
f52546c
debug(reborn): unconditional entry probe on context_shadow target
serrrfirat Jun 24, 2026
aaadd04
fix(reborn): surface filters must delegate provider-tool-call resolut…
serrrfirat Jun 24, 2026
116f226
debug(reborn): marker v3 echoes loop_support filter build constant
serrrfirat Jun 24, 2026
f9a436a
fix(reborn): SurfaceTrackingLoopCapabilityPort must delegate tool-cal…
serrrfirat Jun 24, 2026
0c34188
fix(reborn): record canonical wire name for forgiving direct-deferred…
serrrfirat Jun 24, 2026
d167e71
fix(reborn): make tool_call bridge resolve undisclosed catalog tools …
serrrfirat Jun 24, 2026
c701c33
fix(reborn): collapse repeated identical error observations in replay…
serrrfirat Jun 24, 2026
83ae258
fix(reborn): log provider tool-call validate/register rejections in t…
serrrfirat Jun 24, 2026
519dbc7
fix(reborn): make forgiving tool_call failures recoverable, not run-b…
serrrfirat Jun 25, 2026
48fd811
Merge origin/main into firat/reborn-context-management
serrrfirat Jun 25, 2026
7086bf6
fix(reborn): log the rejecting layer's reason on gateway tool-call re…
serrrfirat Jun 25, 2026
7ae4fef
fix(reborn): promote gate-suspended tools so they survive a BlockedAu…
serrrfirat Jun 25, 2026
5b4e9be
fix(reborn): describe-first schema-on-blind-call for deferred tools
serrrfirat Jun 25, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions crates/ironclaw_agent_loop/src/executor/prompt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -233,12 +233,27 @@ impl<'a> PromptPlanningPipeline<'a> {
}

let surface = self.visible_surface(surface_filter).await?;
let capability_view = LoopModelCapabilityView {
visible_capability_ids: surface
// The capability view drives call-time authorization (the model-visible
// capability filter), which must permit every tool the model can legitimately
// invoke this turn — not just the advertised subset. Under progressive tool
// disclosure the surface narrows `descriptors` to the advertised set but
// carries the full reachable catalog in `callable_capability_ids`; use that
// wider set so bridge / forgiving-direct calls to disclosed-but-unadvertised
// tools aren't rejected as "outside the model-visible capability view".
// Advertising and prompt rendering still use the narrow `descriptors`.
// Empty `callable_capability_ids` means no narrowing is in effect, so fall
// back to `descriptors` (preserves non-disclosure behavior exactly).
let visible_capability_ids = if surface.callable_capability_ids.is_empty() {
surface
.descriptors
.iter()
.map(|descriptor| descriptor.capability_id.clone())
.collect(),
.collect()
} else {
surface.callable_capability_ids.clone()
};
let capability_view = LoopModelCapabilityView {
visible_capability_ids,
Comment on lines +236 to +256

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔒 Security & Privacy | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "== prompt pipeline branch =="
rg -n -C4 'callable_capability_ids|visible_capability_ids|apply_capability_filter' \
  crates/ironclaw_agent_loop/src/executor/prompt.rs

echo
echo "== filter implementations touching the visible surface =="
rg -n -C4 'callable_capability_ids|descriptors|apply_capability_filter|CapabilitySurfaceProfileFilter|CapabilitySurfaceDenyFilter' \
  crates/ironclaw_loop_support/src

echo
echo "== tests that exercise non-empty callable_capability_ids =="
rg -n -C2 'callable_capability_ids:\s*vec!\[|callable_capability_ids:\s*Vec::new\(\)' \
  crates tests

Repository: nearai/ironclaw

Length of output: 50372


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "== prompt.rs surface -> capability view path =="
sed -n '220,270p' crates/ironclaw_agent_loop/src/executor/prompt.rs

echo
echo "== capability_surface_filter.rs visible_capabilities implementations =="
sed -n '250,420p' crates/ironclaw_loop_support/src/capability_surface_filter.rs

echo
echo "== any non-empty callable_capability_ids constructors =="
rg -n -C3 'callable_capability_ids:\s*(?!Vec::new\(\)|vec!\[\])' \
  crates/ironclaw_loop_support/src crates/ironclaw_agent_loop/src \
  --pcre2

Repository: nearai/ironclaw

Length of output: 21215


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "== prompt.rs surface -> capability view path =="
sed -n '220,270p' crates/ironclaw_agent_loop/src/executor/prompt.rs

echo
echo "== capability_surface_filter.rs visible_capabilities implementations =="
sed -n '250,420p' crates/ironclaw_loop_support/src/capability_surface_filter.rs

echo
echo "== any non-empty callable_capability_ids constructors =="
rg -n -C3 'callable_capability_ids:\s*(?!Vec::new\(\)|vec!\[\])' \
  crates/ironclaw_loop_support/src crates/ironclaw_agent_loop/src \
  --pcre2

Repository: nearai/ironclaw

Length of output: 21215


Intersect callable_capability_ids when filtering visible surfaces.
CapabilitySurfaceProfileFilter::visible_capabilities and CapabilitySurfaceDenyFilter::visible_capabilities only trim descriptors; crates/ironclaw_agent_loop/src/executor/prompt.rs now authorizes against callable_capability_ids. That leaves a path for an outer filter to drop a tool from the model-visible surface but still let it through call-time auth. Add a regression test with a non-empty callable_capability_ids surface.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@crates/ironclaw_agent_loop/src/executor/prompt.rs` around lines 236 - 256,
The call-time authorization path in prompt.rs now uses callable_capability_ids
directly, but the visible-capability filters only narrow descriptors, so a tool
can be hidden from the model-visible surface yet still remain callable. Update
CapabilitySurfaceProfileFilter::visible_capabilities and
CapabilitySurfaceDenyFilter::visible_capabilities to intersect or otherwise
constrain callable_capability_ids in the same way they constrain descriptors,
and add a regression test covering a surface with non-empty
callable_capability_ids to verify hidden tools are excluded from authorization.

Source: Path instructions

};
self.state.surface_version = Some(surface.version.clone());
if let Some(exit) = self.cancel_boundary().await? {
Expand Down
1 change: 1 addition & 0 deletions crates/ironclaw_agent_loop/src/executor/tests/support.rs
Original file line number Diff line number Diff line change
Expand Up @@ -682,6 +682,7 @@ impl ironclaw_turns::run_profile::LoopCapabilityPort for MockHost {
Ok(VisibleCapabilitySurface {
version: self.visible_surface_version.clone(),
descriptors,
callable_capability_ids: Vec::new(),
})
}

Expand Down
1 change: 1 addition & 0 deletions crates/ironclaw_agent_loop/src/test_support/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,7 @@ impl ironclaw_turns::run_profile::LoopCapabilityPort for MockAgentLoopDriverHost
) -> Result<VisibleCapabilitySurface, AgentLoopHostError> {
self.record_call(MockHostCall::VisibleCapabilities);
Ok(VisibleCapabilitySurface {
callable_capability_ids: Vec::new(),
version: surface_version(),
descriptors: self.visible_capabilities.clone(),
})
Expand Down
1 change: 1 addition & 0 deletions crates/ironclaw_hooks/src/middleware/capability_port.rs
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,7 @@ mod tests {
_request: VisibleCapabilityRequest,
) -> Result<VisibleCapabilitySurface, AgentLoopHostError> {
Ok(VisibleCapabilitySurface {
callable_capability_ids: Vec::new(),
version: CapabilitySurfaceVersion::new("v1").expect("ok"),
descriptors: vec![CapabilityDescriptorView {
capability_id: CapabilityId::new("cap.x").expect("ok"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ impl LoopCapabilityPort for AlwaysCompletedPort {
_request: VisibleCapabilityRequest,
) -> Result<VisibleCapabilitySurface, AgentLoopHostError> {
Ok(VisibleCapabilitySurface {
callable_capability_ids: Vec::new(),
version: CapabilitySurfaceVersion::new("v1").expect("ok"),
descriptors: vec![CapabilityDescriptorView {
capability_id: CapabilityId::new("cap.x").expect("ok"),
Expand Down
130 changes: 128 additions & 2 deletions crates/ironclaw_llm/src/nearai_chat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -585,14 +585,16 @@ impl LlmProvider for NearAiChatProvider {
};

let (input_tokens, output_tokens) = parse_usage(response.usage.as_ref());
let cached_tokens = parse_cached_tokens(response.usage.as_ref());
emit_context_shadow_usage(input_tokens, output_tokens, cached_tokens);

Ok(CompletionResponse {
content,
finish_reason,
input_tokens,
output_tokens,
reasoning: provider_reasoning,
cache_read_input_tokens: 0,
cache_read_input_tokens: cached_tokens.unwrap_or(0).min(input_tokens),
cache_creation_input_tokens: 0,
Comment thread
coderabbitai[bot] marked this conversation as resolved.
})
}
Expand Down Expand Up @@ -696,14 +698,16 @@ impl LlmProvider for NearAiChatProvider {
};

let (input_tokens, output_tokens) = parse_usage(response.usage.as_ref());
let cached_tokens = parse_cached_tokens(response.usage.as_ref());
emit_context_shadow_usage(input_tokens, output_tokens, cached_tokens);

Ok(ToolCompletionResponse {
content,
tool_calls,
finish_reason,
input_tokens,
output_tokens,
cache_read_input_tokens: 0,
cache_read_input_tokens: cached_tokens.unwrap_or(0).min(input_tokens),
cache_creation_input_tokens: 0,
reasoning: provider_reasoning,
})
Expand Down Expand Up @@ -1143,6 +1147,16 @@ struct ChatCompletionUsage {
completion_tokens: Option<u64>,
#[serde(default)]
total_tokens: Option<u64>,
#[serde(default)]
prompt_tokens_details: Option<PromptTokensDetails>,
#[serde(default)]
cached_tokens: Option<u64>,
}

/// Shape of the nested `prompt_tokens_details` object carried by
/// `ChatCompletionUsage`.
///
/// Only the `cached_tokens` counter is modelled here. `parse_cached_tokens`
/// reads this nested value first and falls back to the usage object's
/// top-level `cached_tokens` when it is absent.
#[derive(Debug, Deserialize, Default)]
struct PromptTokensDetails {
/// Cached-token count as reported in the payload; `#[serde(default)]`
/// leaves it `None` when the key is missing.
#[serde(default)]
cached_tokens: Option<u64>,
}

fn saturate_u32(val: u64) -> u32 {
Expand All @@ -1165,6 +1179,33 @@ fn emit_reasoning_trace(reasoning: Option<&str>) {
}
}

/// Emits one debug-level usage event on the `ironclaw::reborn::context_shadow`
/// tracing target.
///
/// * `prompt_tokens` / `completion_tokens` are logged as-is.
/// * `cached_tokens` is logged as an `i64`, with `-1` standing in for `None`.
/// * `cache_hit_ratio` (`cached_tokens / prompt_tokens`) is attached only when
///   a cached count is present and `prompt_tokens` is non-zero, so the ratio
///   is never computed with a zero denominator.
fn emit_context_shadow_usage(
    prompt_tokens: u32,
    completion_tokens: u32,
    cached_tokens: Option<u32>,
) {
    const CONTEXT_SHADOW_TARGET: &str = "ironclaw::reborn::context_shadow";

    // Log field value: the reported count, or -1 when none was reported.
    let cached_tokens_field: i64 = match cached_tokens {
        Some(count) => i64::from(count),
        None => -1,
    };

    // The ratio is defined only for a reported count over a non-empty prompt.
    let hit_ratio = match cached_tokens {
        Some(count) if prompt_tokens > 0 => Some(f64::from(count) / f64::from(prompt_tokens)),
        _ => None,
    };

    match hit_ratio {
        Some(cache_hit_ratio) => tracing::debug!(
            target: CONTEXT_SHADOW_TARGET,
            prompt_tokens,
            completion_tokens,
            cached_tokens = cached_tokens_field,
            cache_hit_ratio = cache_hit_ratio,
            "nearai chat usage shadow measurement"
        ),
        None => tracing::debug!(
            target: CONTEXT_SHADOW_TARGET,
            prompt_tokens,
            completion_tokens,
            cached_tokens = cached_tokens_field,
            "nearai chat usage shadow measurement"
        ),
    }
}

fn parse_usage(usage: Option<&ChatCompletionUsage>) -> (u32, u32) {
let Some(u) = usage else {
return (0, 0);
Expand All @@ -1181,6 +1222,16 @@ fn parse_usage(usage: Option<&ChatCompletionUsage>) -> (u32, u32) {
(input, output)
}

/// Extracts the cached-token count from an optional usage payload.
///
/// Returns `None` when `usage` is `None` or when neither location carries a
/// value. The nested `prompt_tokens_details.cached_tokens` takes precedence;
/// the top-level `cached_tokens` is consulted only when the nested value is
/// missing. The chosen `u64` is narrowed through `saturate_u32`.
fn parse_cached_tokens(usage: Option<&ChatCompletionUsage>) -> Option<u32> {
    usage.and_then(|reported| {
        let nested = reported
            .prompt_tokens_details
            .as_ref()
            .and_then(|details| details.cached_tokens);
        // Nested value wins; fall back to the flat field otherwise.
        let chosen = nested.or(reported.cached_tokens);
        chosen.map(saturate_u32)
    })
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -2744,6 +2795,8 @@ mod tests {
prompt_tokens: Some(100),
completion_tokens: Some(50),
total_tokens: Some(150),
prompt_tokens_details: None,
cached_tokens: None,
};
assert_eq!(parse_usage(Some(&usage)), (100, 50));
}
Expand All @@ -2759,6 +2812,8 @@ mod tests {
prompt_tokens: Some(100),
completion_tokens: None,
total_tokens: Some(180),
prompt_tokens_details: None,
cached_tokens: None,
};
// output = total - prompt = 80
assert_eq!(parse_usage(Some(&usage)), (100, 80));
Expand All @@ -2770,6 +2825,8 @@ mod tests {
prompt_tokens: None,
completion_tokens: None,
total_tokens: Some(200),
prompt_tokens_details: None,
cached_tokens: None,
};
// input = 0 (no prompt), output = total = 200
assert_eq!(parse_usage(Some(&usage)), (0, 200));
Expand All @@ -2781,6 +2838,8 @@ mod tests {
prompt_tokens: None,
completion_tokens: None,
total_tokens: None,
prompt_tokens_details: None,
cached_tokens: None,
};
assert_eq!(parse_usage(Some(&usage)), (0, 0));
}
Expand Down Expand Up @@ -2935,13 +2994,80 @@ mod tests {
assert_eq!(usage.total_tokens, Some(500));
}

/// A cached count nested under `prompt_tokens_details` is returned by
/// `parse_cached_tokens`, and the ordinary token counts still parse.
#[test]
fn test_usage_deserialize_nested_cached_tokens() {
    let payload = r#"{
"prompt_tokens": 100,
"completion_tokens": 25,
"total_tokens": 125,
"prompt_tokens_details": {
"cached_tokens": 80
}
}"#;
    let parsed = serde_json::from_str::<ChatCompletionUsage>(payload).unwrap();

    let token_counts = parse_usage(Some(&parsed));
    assert_eq!(token_counts, (100, 25));

    let cached = parse_cached_tokens(Some(&parsed));
    assert_eq!(cached, Some(80));
}

/// With no `prompt_tokens_details` object, the top-level `cached_tokens`
/// value is the one returned.
#[test]
fn test_usage_deserialize_top_level_cached_tokens() {
    let payload = r#"{
"prompt_tokens": 100,
"completion_tokens": 25,
"total_tokens": 125,
"cached_tokens": 40
}"#;
    let parsed = serde_json::from_str::<ChatCompletionUsage>(payload).unwrap();

    let cached = parse_cached_tokens(Some(&parsed));
    assert_eq!(cached, Some(40));
}

/// When both the nested and the top-level `cached_tokens` are present, the
/// nested value (80) wins over the top-level one (40).
#[test]
fn test_usage_deserialize_prefers_nested_cached_tokens() {
    let payload = r#"{
"prompt_tokens": 100,
"completion_tokens": 25,
"total_tokens": 125,
"prompt_tokens_details": {
"cached_tokens": 80
},
"cached_tokens": 40
}"#;
    let parsed = serde_json::from_str::<ChatCompletionUsage>(payload).unwrap();

    let cached = parse_cached_tokens(Some(&parsed));
    assert_eq!(cached, Some(80));
}

/// A usage payload with no cached-token information in either location
/// yields `None`.
#[test]
fn test_usage_deserialize_cached_tokens_absent() {
    let payload = r#"{
"prompt_tokens": 100,
"completion_tokens": 25,
"total_tokens": 125
}"#;
    let parsed = serde_json::from_str::<ChatCompletionUsage>(payload).unwrap();

    let cached = parse_cached_tokens(Some(&parsed));
    assert_eq!(cached, None);
}

/// Payloads without the cached-token fields still deserialize: the
/// prompt/completion counts parse and the cached count is `None`.
#[test]
fn test_usage_without_details_still_parses_token_counts() {
    let payload = r#"{
"prompt_tokens": 10,
"completion_tokens": 5,
"total_tokens": 15
}"#;
    let parsed = serde_json::from_str::<ChatCompletionUsage>(payload).unwrap();

    let token_counts = parse_usage(Some(&parsed));
    assert_eq!(token_counts, (10, 5));

    let cached = parse_cached_tokens(Some(&parsed));
    assert_eq!(cached, None);
}

/// An empty JSON object deserializes with every usage field left unset.
#[test]
fn test_usage_deserialize_empty_object() {
    let payload = "{}";
    let parsed = serde_json::from_str::<ChatCompletionUsage>(payload).unwrap();

    // Token counters.
    assert_eq!(parsed.prompt_tokens, None);
    assert_eq!(parsed.completion_tokens, None);
    assert_eq!(parsed.total_tokens, None);

    // Cache accounting, nested and top-level.
    assert!(parsed.prompt_tokens_details.is_none());
    assert_eq!(parsed.cached_tokens, None);
}

// -- ChatCompletionToolCall serde roundtrip --------------------------------
Expand Down
3 changes: 3 additions & 0 deletions crates/ironclaw_loop_support/src/capability_port.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1329,6 +1329,9 @@ impl LoopCapabilityPort for HostRuntimeLoopCapabilityPort {
Ok(VisibleCapabilitySurface {
version,
descriptors,
// Empty = "callable == advertised". A disclosure decorator that narrows
// the advertised set populates this with the wider reachable catalog.
callable_capability_ids: Vec::new(),
})
}

Expand Down
Loading
Loading