Skip to content

Commit 49d1fcc

Browse files
committed
squash BrainstoreTrace LLM convo bug
brainstore trace llm convo required a root span for DFS traversal. This didn't work inside of trace scorers because the root span of an eval is not reported until after scoring completes. This change updates the logic to run convo dfs over forest roots in addition to the real root
1 parent 547d156 commit 49d1fcc

2 files changed

Lines changed: 77 additions & 8 deletions

File tree

braintrust-sdk/src/main/java/dev/braintrust/trace/BrainstoreTrace.java

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -146,9 +146,14 @@ public List<Map<String, Object>> getSpans(@Nonnull String spanType) {
146146
public List<Map<String, Object>> getLLMConversationThread() {
147147
var allSpans = getSpans();
148148

149-
// Build children map: parent_id → children sorted by start_time
149+
// Build children map: parent_id → children sorted by start_time, and collect the set of
150+
// span IDs present in the fetched results.
150151
var children = new java.util.LinkedHashMap<String, List<Map<String, Object>>>();
152+
var presentSpanIds = new java.util.HashSet<String>();
151153
for (var span : allSpans) {
154+
if (span.get("span_id") instanceof String sid) {
155+
presentSpanIds.add(sid);
156+
}
152157
var parents = span.get("span_parents");
153158
if (parents instanceof List<?> parentList && !parentList.isEmpty()) {
154159
if (parentList.get(0) instanceof String pid) {
@@ -163,24 +168,34 @@ public List<Map<String, Object>> getLLMConversationThread() {
163168
(a, b) ->
164169
Double.compare(getStartTime(a), getStartTime(b))));
165170

166-
// Find root span (no parents)
167-
var root =
171+
// Find forest roots: spans with no parents, or whose parent is absent from the fetched
172+
// set (their true ancestor hasn't been ingested yet). Sort by start time so the DFS
173+
// visits orphan subtrees in chronological order.
174+
var forestRoots =
168175
allSpans.stream()
169176
.filter(
170177
s -> {
171178
var p = s.get("span_parents");
172-
return p == null || (p instanceof List<?> l && l.isEmpty());
179+
if (p == null || (p instanceof List<?> l && l.isEmpty())) {
180+
return true;
181+
}
182+
return p instanceof List<?> l
183+
&& l.get(0) instanceof String pid
184+
&& !presentSpanIds.contains(pid);
173185
})
174-
.findFirst()
175-
.orElse(null);
176-
if (root == null) return List.of();
186+
.sorted((a, b) -> Double.compare(getStartTime(a), getStartTime(b)))
187+
.toList();
188+
if (forestRoots.isEmpty()) return List.of();
177189

178190
// Pre-order DFS to get all LLM spans in hierarchy order.
179191
// Prune entire subtrees rooted at scorer spans (purpose == "scorer") — these are
180192
// synthetic spans injected by the Braintrust backend and not part of the real trace.
181193
var llmSpansInOrder = new ArrayList<Map<String, Object>>();
182194
var stack = new java.util.ArrayDeque<Map<String, Object>>();
183-
stack.push(root);
195+
// Push forest roots in reverse start order so the earliest is processed first.
196+
for (int i = forestRoots.size() - 1; i >= 0; i--) {
197+
stack.push(forestRoots.get(i));
198+
}
184199
while (!stack.isEmpty()) {
185200
var span = stack.pop();
186201
if ("automation".equals(getSpanType(span))) {

braintrust-sdk/src/test/java/dev/braintrust/trace/BrainstoreTraceTest.java

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,60 @@ void getLLMConversationThreadPrunesAutomationSubtrees() {
370370
assertEquals(choice, thread.get(1));
371371
}
372372

373+
@Test
374+
void getLLMConversationThreadWorksWithoutRootSpan() {
375+
// Trace scorers run mid-flight: spans close bottom-up, so the parent/root span may not
376+
// yet be ingested. Here the task span's parent ("root") is absent from the fetched set,
377+
// making the task span a forest root. The thread must still be reconstructed.
378+
var sysMsg = Map.<String, Object>of("role", "system", "content", "be helpful");
379+
var userMsg = Map.<String, Object>of("role", "user", "content", "strawberry");
380+
var assistantMsg = Map.<String, Object>of("role", "assistant", "content", "fruit");
381+
var choice =
382+
Map.<String, Object>of(
383+
"finish_reason", "stop", "index", 0, "message", assistantMsg);
384+
385+
// task span points at a "root" parent that was NOT fetched (root not ended/ingested yet)
386+
var task = taskSpan("task", "root", 1.0);
387+
var llm = llmSpan("llm1", "task", 1.1, List.of(sysMsg, userMsg), List.of(choice));
388+
389+
var trace = traceWithSpans(List.of(task, llm)); // no root span present
390+
var thread = trace.getLLMConversationThread();
391+
392+
assertEquals(3, thread.size(), "thread must be reconstructed even without the root span");
393+
assertEquals("system", thread.get(0).get("role"));
394+
assertEquals("user", thread.get(1).get("role"));
395+
assertEquals(assistantMsg, thread.get(2).get("message"));
396+
}
397+
398+
@Test
399+
void getLLMConversationThreadHandlesMultipleOrphanSubtrees() {
400+
// Multiple orphan subtrees whose common ancestor ("root") is absent. Each subtree is a
401+
// forest root; they must be visited in start-time order and their messages concatenated.
402+
var user1 = Map.<String, Object>of("role", "user", "content", "Q1");
403+
var asst1 = Map.<String, Object>of("role", "assistant", "content", "A1");
404+
var choice1 = Map.<String, Object>of("finish_reason", "stop", "message", asst1);
405+
406+
var user2 = Map.<String, Object>of("role", "user", "content", "Q2");
407+
var asst2 = Map.<String, Object>of("role", "assistant", "content", "A2");
408+
var choice2 = Map.<String, Object>of("finish_reason", "stop", "message", asst2);
409+
410+
// Two task subtrees both parented at a missing "root". Provide out of order; turn2 starts
411+
// later than turn1, so turn1's messages must come first.
412+
var turn2 = taskSpan("turn2", "root", 2.0);
413+
var llm2 = llmSpan("llm2", "turn2", 2.1, List.of(user2), List.of(choice2));
414+
var turn1 = taskSpan("turn1", "root", 1.0);
415+
var llm1 = llmSpan("llm1", "turn1", 1.1, List.of(user1), List.of(choice1));
416+
417+
var trace = traceWithSpans(List.of(turn2, llm2, turn1, llm1)); // root absent, out of order
418+
var thread = trace.getLLMConversationThread();
419+
420+
assertEquals(4, thread.size());
421+
assertEquals("Q1", thread.get(0).get("content"));
422+
assertEquals(asst1, thread.get(1).get("message"));
423+
assertEquals("Q2", thread.get(2).get("content"));
424+
assertEquals(asst2, thread.get(3).get("message"));
425+
}
426+
373427
// -------------------------------------------------------------------------
374428
// fetchWithRetry — retry logic (via package-private constructor with custom supplier)
375429
// -------------------------------------------------------------------------

0 commit comments

Comments
 (0)