Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
## 2024-05-24 - [Avoid `setdefault` with Expensive Defaults in Hot Loops]
**Learning:** `dict.setdefault(key, complex_default)` evaluates the `complex_default` argument on *every* invocation, even if `key` is already present in the dictionary: Python evaluates call arguments before the call is made, so the default is built first and simply discarded when the key exists. In loops handling thousands of iterations, this can cause significant overhead, particularly if the default is a list comprehension that constructs objects such as `Fraction(0)`.
**Action:** Use an explicit membership check (`if key not in d: d[key] = complex_default`, where `d` is the dictionary) instead of `setdefault` within hot loops, so that the default is only constructed when the key is actually missing.
1 change: 0 additions & 1 deletion scripts/export_h2_bundle_lock_audit.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import json
from pathlib import Path
from typing import Any

from utils import detect_runtime_environment

Expand Down
1 change: 0 additions & 1 deletion scripts/export_p3_paper_freeze.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import json
from pathlib import Path
import re
from typing import Any

from utils import detect_runtime_environment

Expand Down
1 change: 0 additions & 1 deletion scripts/export_p5_public_surface_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import json
from pathlib import Path
from typing import Any

from utils import detect_runtime_environment

Expand Down
2 changes: 1 addition & 1 deletion scripts/export_r10_d0_same_endpoint_cost_attribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
subroutine_braid_long_program,
verify_program,
)
from exec_trace import ExecutionState, Program, TraceEvent, replay_trace, TraceInterpreter
from exec_trace import Program, TraceEvent, replay_trace, TraceInterpreter
from geometry import brute_force_hardmax_2d
from model.exact_hardmax import encode_latest_write_query
from model.free_running_executor import (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from pathlib import Path
import sys
from typing import Any

SCRIPT_DIR = Path(__file__).resolve().parent
if str(SCRIPT_DIR) not in sys.path:
Expand Down
2 changes: 1 addition & 1 deletion scripts/export_r2_systems_baseline_gate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from __future__ import annotations

from collections import Counter, defaultdict
from collections import defaultdict
import csv
import json
from pathlib import Path
Expand Down
1 change: 0 additions & 1 deletion scripts/export_r3_d0_exact_execution_stress_gate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import json
from pathlib import Path
from typing import Any

from bytecode import (
lower_program,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
BytecodeMemoryRegion,
BytecodeOpcode,
BytecodeProgram,
RestrictedFrontendTranslationCase,
compile_restricted_frontend_program,
first_divergence_step,
lower_program,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
BytecodeMemoryRegion,
BytecodeOpcode,
BytecodeProgram,
RestrictedTinyCLoweringCase,
compile_restricted_tinyc_program,
first_divergence_step,
lower_program,
Expand Down
1 change: 0 additions & 1 deletion scripts/export_release_worktree_hygiene_snapshot.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import os
from pathlib import Path
import subprocess
from typing import Any

from utils import detect_runtime_environment

Expand Down
8 changes: 3 additions & 5 deletions src/geometry/hull_kv.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
HardmaxResult,
NumberLike,
ValueLike,
_as_fraction,
_coerce_key,
_coerce_value,
_normalize_number,
Expand Down Expand Up @@ -205,10 +204,9 @@ def _rebuild_if_needed(self) -> None:
total_value_sum = [Fraction(0) for _ in range(self._value_width or 0)]

for index, (key, value) in enumerate(self._entries):
bucket = aggregates.setdefault(
key,
{"value_sum": [Fraction(0) for _ in value], "count": 0, "entry_indices": []},
)
if key not in aggregates:
aggregates[key] = {"value_sum": [Fraction(0) for _ in value], "count": 0, "entry_indices": []}
bucket = aggregates[key]
Comment on lines +207 to +209

Copilot AI Apr 12, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this hot loop, the new `if key not in aggregates` + `bucket = aggregates[key]` pattern performs two dictionary lookups on the common hit path (membership test + indexed read). To keep the intended performance win while avoiding the extra hash lookup, consider using a single-lookup pattern (e.g., `bucket = aggregates.get(key)` and initialize only on `None`, or `try`/`except KeyError`).

Suggested change
if key not in aggregates:
aggregates[key] = {"value_sum": [Fraction(0) for _ in value], "count": 0, "entry_indices": []}
bucket = aggregates[key]
bucket = aggregates.get(key)
if bucket is None:
bucket = {"value_sum": [Fraction(0) for _ in value], "count": 0, "entry_indices": []}
aggregates[key] = bucket

Copilot uses AI. Check for mistakes.
for coord_index, coord in enumerate(value):
bucket["value_sum"][coord_index] += coord
total_value_sum[coord_index] += coord
Expand Down
9 changes: 4 additions & 5 deletions src/model/free_running_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -697,14 +697,13 @@ def evaluate_free_running_programs(

outcomes.append(outcome)
bucket = bucket_name(outcome.program_steps)
bucket_state = per_bucket.setdefault(
bucket,
{
if bucket not in per_bucket:
per_bucket[bucket] = {
"program_count": 0,
"exact_trace_count": 0,
"exact_final_state_count": 0,
},
)
}
bucket_state = per_bucket[bucket]
bucket_state["program_count"] += 1
bucket_state["exact_trace_count"] += int(outcome.exact_trace_match)
bucket_state["exact_final_state_count"] += int(outcome.exact_final_state_match)
Expand Down
2 changes: 1 addition & 1 deletion src/model/r45_dual_mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from dataclasses import dataclass

from bytecode import BoundedMemoryVMCase, lower_program, r43_bounded_memory_vm_cases
from bytecode import lower_program, r43_bounded_memory_vm_cases
from exec_trace import Program, TraceInterpreter

from .exact_hardmax import extract_stack_slot_operations
Expand Down
13 changes: 7 additions & 6 deletions src/model/softmax_baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -582,15 +582,14 @@ def evaluate_teacher_forced_model(
total_correct += correct

bucket = baseline_bucket_name(example.program_steps)
bucket_state = per_bucket.setdefault(
bucket,
{
if bucket not in per_bucket:
per_bucket[bucket] = {
"example_count": 0,
"token_count": 0,
"correct_tokens": 0,
"weighted_loss": 0.0,
},
)
}
bucket_state = per_bucket[bucket]
bucket_state["example_count"] = int(bucket_state["example_count"]) + 1
bucket_state["token_count"] = int(bucket_state["token_count"]) + token_count
bucket_state["correct_tokens"] = int(bucket_state["correct_tokens"]) + correct
Expand Down Expand Up @@ -753,7 +752,9 @@ def evaluate_free_running_rollout(
)

bucket = baseline_bucket_name(example.program_steps)
bucket_state = per_bucket.setdefault(bucket, {"example_count": 0, "exact_count": 0})
if bucket not in per_bucket:
per_bucket[bucket] = {"example_count": 0, "exact_count": 0}
bucket_state = per_bucket[bucket]
bucket_state["example_count"] = int(bucket_state["example_count"]) + 1
bucket_state["exact_count"] = int(bucket_state["exact_count"]) + int(exact)

Expand Down
4 changes: 3 additions & 1 deletion src/model/trainable_latest_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,9 @@ def evaluate_scorer(
correct_samples += int(correct)

bucket = bucket_name(sample.program_steps)
bucket_state = per_bucket.setdefault(bucket, {"sample_count": 0, "sample_correct": 0, "programs": {}})
if bucket not in per_bucket:
per_bucket[bucket] = {"sample_count": 0, "sample_correct": 0, "programs": {}}
bucket_state = per_bucket[bucket]
bucket_state["sample_count"] = int(bucket_state["sample_count"]) + 1
bucket_state["sample_correct"] = int(bucket_state["sample_correct"]) + int(correct)
bucket_state["programs"].setdefault(sample.program_name, []).append(correct)
Expand Down
1 change: 0 additions & 1 deletion tests/test_bytecode_harness.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

from bytecode import (
accumulator_loop_program,
alternating_memory_loop_bytecode_program,
arithmetic_smoke_program,
branch_then_call_false_program,
Expand Down
1 change: 0 additions & 1 deletion tests/test_bytecode_memory_surfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from bytecode import (
analyze_memory_surfaces,
call_frame_roundtrip_program,
countdown_helper_call_program,
memory_surface_cases,
memory_surface_negative_programs,
run_memory_surface_case,
Expand Down
1 change: 0 additions & 1 deletion tests/test_model_softmax_baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
evaluate_free_running_rollout,
evaluate_teacher_forced_model,
require_torch,
serialize_event_tokens,
SoftmaxBaselineConfig,
SoftmaxTrainingConfig,
summarize_trace_sequences,
Expand Down