Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
## 2024-05-24 - [Avoid `dict.setdefault` in hot loops]
**Learning:** Python evaluates call arguments before the call is made, so `dict.setdefault(key, default)` builds the `default` expression eagerly on *every* loop iteration, even when `key` is already present and the freshly built default is immediately discarded. In cases where the default value requires complex allocation or computation (e.g., `{"value_sum": [Fraction(0) for _ in value]}`), this results in significant performance degradation in hot loops.
**Action:** Replace `dict.setdefault` in hot loops with an explicit `if key not in mapping:` membership check and lazy assignment, so the default is only constructed when the key is actually missing.
1 change: 0 additions & 1 deletion scripts/export_h2_bundle_lock_audit.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import json
from pathlib import Path
from typing import Any

from utils import detect_runtime_environment

Expand Down
1 change: 0 additions & 1 deletion scripts/export_p3_paper_freeze.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import json
from pathlib import Path
import re
from typing import Any

from utils import detect_runtime_environment

Expand Down
1 change: 0 additions & 1 deletion scripts/export_p5_public_surface_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import json
from pathlib import Path
from typing import Any

from utils import detect_runtime_environment

Expand Down
2 changes: 1 addition & 1 deletion scripts/export_r10_d0_same_endpoint_cost_attribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
subroutine_braid_long_program,
verify_program,
)
from exec_trace import ExecutionState, Program, TraceEvent, replay_trace, TraceInterpreter
from exec_trace import Program, TraceEvent, replay_trace, TraceInterpreter
from geometry import brute_force_hardmax_2d
from model.exact_hardmax import encode_latest_write_query
from model.free_running_executor import (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from pathlib import Path
import sys
from typing import Any

SCRIPT_DIR = Path(__file__).resolve().parent
if str(SCRIPT_DIR) not in sys.path:
Expand Down
2 changes: 1 addition & 1 deletion scripts/export_r2_systems_baseline_gate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from __future__ import annotations

from collections import Counter, defaultdict
from collections import defaultdict
import csv
import json
from pathlib import Path
Expand Down
1 change: 0 additions & 1 deletion scripts/export_r3_d0_exact_execution_stress_gate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import json
from pathlib import Path
from typing import Any

from bytecode import (
lower_program,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
BytecodeMemoryRegion,
BytecodeOpcode,
BytecodeProgram,
RestrictedFrontendTranslationCase,
compile_restricted_frontend_program,
first_divergence_step,
lower_program,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
BytecodeMemoryRegion,
BytecodeOpcode,
BytecodeProgram,
RestrictedTinyCLoweringCase,
compile_restricted_tinyc_program,
first_divergence_step,
lower_program,
Expand Down
1 change: 0 additions & 1 deletion scripts/export_release_worktree_hygiene_snapshot.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import os
from pathlib import Path
import subprocess
from typing import Any

from utils import detect_runtime_environment

Expand Down
10 changes: 5 additions & 5 deletions src/geometry/hull_kv.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
HardmaxResult,
NumberLike,
ValueLike,
_as_fraction,
_coerce_key,
_coerce_value,
_normalize_number,
Expand Down Expand Up @@ -205,10 +204,11 @@ def _rebuild_if_needed(self) -> None:
total_value_sum = [Fraction(0) for _ in range(self._value_width or 0)]

for index, (key, value) in enumerate(self._entries):
bucket = aggregates.setdefault(
key,
{"value_sum": [Fraction(0) for _ in value], "count": 0, "entry_indices": []},
)
# Performance optimization: Replace `setdefault` with an explicit membership check
# to avoid eagerly allocating `[Fraction(0) for _ in value]` on every loop iteration.
if key not in aggregates:
aggregates[key] = {"value_sum": [Fraction(0) for _ in value], "count": 0, "entry_indices": []}
bucket = aggregates[key]
Comment on lines +207 to +211

Copilot AI Apr 23, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This membership-check pattern does two dict lookups in the common case (`key not in aggregates`, then `aggregates[key]`). Since this is in a hot loop, consider switching to a single-lookup lazy init (e.g., `bucket = aggregates.get(key)` with a sentinel/`None` check, or `try: bucket = aggregates[key]` / `except KeyError:`) to keep the optimization from regressing lookup overhead.

Suggested change
# Performance optimization: Replace `setdefault` with an explicit membership check
# to avoid eagerly allocating `[Fraction(0) for _ in value]` on every loop iteration.
if key not in aggregates:
aggregates[key] = {"value_sum": [Fraction(0) for _ in value], "count": 0, "entry_indices": []}
bucket = aggregates[key]
# Performance optimization: avoid `setdefault` so the default bucket is
# only allocated for missing keys, while also avoiding a second dict
# lookup in the common case where the bucket already exists.
bucket = aggregates.get(key)
if bucket is None:
bucket = {"value_sum": [Fraction(0) for _ in value], "count": 0, "entry_indices": []}
aggregates[key] = bucket

Copilot uses AI. Check for mistakes.
for coord_index, coord in enumerate(value):
bucket["value_sum"][coord_index] += coord
total_value_sum[coord_index] += coord
Expand Down
11 changes: 6 additions & 5 deletions src/model/free_running_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -697,14 +697,15 @@ def evaluate_free_running_programs(

outcomes.append(outcome)
bucket = bucket_name(outcome.program_steps)
bucket_state = per_bucket.setdefault(
bucket,
{
# Performance optimization: Replace `setdefault` with an explicit check
# to avoid allocating new dictionary objects on every iteration.
if bucket not in per_bucket:
per_bucket[bucket] = {
"program_count": 0,
"exact_trace_count": 0,
"exact_final_state_count": 0,
},
)
}
bucket_state = per_bucket[bucket]
Comment on lines +700 to +708

Copilot AI Apr 23, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This `if bucket not in per_bucket: ...; bucket_state = per_bucket[bucket]` pattern performs two dict lookups. Since this is inside the main evaluation loop, consider a single-lookup lazy init (e.g., `state = per_bucket.get(bucket)`, then initialize if missing, or a `try`/`except KeyError` pattern) to reduce per-iteration overhead while still avoiding eager default allocation.

Copilot uses AI. Check for mistakes.
bucket_state["program_count"] += 1
bucket_state["exact_trace_count"] += int(outcome.exact_trace_match)
bucket_state["exact_final_state_count"] += int(outcome.exact_final_state_match)
Expand Down
6 changes: 5 additions & 1 deletion src/model/induced_causal.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,11 @@ def fit_transition_library(
examples = build_transition_examples(programs, interpreter=interpreter)
by_opcode: dict[Opcode, list[TransitionExample]] = {}
for example in examples:
by_opcode.setdefault(example.opcode, []).append(example)
# Performance optimization: Replace `setdefault` with an explicit check
# to avoid list allocation overhead on every loop iteration.
if example.opcode not in by_opcode:
by_opcode[example.opcode] = []
by_opcode[example.opcode].append(example)

rules = tuple(_fit_rule_for_opcode(by_opcode[opcode], opcode) for opcode in sorted(by_opcode, key=str))
return InducedTransitionLibrary(rules=rules)
Expand Down
2 changes: 1 addition & 1 deletion src/model/r45_dual_mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from dataclasses import dataclass

from bytecode import BoundedMemoryVMCase, lower_program, r43_bounded_memory_vm_cases
from bytecode import lower_program, r43_bounded_memory_vm_cases
from exec_trace import Program, TraceInterpreter

from .exact_hardmax import extract_stack_slot_operations
Expand Down
17 changes: 11 additions & 6 deletions src/model/softmax_baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -582,15 +582,16 @@ def evaluate_teacher_forced_model(
total_correct += correct

bucket = baseline_bucket_name(example.program_steps)
bucket_state = per_bucket.setdefault(
bucket,
{
# Performance optimization: Replace `setdefault` with an explicit check
# to avoid dictionary allocation overhead on every loop iteration.
if bucket not in per_bucket:
per_bucket[bucket] = {
"example_count": 0,
"token_count": 0,
"correct_tokens": 0,
"weighted_loss": 0.0,
},
)
}
bucket_state = per_bucket[bucket]
bucket_state["example_count"] = int(bucket_state["example_count"]) + 1
bucket_state["token_count"] = int(bucket_state["token_count"]) + token_count
bucket_state["correct_tokens"] = int(bucket_state["correct_tokens"]) + correct
Expand Down Expand Up @@ -753,7 +754,11 @@ def evaluate_free_running_rollout(
)

bucket = baseline_bucket_name(example.program_steps)
bucket_state = per_bucket.setdefault(bucket, {"example_count": 0, "exact_count": 0})
# Performance optimization: Replace `setdefault` with an explicit check
# to avoid dictionary allocation overhead on every loop iteration.
if bucket not in per_bucket:
per_bucket[bucket] = {"example_count": 0, "exact_count": 0}
bucket_state = per_bucket[bucket]
bucket_state["example_count"] = int(bucket_state["example_count"]) + 1
bucket_state["exact_count"] = int(bucket_state["exact_count"]) + int(exact)

Expand Down
21 changes: 17 additions & 4 deletions src/model/trainable_latest_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,11 @@ def exact_program_accuracy(scorer: TrainableLatestWriteScorer, samples: Sequence
return 0.0
per_program: dict[str, list[bool]] = {}
for sample in samples:
per_program.setdefault(sample.program_name, []).append(scorer.predict_index(sample) == sample.target_index)
# Performance optimization: Replace `setdefault` with an explicit check
# to avoid list allocation overhead on every loop iteration.
if sample.program_name not in per_program:
per_program[sample.program_name] = []
per_program[sample.program_name].append(scorer.predict_index(sample) == sample.target_index)
exact = sum(1 for outcomes in per_program.values() if all(outcomes))
return exact / len(per_program)

Expand All @@ -198,12 +202,21 @@ def evaluate_scorer(
correct_samples += int(correct)

bucket = bucket_name(sample.program_steps)
bucket_state = per_bucket.setdefault(bucket, {"sample_count": 0, "sample_correct": 0, "programs": {}})
# Performance optimization: Replace `setdefault` with an explicit check
# to avoid dictionary allocation overhead on every loop iteration.
if bucket not in per_bucket:
per_bucket[bucket] = {"sample_count": 0, "sample_correct": 0, "programs": {}}
bucket_state = per_bucket[bucket]
bucket_state["sample_count"] = int(bucket_state["sample_count"]) + 1
bucket_state["sample_correct"] = int(bucket_state["sample_correct"]) + int(correct)
bucket_state["programs"].setdefault(sample.program_name, []).append(correct)

per_program.setdefault(sample.program_name, []).append(correct)
if sample.program_name not in bucket_state["programs"]:
bucket_state["programs"][sample.program_name] = []
bucket_state["programs"][sample.program_name].append(correct)

if sample.program_name not in per_program:
per_program[sample.program_name] = []
per_program[sample.program_name].append(correct)
program_steps[sample.program_name] = sample.program_steps

exact_programs = sum(1 for outcomes in per_program.values() if all(outcomes))
Expand Down
1 change: 0 additions & 1 deletion tests/test_bytecode_harness.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

from bytecode import (
accumulator_loop_program,
alternating_memory_loop_bytecode_program,
arithmetic_smoke_program,
branch_then_call_false_program,
Expand Down
1 change: 0 additions & 1 deletion tests/test_bytecode_memory_surfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from bytecode import (
analyze_memory_surfaces,
call_frame_roundtrip_program,
countdown_helper_call_program,
memory_surface_cases,
memory_surface_negative_programs,
run_memory_surface_case,
Expand Down
1 change: 0 additions & 1 deletion tests/test_model_softmax_baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
evaluate_free_running_rollout,
evaluate_teacher_forced_model,
require_torch,
serialize_event_tokens,
SoftmaxBaselineConfig,
SoftmaxTrainingConfig,
summarize_trace_sequences,
Expand Down