Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions configs/acereason_math/stage1.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ name = "stage1"
batch_size = 1024
rollouts_per_example = 8

[orchestrator.sampling]
[orchestrator.train.sampling]
temperature = 0.6
max_tokens = 8192
max_completion_tokens = 8192

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "math-env"
args = { dataset_name = "nvidia/AceReason-Math", dataset_subset = "default", question_key = "problem", math_verify_max_workers = 128, math_verify_timeout = 60 }

Expand Down
6 changes: 3 additions & 3 deletions configs/acereason_math/stage2.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ name = "stage2"
batch_size = 2048
rollouts_per_example = 16

[orchestrator.sampling]
[orchestrator.train.sampling]
temperature = 0.6
max_tokens = 16384
max_completion_tokens = 16384

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "math-env"
args = { dataset_name = "nvidia/AceReason-Math", dataset_subset = "default", question_key = "problem", math_verify_max_workers = 128, math_verify_timeout = 60 }

Expand Down
6 changes: 3 additions & 3 deletions configs/alphabet_sort/rl.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ name = "Qwen/Qwen3-4B-Instruct-2507"
batch_size = 512
rollouts_per_example = 16

[orchestrator.sampling]
max_tokens = 512
[orchestrator.train.sampling]
max_completion_tokens = 512

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "alphabet-sort"
name = "alphabet-sort"
args = { min_turns = 2, max_turns = 2 }
Expand Down
6 changes: 3 additions & 3 deletions configs/ci/integration/alphabet_sort/start.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ lr = 1e-5
batch_size = 128
rollouts_per_example = 8

[orchestrator.sampling]
max_tokens = 384
[orchestrator.train.sampling]
max_completion_tokens = 384

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "alphabet-sort"
name = "alphabet-sort"
args = { min_turns = 2, max_turns = 2, min_names_per_turn = 1, max_names_per_turn = 3, similarity_power = 4, power_per_turn = false }
Expand Down
6 changes: 3 additions & 3 deletions configs/ci/integration/alphabet_sort_branch/start.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ lr = 1e-5
batch_size = 128
rollouts_per_example = 8

[orchestrator.sampling]
max_tokens = 1024
[orchestrator.train.sampling]
max_completion_tokens = 1024

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "primeintellect/alphabet-sort"
name = "alphabet-sort"
args = { min_turns = 2, max_turns = 2, min_names_per_turn = 1, max_names_per_turn = 3, similarity_power = 4, power_per_turn = false }
Expand Down
6 changes: 3 additions & 3 deletions configs/ci/integration/rl/resume.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ lr = 3e-6
batch_size = 128
rollouts_per_example = 16

[orchestrator.sampling]
max_tokens = 128
[orchestrator.train.sampling]
max_completion_tokens = 128

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "reverse-text"

[inference]
6 changes: 3 additions & 3 deletions configs/ci/integration/rl/start.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ lr = 3e-6
batch_size = 128
rollouts_per_example = 16

[orchestrator.sampling]
max_tokens = 128
[orchestrator.train.sampling]
max_completion_tokens = 128

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "reverse-text"

[inference]
6 changes: 3 additions & 3 deletions configs/ci/integration/rl_lora/resume.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ rollouts_per_example = 16
[orchestrator.model.lora]
name = "r8-1e-4"

[orchestrator.sampling]
max_tokens = 128
[orchestrator.train.sampling]
max_completion_tokens = 128

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "reverse-text"

[inference]
Expand Down
6 changes: 3 additions & 3 deletions configs/ci/integration/rl_lora/start.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ rollouts_per_example = 16
[orchestrator.model.lora]
name = "r8-1e-4"

[orchestrator.sampling]
max_tokens = 128
[orchestrator.train.sampling]
max_completion_tokens = 128

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "reverse-text"

[inference]
Expand Down
6 changes: 3 additions & 3 deletions configs/ci/integration/rl_moe/start.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ lr = 3e-6
batch_size = 128
rollouts_per_example = 16

[orchestrator.sampling]
max_tokens = 128
[orchestrator.train.sampling]
max_completion_tokens = 128

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "reverse-text"

[inference]
Expand Down
6 changes: 3 additions & 3 deletions configs/ci/integration/rl_multi_run/orchestrator.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ name = "PrimeIntellect/Qwen3-0.6B-Reverse-Text-SFT"
[optim]
lr = 3e-5

[sampling]
max_tokens = 128
[train.sampling]
max_completion_tokens = 128

[[env]]
[[train.env]]
id = "reverse-text"

[ckpt]
Expand Down
6 changes: 3 additions & 3 deletions configs/ci/nightly/acereason_math.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
batch_size = 1024
rollouts_per_example = 8

[orchestrator.sampling]
[orchestrator.train.sampling]
temperature = 0.6
max_tokens = 8192
max_completion_tokens = 8192

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "math-env"
name = "acereason-math"
args = { dataset_name = "nvidia/AceReason-Math", dataset_subset = "default", question_key = "problem", math_verify_max_workers = 128, math_verify_timeout = 60 }
Expand Down
6 changes: 3 additions & 3 deletions configs/ci/nightly/multimodal_color_codeword.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ language_model_attr = "model.language_model"
batch_size = 256
rollouts_per_example = 16

[orchestrator.sampling]
max_tokens = 64
[orchestrator.train.sampling]
max_completion_tokens = 64

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "color-codeword"
args = { images_per_turn = 1, max_turns = 3, num_examples = 1000, seed = 42 }

Expand Down
4 changes: 2 additions & 2 deletions configs/debug/orch.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ max_steps = 5
max_async_level = 5
batch_size = 16

[sampling]
max_tokens = 16
[train.sampling]
max_completion_tokens = 16

6 changes: 3 additions & 3 deletions configs/deepscaler/stage1.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ interval = 100
batch_size = 1024
rollouts_per_example = 8

[orchestrator.sampling]
[orchestrator.train.sampling]
temperature = 0.6
max_tokens = 8192
max_completion_tokens = 8192

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "math-env"
name = "deepscaler"
args = { dataset_name= "agentica-org/DeepScaleR-Preview-Dataset", dataset_subset = "default", question_key = "problem", answer_key = "solution", math_verify_max_workers = 128, math_verify_timeout = 60 }
Expand Down
6 changes: 3 additions & 3 deletions configs/deepscaler/stage2.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ resume_step = 500
batch_size = 1024
rollouts_per_example = 8

[orchestrator.sampling]
[orchestrator.train.sampling]
temperature = 0.6
max_tokens = 16384
max_completion_tokens = 16384

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "math-env"
name = "deepscaler"
args = { dataset_name= "agentica-org/DeepScaleR-Preview-Dataset", dataset_subset = "default", question_key = "problem", answer_key = "solution", math_verify_max_workers = 128, math_verify_timeout = 60 }
Expand Down
6 changes: 3 additions & 3 deletions configs/deepscaler/stage3.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ resume_step = 1000
batch_size = 1024
rollouts_per_example = 8

[orchestrator.sampling]
[orchestrator.train.sampling]
temperature = 0.6
max_tokens = 24576
max_completion_tokens = 24576

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "math-env"
name = "deepscaler"
args = { dataset_name= "agentica-org/DeepScaleR-Preview-Dataset", dataset_subset = "default", question_key = "problem", answer_key = "solution", math_verify_max_workers = 128, math_verify_timeout = 60 }
Expand Down
6 changes: 3 additions & 3 deletions configs/elastic/rl.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,15 @@ lr = 1e-5
batch_size = 512
rollouts_per_example = 8

[orchestrator.sampling]
max_tokens = 768
[orchestrator.train.sampling]
max_completion_tokens = 768

[orchestrator.client.elastic]
hostname = "localhost"
port = 8000
sync_interval = 5.0

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "alphabet-sort"
name = "alphabet-sort"
args = { min_turns = 3, max_turns = 5, power_per_turn = false }
10 changes: 5 additions & 5 deletions configs/env_mix/env_mix.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,27 @@ name = "Qwen/Qwen3-4B-Instruct-2507"
batch_size = 512
rollouts_per_example = 16

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "math-env"
name = "math"
args = { min_avg_reward = 0.1, rubric_max_workers = 128 }

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "code-env"
name = "code"
args = { min_avg_reward = 0.1 }

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "science-env"
name = "science"
args = { min_avg_reward = 0.1, pool_size = 128, rubric_max_workers = 128 }

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "logic-env"
name = "logic"
args = { min_avg_reward = 0.1 }

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "math-env"
name = "math-python"
args = { min_avg_reward = 0.1, sandbox_client_max_workers = 128, sandbox_timeout_minutes = 10, sandbox_memory_gb = 1, sandbox_disk_size_gb = 1, pip_install_packages = "numpy sympy", python_tool = true, rubric_max_workers = 128 }
Expand Down
6 changes: 3 additions & 3 deletions configs/gsm8k/rl.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ name = "PrimeIntellect/Qwen3-0.6B"
batch_size = 512
rollouts_per_example = 16

[orchestrator.sampling]
max_tokens = 2048
[orchestrator.train.sampling]
max_completion_tokens = 2048

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "math-env"
name = "gsm8k"
args = { dataset_name = "openai/gsm8k", dataset_subset = "main", math_verify_max_workers = 128, math_verify_timeout = 60 }
Expand Down
8 changes: 4 additions & 4 deletions configs/hendrycks_math/rl.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ name = "Qwen/Qwen3-4B-Instruct-2507"
batch_size = 512
rollouts_per_example = 16

[orchestrator.sampling]
max_tokens = 2048
[orchestrator.train.sampling]
max_completion_tokens = 2048

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "math-env"
name = "hendrycks-math"
args = { dataset_name = "PrimeIntellect/Hendrycks-Math", dataset_subset = "default", math_verify_max_workers = 128, math_verify_timeout = 60 }
Expand All @@ -28,7 +28,7 @@ hard_threshold = 0.0
interval = 10

[orchestrator.eval.sampling]
max_tokens = 2048
max_completion_tokens = 2048

[[orchestrator.eval.env]]
id = "math500"
Expand Down
2 changes: 1 addition & 1 deletion configs/hendrycks_math/sanity.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
batch_size = 512
rollouts_per_example = 8

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "math-env"
args = { dataset_name = "mikasenghaas/Sanity-Test-R1D-1.5B", dataset_subset = "default" }
name = "hendrycks-math"
Expand Down
4 changes: 2 additions & 2 deletions configs/math_group/rl.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@ name = "Qwen/Qwen3-4B-Instruct-2507"
batch_size = 256
rollouts_per_example = 8

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "math-env"
name = "hendrycks-math"
args = { dataset_name = "PrimeIntellect/Hendrycks-Math", dataset_subset = "default" }
ratio = 0.5

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "math-env"
name = "acereason-math"
args = { dataset_name = "nvidia/AceReason-Math", dataset_subset = "default", question_key = "problem" }
Expand Down
6 changes: 3 additions & 3 deletions configs/math_python/math_python.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ name = "Qwen/Qwen3-4B-Instruct-2507"
batch_size = 512
rollouts_per_example = 16

[orchestrator.sampling]
max_tokens = 512
[orchestrator.train.sampling]
max_completion_tokens = 512

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "math-python"
args = { sandbox_client_max_workers = 128, sandbox_timeout_minutes = 10, sandbox_memory_gb = 1, sandbox_disk_size_gb = 1, pip_install_packages = "numpy sympy" }

Expand Down
6 changes: 3 additions & 3 deletions configs/multimodal/rl_color_codeword.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ batch_size = 256
rollouts_per_example = 16


[orchestrator.sampling]
max_tokens = 64
[orchestrator.train.sampling]
max_completion_tokens = 64

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "color-codeword"
args = { images_per_turn = 1, max_turns = 3, num_examples = 1000, seed = 42 }

Expand Down
6 changes: 3 additions & 3 deletions configs/multimodal/rl_color_codeword_test.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ language_model_attr = "model.language_model"
batch_size = 16
rollouts_per_example = 2

[orchestrator.sampling]
max_tokens = 32
[orchestrator.train.sampling]
max_completion_tokens = 32

[[orchestrator.env]]
[[orchestrator.train.env]]
id = "color-codeword"
args = { images_per_turn = 1, max_turns = 2, num_examples = 100, seed = 42 }

Expand Down
Loading
Loading