6 changes: 6 additions & 0 deletions .gitignore
@@ -12,3 +12,9 @@ datasets/
shells/
ckpts
log_bkp/
scripts/main.py
scripts/rmb_main.py
output.txt
gravity*
*.sh
*.zip
110 changes: 110 additions & 0 deletions scripts/configs/bt_awac/rpl/apl.yaml
@@ -0,0 +1,110 @@
algorithm:
  class: BTAWAC
  beta: 0.3333
  max_exp_clip: 100.0
  reward_reg: 0.0
  rm_label: true

checkpoint: null
seed: 0
name: default
debug: false
device: null
wandb:
  activate: false
  entity: null
  project: null

env: walker2d-gravity-1.0  # halfcheetah-gravity-1.0
env_kwargs:
env_wrapper:
env_wrapper_kwargs:

eval_env: walker2d-gravity-1.0
eval_env_kwargs:
eval_env_wrapper:
eval_env_wrapper_kwargs:

replay: false
label_key: rl_sum

optim:
  default:
    class: Adam
    lr: 0.0003

network:
  reward:
    class: EnsembleMLP
    ensemble_size: 1
    hidden_dims: [256, 256, 256]
    reward_act: identity  # sigmoid
  actor:
    class: SquashedGaussianActor
    hidden_dims: [256, 256, 256]
    reparameterize: false
    conditioned_logstd: false
    logstd_min: -5
    logstd_max: 2
  critic:
    class: Critic
    ensemble_size: 2
    hidden_dims: [256, 256, 256]

rm_dataset:
  - class: RPLComparisonDataset
    env: <env>
    batch_size: 64
    segment_length: null
    label_key: <label_key>
    replay: <replay>
    odrl: true

rm_dataloader:
  num_workers: 2
  batch_size: null

rl_dataset:
  - class: APLOfflineDataset
    env: <eval_env>
    batch_size: 256
    replay: <replay>
    odrl: true

rl_dataloader:
  num_workers: 2
  batch_size: null

trainer:
  env_freq: null
  rm_steps: 100000
  rl_steps: 500000
  log_freq: 500
  profile_freq: 500
  eval_freq: 5000
  label_reward: true
  normalize_reward: true

rm_eval:
  function: eval_reward_model
  eval_dataset_kwargs:
    class: RPLComparisonDataset
    env: <eval_env>  # <env>
    batch_size: 32
    label_key: rl_reward_sum
    eval: true
    replay: <replay>
    odrl: true

rl_eval:
  function: eval_offline
  num_ep: 10
  deterministic: true

schedulers:
  actor:
    class: CosineAnnealingLR
    T_max: 500000

processor: null
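The angle-bracket values such as <env>, <label_key>, and <replay> appear to be placeholders that get substituted from the top-level keys of the same config when it is parsed. The loader itself is not part of this diff, so the following is only a minimal sketch of how such substitution could work, assuming a placeholder always names a top-level key; resolve_placeholders is a hypothetical helper, not code from this repository.

# Sketch: resolve "<key>" placeholders in a loaded YAML config against
# its own top-level entries. Hypothetical helper, not part of this repo.
import yaml

def resolve_placeholders(node, root):
    # Strings of the form "<key>" are replaced by root[key].
    if isinstance(node, str) and node.startswith("<") and node.endswith(">"):
        return root[node[1:-1]]
    if isinstance(node, dict):
        return {k: resolve_placeholders(v, root) for k, v in node.items()}
    if isinstance(node, list):
        return [resolve_placeholders(v, root) for v in node]
    return node

with open("scripts/configs/bt_awac/rpl/apl.yaml") as f:
    cfg = yaml.safe_load(f)
cfg = resolve_placeholders(cfg, cfg)
assert cfg["rl_dataset"][0]["env"] == cfg["eval_env"]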
113 changes: 113 additions & 0 deletions scripts/configs/bt_awac/rpl/multi.yaml
@@ -0,0 +1,113 @@
algorithm:
  class: BTAWAC
  beta: 0.3333
  max_exp_clip: 100.0
  reward_reg: 0.0
  rm_label: true

num_tasks: 5
task_name: 0.1_0.5_1.0_2.0_5.0

checkpoint: null
seed: 0
name: default
debug: false
device: null
wandb:
  activate: false
  entity: null
  project: null

env: halfcheetah-gravity-1.0
env_kwargs:
env_wrapper:
env_wrapper_kwargs:

# eval_env: halfcheetah-gravity-1.0
# eval_env_kwargs:
# eval_env_wrapper:
# eval_env_wrapper_kwargs:

replay: true
label_key: rl_dir

optim:
  default:
    class: Adam
    lr: 0.0003

network:
  reward:
    class: EnsembleMLP
    ensemble_size: 1
    hidden_dims: [256, 256, 256]
    reward_act: sigmoid
  actor:
    class: SquashedGaussianActor
    hidden_dims: [256, 256, 256]
    reparameterize: false
    conditioned_logstd: false
    logstd_min: -5
    logstd_max: 2
  critic:
    class: Critic
    ensemble_size: 2
    hidden_dims: [256, 256, 256]

rm_dataset:
  - class: MultiRPLComparisonDataset
    label_key: <label_key>
    num_tasks: <num_tasks>
    env: <env>
    task_name: <task_name>
    batch_size: 64
    odrl: true

rm_dataloader:
  num_workers: 2
  batch_size: null

rl_dataset:
  - class: RPLOfflineDataset
    env: <env>
    batch_size: 256
    replay: <replay>
    odrl: true

rl_dataloader:
  num_workers: 2
  batch_size: null

trainer:
  env_freq: null
  rm_steps: 100000
  rl_steps: 500000
  log_freq: 500
  profile_freq: 500
  eval_freq: 10000
  label_reward: true
  normalize_reward: true

rm_eval:
  function: eval_reward_model
  eval_dataset_kwargs:
    class: RPLComparisonDataset
    env: <env>
    batch_size: 32
    label_key: rl_reward_sum
    eval: true
    replay: <replay>
    odrl: true

rl_eval:
  function: eval_offline
  num_ep: 10
  deterministic: true

schedulers:
  actor:
    class: CosineAnnealingLR
    T_max: 500000

processor: null
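The rm_steps phase presumably trains the EnsembleMLP reward network on segment comparisons from the ComparisonDataset classes, and the "BT" in BTAWAC suggests a Bradley-Terry preference loss, the standard choice for preference-based reward modeling. The training loop is not in this diff, so the sketch below only illustrates that loss; the shapes and names (r1, r2, labels) are assumptions, not the repository's API.

# Sketch of a Bradley-Terry comparison loss for reward-model training,
# assuming per-step rewards for two segments and a preference label in {0, 1}.
import torch
import torch.nn.functional as F

def bradley_terry_loss(r1, r2, labels):
    # r1, r2: (batch, segment_len) predicted rewards for the two segments.
    # labels: (batch,) float tensor, 1.0 if segment 1 is preferred, else 0.0.
    logits = r1.sum(dim=-1) - r2.sum(dim=-1)  # preference logit per pair
    return F.binary_cross_entropy_with_logits(logits, labels)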
111 changes: 111 additions & 0 deletions scripts/configs/bt_awac/rpl/multi_d4rl.yaml
@@ -0,0 +1,111 @@
algorithm:
  class: BTAWAC
  beta: 0.3333
  max_exp_clip: 100.0
  reward_reg: 0.0
  rm_label: true

num_tasks: 5
task_name: 0.1_0.5_1.0_2.0_5.0

checkpoint: null
seed: 0
name: default
debug: false
device: null
wandb:
  activate: false
  entity: null
  project: null

env: halfcheetah-gravity-1.0
env_kwargs:
env_wrapper:
env_wrapper_kwargs:

# eval_env: halfcheetah-gravity-1.0
# eval_env_kwargs:
# eval_env_wrapper:
# eval_env_wrapper_kwargs:

replay: true
label_key: rl_dir

optim:
  default:
    class: Adam
    lr: 0.0003

network:
  reward:
    class: EnsembleMLP
    ensemble_size: 1
    hidden_dims: [256, 256, 256]
    reward_act: sigmoid
  actor:
    class: SquashedGaussianActor
    hidden_dims: [256, 256, 256]
    reparameterize: false
    conditioned_logstd: false
    logstd_min: -5
    logstd_max: 2
  critic:
    class: Critic
    ensemble_size: 2
    hidden_dims: [256, 256, 256]

rm_dataset:
  - class: MultiRPLComparisonDataset
    label_key: <label_key>
    num_tasks: <num_tasks>
    env: <env>
    task_name: <task_name>
    batch_size: 64
    odrl: true

rm_dataloader:
  num_workers: 2
  batch_size: null

rl_dataset:
  - class: D4RLOfflineDataset
    env: <env>
    batch_size: 256

rl_dataloader:
  num_workers: 2
  batch_size: null

trainer:
  env_freq: null
  rm_steps: 100000
  rl_steps: 500000
  log_freq: 500
  profile_freq: 500
  eval_freq: 10000
  label_reward: true
  normalize_reward: true

rm_eval:
  function: eval_reward_model
  eval_dataset_kwargs:
    class: RPLComparisonDataset
    env: <env>
    batch_size: 32
    label_key: rl_reward_sum
    eval: true
    replay: <replay>
    odrl: true

rl_eval:
  function: eval_offline
  num_ep: 10
  deterministic: true

schedulers:
  actor:
    class: CosineAnnealingLR
    T_max: 500000

processor: null
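On the policy side, the beta: 0.3333 and max_exp_clip: 100.0 entries match the advantage-weighted regression update used by AWAC, where advantages are exponentiated with temperature beta and the resulting weights are clipped. A hedged sketch follows, assuming a twin critic (ensemble_size: 2) returning stacked Q-values and an actor exposing sample/log_prob methods like a squashed Gaussian policy; the module interfaces here are placeholders, not this repository's classes.

# Sketch of an advantage-weighted (AWAC-style) actor loss matching the
# beta / max_exp_clip entries above. Placeholder module APIs, not repo code.
import torch

def awac_actor_loss(actor, critic, batch, beta=0.3333, max_exp_clip=100.0):
    obs, act = batch["obs"], batch["action"]
    with torch.no_grad():
        q = critic(obs, act).min(dim=0).values        # pessimistic twin-Q
        pi_act, _ = actor.sample(obs)                 # on-policy action
        v = critic(obs, pi_act).min(dim=0).values     # value baseline
        weight = torch.exp((q - v) / beta).clamp(max=max_exp_clip)
    logp = actor.log_prob(obs, act)                   # log pi(a | s) on data
    return -(weight * logp).mean()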