-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathexp2_camera_control_cont.py
More file actions
86 lines (68 loc) · 4.24 KB
/
Copy pathexp2_camera_control_cont.py
File metadata and controls
86 lines (68 loc) · 4.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import tensorflow as tf
from rl.agents import DDPGAgent
from rl.random import OrnsteinUhlenbeckProcess, GaussianWhiteNoiseProcess
import numpy as np
from keras.optimizers import RMSprop, Adam
from rl.memory import SequentialMemory
from rl.callbacks import FileLogger, ModelIntervalCheckpoint
from rl_control.models import define_actor_critic_models
from rl_control.CameraEnviromentCont import CameraControlEnvCont
# Cap the share of GPU memory this process may claim at 10%.
# NOTE(review): `sess` is not referenced anywhere else in the visible code;
# presumably it is created up front so the memory cap is in place before
# Keras builds its own session -- confirm.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
session_config = tf.ConfigProto(gpu_options=gpu_options)
sess = tf.Session(config=session_config)
def train_model(seed=1):
    """Train a DDPG agent on the continuous camera-control environment.

    Builds the actor/critic models, trains for 50,000 steps (episodes capped
    at 100 steps), writes a weight checkpoint every 5,000 steps and a JSON
    training log, both under ``results/``.

    Args:
        seed: Seed applied to NumPy's global RNG and to the environment.
    """
    import os  # function-scope import: the only stdlib name this block adds

    np.random.seed(seed)
    env = CameraControlEnvCont()
    env.seed(seed)

    actor, critic, action_input = define_actor_critic_models(actions=3)
    memory = SequentialMemory(limit=10000, window_length=1)
    # Exploration noise for the 3 action dimensions, annealed from
    # sigma=0.1 down to sigma_min=0.01 over the first 49,000 steps.
    random_process = GaussianWhiteNoiseProcess(mu=0, sigma=0.1, sigma_min=0.01, n_steps_annealing=49000, size=3)
    # NOTE(review): gamma=.1 here differs from the gamma=.95 used when the
    # agent is rebuilt in evaluate_model -- confirm this is intentional.
    agent = DDPGAgent(nb_actions=3, actor=actor, critic=critic, critic_action_input=action_input,
                      memory=memory, nb_steps_warmup_critic=500, nb_steps_warmup_actor=500,
                      random_process=random_process, gamma=.1, target_model_update=1e-3, batch_size=32)
    agent.compile([RMSprop(lr=.0001), RMSprop(lr=.01)], metrics=['mae'])

    log_filename = 'results/drone_camera_cont_control_log.json'
    model_checkpoint_filename = 'results/drone_camera_cont_cnn_weights_{step}.model'

    # Both callbacks write into these locations; create the directories up
    # front so a fresh checkout does not fail on the first log/checkpoint write.
    for output_path in (log_filename, model_checkpoint_filename):
        os.makedirs(os.path.dirname(output_path), exist_ok=True)

    callbacks = [ModelIntervalCheckpoint(model_checkpoint_filename, interval=5000)]
    callbacks += [FileLogger(log_filename, interval=1)]
    agent.fit(env, nb_steps=50000, nb_max_episode_steps=100, verbose=2, visualize=False, log_interval=1,
              callbacks=callbacks)
def _evaluate_split(agent, testing, interactive, seed, title):
    """Run `agent` on one dataset split and print its reward and error statistics.

    Args:
        agent: Compiled agent exposing ``test(env, ...)``.
        testing: Forwarded to ``CameraControlEnvCont(testing=...)`` to select
            the split.
        interactive: Forwarded to ``CameraControlEnvCont(interactive=...)``.
        seed: Seed passed to ``env.seed``.
        title: Header line printed before the statistics.
    """
    env = CameraControlEnvCont(dataset_pickle_path='data/dataset.pickle', testing=testing, interactive=interactive)
    env.seed(seed)
    res = agent.test(env, nb_episodes=500, nb_max_episode_steps=100, verbose=0, visualize=False)

    mean_reward = np.mean(res.history['episode_reward'])
    # The environment exposes initial/final error attributes; report the mean
    # absolute value of each after the test run.
    initial_position_error = np.mean(np.abs(env.init_position_error_pixels))
    initial_zoom_error = np.mean(np.abs(env.init_zoom_error_pixels))
    final_position_error = np.mean(np.abs(env.final_position_error_pixels))
    final_zoom_error = np.mean(np.abs(env.final_zoom_error_pixels))

    print(title)
    print("Mean reward: ", mean_reward)
    print("Position: ", initial_position_error, " -> ", final_position_error)
    print("Zoom: ", initial_zoom_error, " -> ", final_zoom_error)


def evaluate_model(model_path=None, interactive=False, seed=12345):
    """Evaluate a DDPG agent on the training split, then on the testing split.

    Rebuilds the agent, optionally loads weights, and for each split runs 500
    test episodes (at most 100 steps each) and prints the mean episode reward
    plus the mean absolute position/zoom errors before and after control.

    Args:
        model_path: Weights file to load; when None the freshly built
            (unloaded) models are evaluated as they are.
        interactive: Forwarded to the environment constructor.
        seed: Seed applied to NumPy's global RNG and to each environment.
    """
    np.random.seed(seed)

    actor, critic, action_input = define_actor_critic_models(actions=3)
    memory = SequentialMemory(limit=10000, window_length=1)
    # Noise process configured with sigma=0 for evaluation.
    random_process = GaussianWhiteNoiseProcess(mu=0, sigma=0, sigma_min=0, n_steps_annealing=1)
    agent = DDPGAgent(nb_actions=3, actor=actor, critic=critic, critic_action_input=action_input,
                      memory=memory, nb_steps_warmup_critic=500, nb_steps_warmup_actor=100,
                      random_process=random_process, gamma=.95, target_model_update=0.0001, batch_size=32)
    agent.compile([RMSprop(lr=.0001), RMSprop(lr=.01)], metrics=['mae'])

    if model_path is not None:
        agent.load_weights(model_path)

    # Train Evaluation
    _evaluate_split(agent, testing=False, interactive=interactive, seed=seed, title="Training evaluation: ")
    # Test Evaluation
    _evaluate_split(agent, testing=True, interactive=interactive, seed=seed, title="Testing evaluation: ")
if __name__ == '__main__':
    # Script entry point: training is switched off here; only the evaluation
    # of previously saved weights runs. Uncomment the next line to retrain.
    # train_model()
    weights_path = 'models/drone_camera_cont_cnn_weights.model'
    evaluate_model(model_path=weights_path)