# DQN-PacMan/Driver.py (branch: stable) - therealcyberlord/DQN-PacMan
# Collects experiences for the replay buffer by stepping the environment with random actions.
from tqdm import tqdm
import random
from Configs import num_actions


class DynamicStepDriver:
    """Fill a replay buffer with transitions gathered by acting randomly.

    The driver only relies on three calls visible in this class:
    ``env.reset()``, ``env.step(action)`` (unpacked as
    ``(next_state, reward, done, info)``), and ``replay_buffer.push(item)``.
    """

    def __init__(self, env, replay_buffer):
        """Store the environment and the replay buffer to be filled."""
        self.env = env
        self.replay_buffer = replay_buffer

    # collect data for the replay buffer
    def collect(self, num_steps):
        """Take ``num_steps`` random actions and push every transition.

        Each pushed item is the tuple ``(state, action, next_state, reward)``
        where ``state`` is the observation the action was taken in. When the
        environment reports ``done``, ``next_state`` is stored as ``None`` and
        the environment is reset so collection continues with a new episode.

        Args:
            num_steps: Number of environment steps (and pushed transitions).
        """
        print("Collecting experiences")
        state = self.env.reset()

        for _ in tqdm(range(num_steps)):
            # random action in [0, num_actions - 1] (randint is inclusive)
            action = random.randint(0, num_actions - 1)

            # stepping into the environment
            next_state, reward, done, _info = self.env.step(action)

            if done:
                # a missing next state marks the transition as terminal
                next_state = None

            # Push BEFORE resetting: the transition must carry the state the
            # action was actually taken in, not the next episode's first
            # observation.
            self.replay_buffer.push((state, action, next_state, reward))

            if done:
                # start a fresh episode for the following step
                state = self.env.reset()
            elif next_state is not None:
                # episode continues: advance to the observed next state
                state = next_state