forked from Bick95/PPO
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathoutput_net_modules.py
More file actions
42 lines (31 loc) · 1.4 KB
/
Copy pathoutput_net_modules.py
File metadata and controls
42 lines (31 loc) · 1.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import torch.nn as nn
from constants import DISCRETE
class OutMLP(nn.Module):
    """Output head computing the parameterization of a probability distribution.

    The head is a single linear layer mapping ``input_features`` to
    ``output_features``. When ``output_type`` equals ``DISCRETE`` the linear
    outputs are additionally normalized with a softmax over the feature
    (last) dimension; for any other ``output_type`` the raw linear outputs
    are returned unchanged.

    This module implicitly assumes that only one action is to be predicted
    per time step.

    Args:
        output_features: Size of the last dimension of the produced output.
        input_features: Size of the last dimension of the expected input.
        output_type: Kind of action space; compared against ``DISCRETE`` to
            decide whether the softmax normalization is appended.
    """

    def __init__(self,
                 output_features: int,
                 input_features: int = 50,
                 output_type: int = DISCRETE
                 ):
        super().__init__()

        # Processing pipeline: layers applied in order to any input. Building
        # the pipeline once here avoids an if-statement in the forward pass
        # for deciding whether to apply the softmax.
        # NOTE: kept as a plain list (not nn.Sequential / nn.ModuleList) so
        # that the registered sub-module names below - and therefore the
        # state_dict keys of existing checkpoints - stay unchanged.
        self.pipeline = [
            # Output layer
            nn.Linear(
                in_features=input_features,
                out_features=output_features
            )
        ]

        # Register all parameterized layers so their weights are tracked by
        # the module (parameters(), state_dict(), .to(device), ...).
        for i, layer in enumerate(self.pipeline):
            self.add_module("layer_mlp_out_" + str(i), layer)

        # Optional normalization of outputs for a discrete action space.
        # Compare by value (==), not identity (is): identity of equal ints is
        # an interpreter implementation detail and must not decide whether
        # the softmax is applied.
        if output_type == DISCRETE:
            # Normalize over the feature (last) dimension - the same dimension
            # nn.Linear operates on. For 2-D (batch, features) input this is
            # identical to dim=1, and it additionally supports unbatched 1-D
            # input. The softmax holds no parameters, so it is intentionally
            # not registered via add_module.
            self.pipeline.append(nn.Softmax(dim=-1))

    def forward(self, x):
        """Apply the pipeline to ``x`` and return the result.

        Args:
            x: Input tensor whose last dimension has ``input_features``
                entries.

        Returns:
            The output of the linear layer, softmax-normalized over the last
            dimension if the module was built for a ``DISCRETE`` output type.
        """
        for layer in self.pipeline:
            x = layer(x)
        return x