# Advantage Actor-Critic (A2C)
from itertools import count import torch import torch.nn as nn import torch.nn.functional as F import torch.optim as optim import utils from IPython import display utils.reproducibility() class Actor(nn.Module): def __init__(self, in_dims: int, out_dims: int): super(Actor, self).__init__() hidden_dim = 128 self.layer = nn.Sequential( nn.Linear(in_dims, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, out_dims), nn.Softmax(dim=-1), ) self.apply(utils.init_weights) def forward(self, x): return self.layer(x) class Critic(nn.Module): def __init__(self, in_dims: int): super(Critic, self).__init__() hidden_dim = 128 self.layer = nn....