Advantage Actor-Critic (A2C)

from itertools import count

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import utils
from IPython import display

utils.reproducibility()


class Actor(nn.Module):
    def __init__(self, in_dims: int, out_dims: int):
        super(Actor, self).__init__()
        hidden_dim = 128
        self.layer = nn.Sequential(
            nn.Linear(in_dims, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, out_dims),
            nn.Softmax(dim=-1),
        )
        self.apply(utils.init_weights)

    def forward(self, x):
        return self.layer(x)


class Critic(nn.Module):
    def __init__(self, in_dims: int):
        super(Critic, self).__init__()
        hidden_dim = 128
        self.layer = nn....

June 4, 2024 · 2 min · 341 words · xuziye0327

REINFORCE Algorithm

LunarLander-v2

from itertools import count

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import utils
from IPython import display


class Policy(nn.Module):
    def __init__(self, in_dims: int, out_dims: int):
        super(Policy, self).__init__()
        hidden_dim = 128
        self.layer = nn.Sequential(
            nn.Linear(in_dims, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, out_dims),
            nn.Softmax(dim=-1),
        )
        self.apply(utils.init_weights)

    def forward(self, x):
        return self.layer(x)


class Baseline(nn.Module):
    def __init__(self, in_dims: int):
        super(Baseline, self).__init__()
        hidden_dim = 128
        self....

June 3, 2024 · 2 min · 400 words · xuziye0327