PyTorch Snippet
import random

import numpy as np
import torch
import torch.nn as nn

# Detect device
# Get cpu, gpu or mps device for training.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")


# Reproducibility
def reproducibility(seed: int = 8848) -> None:
    """Seed the Python, NumPy and PyTorch random number generators.

    Only the RNG seeds are set; no backend (e.g. cuDNN) flags are changed.

    Args:
        seed: Value used to seed every generator.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # manual_seed_all seeds every visible GPU (manual_seed only seeds the
    # current one) and is silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)


# Initialize weights
def init_params(model: nn.Module) -> None:
    """Re-initialize every parameter of *model* in place.

    Parameters with more than one dimension get Xavier-normal values;
    all remaining parameters are drawn with ``nn.init.normal_`` using its
    default arguments.

    Args:
        model: Module whose ``parameters()`` are overwritten.
    """
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_normal_(p)
        else:
            nn.init.normal_(p)
Advantage Actor-Critic (A2C)
from itertools import count import torch import torch.nn as nn import torch.nn.functional as F import torch.optim as optim import utils from IPython import display utils.reproducibility() class Actor(nn.Module): def __init__(self, in_dims: int, out_dims: int): super(Actor, self).__init__() hidden_dim = 128 self.layer = nn.Sequential( nn.Linear(in_dims, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, out_dims), nn.Softmax(dim=-1), ) self.apply(utils.init_weights) def forward(self, x): return self.layer(x) class Critic(nn.Module): def __init__(self, in_dims: int): super(Critic, self).__init__() hidden_dim = 128 self.layer = nn....
REINFORCE Algorithm
LunarLander-v2 from itertools import count import numpy as np import torch import torch.nn as nn import torch.nn.functional as F import torch.optim as optim import utils from IPython import display class Policy(nn.Module): def __init__(self, in_dims: int, out_dims: int): super(Policy, self).__init__() hidden_dim = 128 self.layer = nn.Sequential( nn.Linear(in_dims, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, out_dims), nn.Softmax(dim=-1), ) self.apply(utils.init_weights) def forward(self, x): return self.layer(x) class Baseline(nn.Module): def __init__(self, in_dims: int): super(Baseline, self).__init__() hidden_dim = 128 self....