# JUNTO Practice: Goofspiel Bots (Part 3)

Discussed on January 13, 2021.

Improve your Goofspiel bots. We will have them compete again during the next meeting.

## Solutions

Click a name below to expand that member's solution:

### Oscar Martinez

``````python
import numpy as np
import math
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
import itertools
from collections import namedtuple
from .goofspiel import *

# Prefer CUDA when available.
device = torch.device(
"cuda" if torch.cuda.is_available() else "cpu"
)
# NOTE(review): unconditional override -- everything runs on CPU
# regardless of the CUDA check above; confirm this is intentional.
device = "cpu"

GAMMA = 0.999  # discount factor (unused in this chunk -- presumably a training leftover; confirm)
NU = 0.2  # probability of acting from the Q (best-response) net in select_action
EPS_START = 1  # initial epsilon for eps-greedy exploration
EPS_END = 0.05  # epsilon floor
EPS_DECAY = 1000  # higher is slower
n_actions = 13  # one action per card rank 1..13

class GoofNFSP_Q_GRUNet(nn.Module):
    """GRU-based Q-network for NFSP Goofspiel.

    Per-timestep input features: the prize value followed by 13-bit
    masks for each "suit" (own hand, remaining prizes, and each
    opponent's hand), i.e. 13 * (n_players + 1) + 1 features.
    Output: one Q-value per card (13 actions).
    """

    def __init__(
        self,
        n_players,
        hidden_dim=256,
        n_layers=2,
        dropout_p=0.2,
    ):
        super().__init__()
        suits = n_players + 1  # own hand + prizes + opponent hands
        input_dim = (13 * suits) + 1
        output_dim = 13
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        self.gru = nn.GRU(
            input_dim,
            hidden_dim,
            n_layers,
            batch_first=True,
            dropout=dropout_p,
        )
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x, h):
        """Return (q_values, new_hidden) for x of shape
        (batch, seq, features); q_values has shape (batch, 13)."""
        out, h = self.gru(x.float(), h)
        # Score only the final timestep of the sequence.
        out = self.fc(self.relu(out[:, -1]))

        # NOTE(review): presumably intended to mask unavailable
        # actions, but it tests the *network output* for exact zeros,
        # which essentially never happens -- the mask should probably
        # come from the hand bits of the input. Behavior kept as-is.
        out[out == 0] = float("-inf")

        return out, h

    def init_hidden(self, batch_size):
        """Return a zeroed hidden state (n_layers, batch, hidden_dim)
        with the same dtype as the model parameters."""
        weight = next(self.parameters()).data
        hidden = (
            weight.new(
                self.n_layers, batch_size, self.hidden_dim
            )
            .zero_()
            .to(device)
        )
        return hidden

class GoofNFSP_Pi_GRUNet(nn.Module):
    """GRU-based average-policy network for NFSP Goofspiel.

    Same input encoding as GoofNFSP_Q_GRUNet (13 * (n_players + 1) + 1
    features per timestep); output is a log-probability over the 13
    card actions.
    """

    def __init__(
        self,
        n_players,
        hidden_dim=128,
        n_layers=1,
        dropout_p=0.2,
    ):
        super().__init__()
        suits = n_players + 1  # own hand + prizes + opponent hands
        input_dim = (13 * suits) + 1
        output_dim = 13
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        self.gru = nn.GRU(
            input_dim,
            hidden_dim,
            n_layers,
            batch_first=True,
            dropout=dropout_p,
        )
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x, h):
        """Return (log_probs, new_hidden) for x of shape
        (batch, seq, features); log_probs has shape (batch, 13)."""
        out, h = self.gru(x.float(), h)
        out = self.fc(self.relu(out[:, -1]))

        # NOTE(review): zero-output masking -- see the same caveat on
        # GoofNFSP_Q_GRUNet.forward.
        out[out == 0] = float("-inf")

        # BUG FIX: F.log_softmax without `dim` is deprecated and only
        # warns its way to dim=1 for 2-D input; make it explicit
        # (behavior unchanged).
        out = F.log_softmax(out, dim=1)

        return out, h

    def init_hidden(self, batch_size):
        """Return a zeroed hidden state (n_layers, batch, hidden_dim)
        with the same dtype as the model parameters."""
        weight = next(self.parameters()).data
        hidden = (
            weight.new(
                self.n_layers, batch_size, self.hidden_dim
            )
            .zero_()
            .to(device)
        )
        return hidden

# Pretrained NFSP checkpoint files, keyed by player count.
# Paths are relative to the working directory; the filename suffixes
# are presumably a timestamp and training-step count -- confirm
# against the training script.
strategy_paths = {
2: {
"q": "2player/nfsp_gru_selfplay/nfsp_q_net_1610374365_1313000.pt",
"pi": "2player/nfsp_gru_selfplay/nfsp_pi_net_1610374364_1313000.pt",
},
3: {
"q": "3player/nfsp_gru_selfplay/nfsp_q_net_1610426794_1313000.pt",
"pi": "3player/nfsp_gru_selfplay/nfsp_pi_net_1610426793_1313000.pt",
},
}

# Build and load the pretrained networks for 2- and 3-player games.
# steps_done starts far past the epsilon-decay horizon so select_action
# plays near-greedily from the start.
steps_done = 100000
strategy_networks = {}
for n_players in [2, 3]:
    # BUG FIX: the original referenced undefined GoofNFSP_QNetwork /
    # GoofNFSP_PiNetwork; the GRU classes defined above are the only
    # matching definitions in this file.
    strategy_networks[n_players] = {
        "q_net": GoofNFSP_Q_GRUNet(n_players).to(device),
        "pi_net": GoofNFSP_Pi_GRUNet(n_players).to(device),
    }
    # NOTE(review): the source was truncated here, leaving only the
    # argument lists; these load_state_dict(torch.load(...)) wrappers
    # are reconstructed -- confirm against the original file.
    strategy_networks[n_players]["q_net"].load_state_dict(
        torch.load(
            strategy_paths[n_players]["q"],
            map_location=torch.device("cpu"),
        )
    )
    strategy_networks[n_players]["pi_net"].load_state_dict(
        torch.load(
            strategy_paths[n_players]["pi"],
            map_location=torch.device("cpu"),
        )
    )

    # Inference only: disable dropout etc.
    strategy_networks[n_players]["q_net"].eval()
    strategy_networks[n_players]["pi_net"].eval()

def construct_input(
    your_hand,
    available_prizes,
    players,
    player_hands,
    prize,
):
    """Encode one observation as a (1, 13 * (len(players) + 2) + 1)
    int64 row tensor.

    Layout: [prize value, 13 own-hand bits, 13 remaining-prize bits,
    then 13 presumed-hand bits per opponent in `players`].
    """
    tensor_input = [prize]
    tensor_input.extend(
        [1 if i in your_hand else 0 for i in range(1, 14)]
    )
    tensor_input.extend(
        [
            1 if i in available_prizes else 0
            for i in range(1, 14)
        ]
    )
    for player in players:
        tensor_input.extend(
            [
                1 if i in player_hands[player] else 0
                for i in range(1, 14)
            ]
        )
    # BUG FIX: the original built tensor_input and then fell off the
    # end, returning None; callers torch.cat() the result, so return a
    # (1, n_features) int64 tensor (int64 to match the -1 padding used
    # by fill_input_sequence).
    return torch.tensor(
        tensor_input, dtype=torch.int64
    ).unsqueeze(0)

def construct_input_sequence(
    your_hand_list,
    available_prizes_list,
    players,
    player_hands_list,
    prize_list,
    transformer=False,
):
    """Stack per-turn observations into a (seq_len, n_features)
    tensor (or (seq_len, 1, n_features) when transformer=True).

    The i-th entries of the parallel *_list arguments describe the
    game state at turn i.
    """
    state_tensors_list = []
    for i in range(len(your_hand_list)):
        state_tensors_list.append(
            construct_input(
                your_hand_list[i],
                available_prizes_list[i],
                players,
                player_hands_list[i],
                prize_list[i],
            )
        )
    input_seq = torch.cat(state_tensors_list)
    if transformer:
        # Transformer convention: (seq, batch=1, features).
        input_seq = input_seq.unsqueeze(1)

    # BUG FIX: Tensor.to() is not in-place; the original discarded the
    # moved tensor.
    input_seq = input_seq.to(device)
    return input_seq

def fill_input_sequence(input_seq, length=13):
    """Left-pad a (seq_len, n_features) observation sequence with -1
    rows so it has exactly `length` timesteps.

    Assumes seq_len <= length (a Goofspiel round lasts 13 bids).
    """
    fill_tensor = (
        torch.zeros(
            (
                length - input_seq.shape[0],
                input_seq.shape[1],
            ),
            dtype=torch.int64,
            device=device,
        )
        - 1
    )
    filled_input_seq = torch.cat(
        [fill_tensor, input_seq], dim=0
    )
    # BUG FIX: Tensor.to() returns a new tensor; the original computed
    # and discarded it. Return the device-placed tensor instead.
    return filled_input_seq.to(device)

def select_action(
    state, nfsp_q_net, nfsp_pi_net, training=False
):
    """Pick an action index (0-12) for a (1, seq, features) state.

    NFSP action selection: with probability NU act from the
    eps-greedy Q (best-response) network, otherwise from the
    average-policy network.

    NOTE(review): `training` is currently unused, and `random` is not
    imported in this snippet (presumably provided by the
    `from .goofspiel import *`) -- confirm.
    """
    global steps_done
    eps_sample = random.random()
    nu_sample = random.random()

    # Exponentially decaying exploration threshold.
    eps_threshold = EPS_END + (
        EPS_START - EPS_END
    ) * math.exp(-1.0 * steps_done / EPS_DECAY)

    if nu_sample < NU:
        if eps_sample > eps_threshold:
            # Greedy action from the Q network.
            q_h = nfsp_q_net.init_hidden(state.shape[0])
            result, q_h = nfsp_q_net(state, q_h)
            result = result.max(1)[1].unsqueeze(1)
        else:
            # Uniform-random action over cards still in hand.
            # BUG FIX: the hand bits are features 1..13 of the LATEST
            # observation; the original `state[0][1:14][i]` sliced the
            # time dimension instead (IndexError at i=12 and ambiguous
            # tensor truth value before that).
            cards_in_hand = [
                i
                for i in range(n_actions)
                if state[0, -1, 1:14][i] == 1
            ]
            result = torch.tensor(
                [random.sample(cards_in_hand, 1)],
                device=device,
                dtype=torch.long,
            )

        return result
    else:
        # Average-policy action (most likely card).
        pi_h = nfsp_pi_net.init_hidden(state.shape[0])
        result, pi_h = nfsp_pi_net(state, pi_h)
        result = result.max(1)[1].unsqueeze(1)
        return result

class EyeOfSonOfTBot(Player):
    """Goofspiel bot that bids with pretrained NFSP GRU networks.

    In the default mode it maintains one 13-step observation sequence
    over all opponents; in one_v_one mode it keeps a 2-player-shaped
    sequence per opponent and bids the largest pairwise suggestion.
    """

    def __init__(
        self, n_players=2, training=False, one_v_one=False
    ):
        self.rounds_won = 0
        self.rounds_tied = 0
        self.rounds_lost = 0
        self.round_reward = 0
        self.training = training
        self.one_v_one = one_v_one
        if not self.one_v_one:
            # All-(-1) padded starting sequence sized for the joint
            # observation: 13 * (n_players + 1) mask bits + prize.
            self.initial_state = fill_input_sequence(
                torch.tensor(
                    [-1] * ((13 * (n_players + 1)) + 1),
                    dtype=torch.int64,
                    device=device,
                ).unsqueeze(0)
            )
        else:
            # One 2-player-shaped starting sequence per opponent.
            self.opponent_states = []
            for i in range(n_players - 1):
                self.opponent_states.append(
                    fill_input_sequence(
                        torch.tensor(
                            [-1]
                            * (
                                (13 * ((n_players - 1) + 1))
                                + 1
                            ),
                            dtype=torch.int64,
                            device=device,
                        ).unsqueeze(0)
                    )
                )
        # NOTE(review): in one_v_one mode the pairwise states match
        # the 2-player networks' input width, so strategy_networks[2]
        # may be the intended lookup here -- confirm.
        self.nfsp_q_net = strategy_networks[n_players][
            "q_net"
        ]
        self.nfsp_pi_net = strategy_networks[n_players][
            "pi_net"
        ]

    def start_round(
        self, *, player_names: Set[str], name: str
    ):
        """Reset per-round state: full hand, all prizes remaining,
        and a full presumed hand for every opponent."""
        self.round_reward = 0
        self.available = list(range(1, 14))
        self.available_prizes = set(range(1, 14))
        self.name = name
        self.players = [
            pn for pn in player_names if pn != name
        ]
        self.player_hands = {
            pn: set(range(1, 14)) for pn in self.players
        }

    def bid(self, *, prize) -> int:
        """Choose a card (1-13) to bid on the offered prize."""
        if hasattr(self, "state"):
            self.previous_state = (
                self.state.detach().clone()
            )

        self.available_prizes.remove(prize)

        if len(self.players) > 1 and self.one_v_one:
            # Score each opponent pairwise, then play the largest
            # suggested card.
            suggested_actions = []
            for i, player in enumerate(self.players):
                new_obs = construct_input(
                    self.available,
                    self.available_prizes,
                    [player],
                    self.player_hands,
                    prize,
                )
                # Slide this opponent's window forward by one step.
                state = torch.cat(
                    [self.opponent_states[i][1:], new_obs]
                )
                # BUG FIX: select_action requires the Q and pi nets;
                # the original passed only (state, training).
                suggested_actions.append(
                    select_action(
                        state.unsqueeze(0),
                        self.nfsp_q_net,
                        self.nfsp_pi_net,
                        training=self.training,
                    ).item()
                )

            self.action = max(suggested_actions)
            self.available.remove(self.action + 1)
            return self.action + 1

        else:
            new_obs = construct_input(
                self.available,
                self.available_prizes,
                self.players,
                self.player_hands,
                prize,
            )
            # Slide the 13-step window forward by one observation.
            if hasattr(self, "state"):
                self.state = torch.cat(
                    [self.state[1:], new_obs]
                )
            else:
                self.state = torch.cat(
                    [self.initial_state[1:], new_obs]
                )

            # BUG FIX: pass the networks through (same as above).
            self.action = select_action(
                self.state.unsqueeze(0),
                self.nfsp_q_net,
                self.nfsp_pi_net,
                training=self.training,
            )
            self.available.remove(self.action.item() + 1)

            return self.action.item() + 1

    def result_win(
        self,
        *,
        p_bid: Dict[str, int],
        winner: str,
        prize: int,
        name: str
    ) -> None:
        """Update opponents' presumed hands and the reward signal
        after a decided (non-tied) prize."""
        # Remove each opponent's revealed bid from its hand model.
        # (Was a side-effect list comprehension; a plain loop is the
        # idiomatic form.)
        for k, v in p_bid.items():
            if k != name:
                self.player_hands[k].remove(v)
        if winner == name:
            reward = prize
            self.rounds_won += 1
        else:
            reward = -prize
            self.rounds_lost += 1
        self.round_reward += reward
        self.reward = torch.tensor([reward], device=device)

    def result_tie(
        self,
        *,
        p_bid: Dict[str, int],
        prize: int,
        name: str
    ) -> None:
        """Update hand models after a tied prize; zero reward."""
        self.rounds_tied += 1
        for k, v in p_bid.items():
            if k != name:
                self.player_hands[k].remove(v)
        self.reward = torch.tensor([0], device=device)

    def end_round(
        self, *, p_score: Dict[str, int], name: str
    ) -> None:
        """Grant a +/-100 terminal reward for winning/losing.

        NOTE(review): a tie at the top still counts as a win here
        (only a strictly greater opponent score clears the flag) --
        confirm intended.
        """
        final_score = p_score[name]
        winner = True
        for player_name, score in p_score.items():
            if player_name != name and score > final_score:
                winner = False

        if winner:
            reward = 100
        else:
            reward = -100

        self.round_reward += reward
        self.reward = torch.tensor([reward], device=device)

if __name__ == "__main__":
    # Smoke test: constructing the bot exercises the 3-player
    # network loading path.
    EyeOfSonOfTBot(3)
``````

### John Lekberg

``````python
from collections import deque
from heapq import nlargest, nsmallest
from numbers import Real
from typing import Callable, Dict, Set, Union

import random

class JohnBot(Player):
    """Bandit-style bot: keeps a rolling window of per-strategy
    results and, each round, plays the strategy with the best recent
    score."""

    def __init__(self) -> None:
        # Rolling window of (strategy, delta) results; seeded so that
        # "random" starts slightly ahead.
        self.past_results = deque(maxlen=50)
        # BUG FIX: the original append was missing its closing
        # parenthesis.
        self.past_results.append(("random", 1))
        self.strategies = [
            "random",
            "at-bid",
            "counter-daniel",
            "S1",
        ]

    def start_round(
        self, *, player_names: Set[str], name: str
    ) -> None:
        """Pick the best-scoring strategy and reset round state."""
        scores = dict.fromkeys(self.strategies, 0)
        for strat, result in self.past_results:
            scores[strat] += result
        self.strategy = max(scores, key=scores.get)
        self.available = list(range(1, 14))
        self.remaining_prizes = list(range(1, 14))

    def bid(self, *, prize: int) -> int:
        """Bid the card minimizing the active strategy's objective."""
        bid = min(
            self.available, key=self._objective(prize)
        )
        assert bid in self.available, bid
        self.available.remove(bid)
        self.remaining_prizes.remove(prize)
        return bid

    def end_round(
        self, *, p_score: Dict[str, int], name: str
    ) -> None:
        """Record +2 for a sole win, -1 otherwise, against the
        strategy used this round.

        p_score -- dict. the end-of-round scores for each player.
        name -- str. Your name. (See p_score.)
        """
        max_score = max(p_score.values())
        n_max_score = sum(
            1
            for p_name in p_score
            if p_score[p_name] == max_score
        )
        is_winner = (
            n_max_score == 1 and p_score[name] == max_score
        )
        if is_winner:
            delta = +2
        else:
            delta = -1
        self.past_results.append((self.strategy, delta))

    def _objective(
        self, prize: int
    ) -> Callable[[int], Real]:
        """Return the scoring function (lower is better) used to rank
        candidate bids under the active strategy."""
        sname = self.strategy

        if sname == "random":
            # Uniformly random card.
            def f(bid: int) -> Real:
                return random.random()

        elif sname == "at-bid":
            # Bid as close to the prize value as possible.
            def f(bid: int) -> Real:
                return abs(prize - bid)

        elif sname == "counter-daniel":
            N = len(self.available)
            if N > 10:
                # Early game: slightly overbid the minimum.
                def f(bid: int) -> Real:
                    return abs(
                        bid - (min(self.available) + 1)
                    )

            else:
                if prize in nlargest(
                    N // 3, self.remaining_prizes
                ):
                    # Fight for the top remaining prizes.
                    def f(bid: int) -> Real:
                        return abs(prize - bid)

                else:
                    def f(bid: int) -> Real:
                        return random.random()

        elif sname == "S1":
            # (Removed an unused `N = len(self.available)` here.)
            # NOTE(review): the 1-4 and 12-13 branches are identical
            # (dump the lowest card) -- confirm intended.
            if prize in [1, 2, 3, 4]:
                f = lambda bid: bid
            elif prize in [12, 13]:
                f = lambda bid: bid
            else:
                # Aim three above the prize.
                f = lambda bid: abs(bid - (prize + 3))
        return f
``````

### Daniel Bassett

``````python
class BotBoi(Player):
"""Alternating high-low bot. On each bid, when an even number
of cards remain in hand, it plays the lowest available card;
when an odd number remain, it plays the highest. (An earlier
version used a three-phase 'value investing' strategy -- lowest
card while more than 10 remained, random between 5 and 10, and
highest for the final 5 -- but the implemented rule is the
parity alternation above.) The results were mediocre, scoring
slightly better than RandomBot.
"""

def start_round(
    self, *, player_names: Set[str], name: str
):
    """Begin a round with the full hand of thirteen cards."""
    self.available = list(range(1, 14))

def bid(self, *, prize) -> int:
    """Alternate bids: with an even number of cards left play the
    lowest available card, otherwise play the highest."""
    if len(self.available) % 2 == 0:
        choice = min(self.available)
    else:
        choice = max(self.available)
    self.available.remove(choice)
    return choice
``````