JUNTO Practice: Goofspiel Bots (Part 2)
Discussed on December 30, 2020.
Improve your Goofspiel bots. We will have them compete again during the next meeting.
Competition Results
After 10,000 rounds:
- Oscar won 4,208 rounds.
- Daniel won 3,273 rounds.
- John won 2,519 rounds.
Solutions
Click to see:
Oscar Martinez
from collections import namedtuple
from goofspiel import *
import itertools
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as T
# Use the GPU when torch can see one; otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): GAMMA is not referenced by any code visible in this file;
# presumably a discount factor used by the training code -- confirm.
GAMMA = 0.999
# select_action() takes the Q-network / random-card branch when a uniform
# sample falls below NU, and the policy-network branch otherwise.
NU = 0.2
# Exploration threshold: equals EPS_START when steps_done == 0 and decays
# exponentially towards EPS_END as steps_done grows.
EPS_START = 1
EPS_END = 0.05
EPS_DECAY = 1000  # higher is slower
# Number of card slots scanned when picking a random card from the hand.
n_actions = 13
class GoofNFSP_QNetwork(nn.Module):
    """Two-hidden-layer network that scores each of the 13 cards.

    The input row is expected to be ``13 * (n_players + 1) + 1`` wide
    (the layout produced by ``construct_input``): column 0 is the prize
    and columns 1..13 are 0/1 flags for the cards still in our hand.
    Scores of cards that are not in the hand are forced to ``-inf`` so
    an ``argmax`` over the output can only pick a playable card.
    """

    def __init__(self, n_players):
        """Build the layers for a game with ``n_players`` players."""
        super().__init__()
        suits = n_players + 1
        self.fc1 = nn.Linear((13 * suits) + 1, 26 * suits)
        self.fc1_bn = nn.BatchNorm1d(26 * suits)
        self.fc2 = nn.Linear(26 * suits, 26 * suits)
        self.fc2_bn = nn.BatchNorm1d(26 * suits)
        self.out = nn.Linear(26 * suits, 13)

    def forward(self, x):
        """Return a ``(batch, 13)`` tensor of per-card scores.

        Entries for cards whose hand flag is 0 are ``-inf``.
        """
        # Kept as an attribute because the original exposed it that way.
        self.mask = x.detach().clone()[:, 1:14]
        hidden = F.relu(self.fc1_bn(self.fc1(x.float())))
        hidden = F.relu(self.fc2_bn(self.fc2(hidden)))
        # Mask on the hand flags themselves rather than on "score == 0":
        # a playable card whose raw score happens to be exactly 0 must
        # not be treated as unavailable.
        return self.out(hidden).masked_fill(
            self.mask == 0, float("-inf")
        )
class GoofNFSP_PiNetwork(nn.Module):
    """Single-hidden-layer network giving log-probabilities over 13 cards.

    The input row is expected to be ``13 * (n_players + 1) + 1`` wide
    (the layout produced by ``construct_input``): column 0 is the prize
    and columns 1..13 are 0/1 flags for the cards still in our hand.
    Cards that are not in the hand receive log-probability ``-inf``.
    """

    def __init__(self, n_players):
        """Build the layers for a game with ``n_players`` players."""
        super().__init__()
        suits = n_players + 1
        # Attribute names must stay as they are: saved state dicts are
        # loaded into this module by key.
        self.fc1 = nn.Linear((13 * suits) + 1, 26 * suits)
        self.fc1_bn = nn.BatchNorm1d(26 * suits)
        self.out = nn.Linear(26 * suits, 13)

    def forward(self, x):
        """Return a ``(batch, 13)`` tensor of per-card log-probabilities."""
        # Kept as an attribute because the original exposed it that way.
        self.mask = x.detach().clone()[:, 1:14]
        hidden = F.relu(self.fc1_bn(self.fc1(x.float())))
        # Mask on the hand flags themselves rather than on "logit == 0",
        # so a playable card with a logit of exactly 0 stays playable.
        logits = self.out(hidden).masked_fill(
            self.mask == 0, float("-inf")
        )
        # Normalise across the card axis; the implicit-dim form of
        # log_softmax is deprecated.
        return F.log_softmax(logits, dim=1)
# Checkpoint files for each supported table size, keyed by player count.
strategy_paths = {
    2: {
        "q": "2player/nfsp_selfplay/nfsp_q_net_1609266836_3122436.pt",
        "pi": "2player/nfsp_selfplay/nfsp_pi_net_1609266836_3122436.pt",
    },
    3: {
        "q": "3player/nfsp_q_net_1609281483_585000.pt",
        "pi": "3player/nfsp_pi_net_1609281483_585000.pt",
    },
}

# Read by select_action() to compute the exploration threshold; with this
# value the exponential term is negligible, so the threshold sits at EPS_END.
steps_done = 100000

# Loaded once at import time: one Q-network and one policy network per
# supported player count, restored from disk and switched to eval mode.
strategy_networks = {}
for n_players in (2, 3):
    strategy_networks[n_players] = {
        "q_net": GoofNFSP_QNetwork(n_players).to(device),
        "pi_net": GoofNFSP_PiNetwork(n_players).to(device),
    }
    for _net_key, _path_key in (("q_net", "q"), ("pi_net", "pi")):
        # Checkpoints are always deserialised onto the CPU;
        # load_state_dict then copies the values into the module's
        # parameters on whatever device it lives on.
        strategy_networks[n_players][_net_key].load_state_dict(
            torch.load(
                strategy_paths[n_players][_path_key],
                map_location=torch.device("cpu"),
            )
        )
        strategy_networks[n_players][_net_key].eval()
def construct_input(
    your_hand,
    available_prizes,
    players,
    player_hands,
    prize,
):
    """Encode the visible game state as a single-row tensor.

    Layout of the row: the current ``prize`` value, then 13 flags for the
    cards in ``your_hand``, 13 flags for ``available_prizes``, and 13
    flags per entry of ``players`` taken from ``player_hands[player]``.
    A flag is 1 when the card value (1..13) is present, otherwise 0.

    Returns a tensor of shape ``(1, 1 + 13 * (2 + len(players)))``.
    """

    def flags(cards):
        # One 0/1 entry per card value 1..13.
        return [int(value in cards) for value in range(1, 14)]

    row = [prize] + flags(your_hand) + flags(available_prizes)
    for opponent in players:
        row += flags(player_hands[opponent])
    return torch.tensor(row).unsqueeze(0)
def select_action(
    state, nfsp_q_net, nfsp_pi_net, training=False
):
    """Pick a card index (0-based) for the given encoded state.

    With probability ``NU`` the choice comes from the Q-network, or from
    a uniformly random card in hand when the exploration sample does not
    exceed the current epsilon threshold. Otherwise it comes from the
    policy network. ``training`` is accepted but not used here.

    Returns a ``(1, 1)`` tensor holding the chosen index.
    """
    global steps_done
    # Both samples are always drawn, in this order, so the random stream
    # is consumed identically whichever branch is taken.
    explore_roll = random.random()
    source_roll = random.random()
    epsilon = EPS_END + (
        EPS_START - EPS_END
    ) * math.exp(-1.0 * steps_done / EPS_DECAY)

    if source_roll >= NU:
        # Policy-network branch.
        with torch.no_grad():
            return nfsp_pi_net(state).max(1)[1].unsqueeze(1)

    if explore_roll > epsilon:
        # Greedy Q-network branch.
        with torch.no_grad():
            return nfsp_q_net(state).max(1)[1].unsqueeze(1)

    # Exploration branch: uniformly random card among those flagged as
    # present in columns 1..13 of the state row.
    playable = [
        idx
        for idx in range(n_actions)
        if state[0][1:14][idx] == 1
    ]
    return torch.tensor(
        [random.sample(playable, 1)],
        device=device,
        dtype=torch.long,
    )
class SonOfTBot(Player):
    """Goofspiel player that bids using the pre-loaded strategy networks.

    ``n_players`` selects which pair of networks from
    ``strategy_networks`` is used for the normal (whole-table) decision.
    With ``one_v_one=True`` and more than one opponent, the bot instead
    evaluates each opponent separately and plays the highest suggestion.
    """

    def __init__(
        self, n_players=2, training=False, one_v_one=False
    ):
        self.rounds_won = 0
        self.rounds_tied = 0
        self.rounds_lost = 0
        self.round_reward = 0
        self.training = training
        self.one_v_one = one_v_one
        self.nfsp_q_net = strategy_networks[n_players][
            "q_net"
        ]
        self.nfsp_pi_net = strategy_networks[n_players][
            "pi_net"
        ]

    def start_round(
        self, *, player_names: Set[str], name: str
    ):
        """Reset per-round bookkeeping: our hand, the prize pile and
        every opponent's presumed hand all start as cards 1..13."""
        self.round_reward = 0
        self.available = list(range(1, 14))
        self.available_prizes = set(range(1, 14))
        self.name = name
        self.players = [
            pn for pn in player_names if pn != name
        ]
        self.player_hands = {
            pn: set(range(1, 14)) for pn in self.players
        }

    def bid(self, *, prize) -> int:
        """Choose, remove from our hand and return the card to bid.

        Network actions are 0-based card indices, so the card value is
        ``action + 1``.
        """
        if hasattr(self, "state"):
            self.previous_state = (
                self.state.detach().clone()
            )
        self.available_prizes.remove(prize)

        if len(self.players) > 1 and self.one_v_one:
            # Each per-opponent state encodes exactly one opponent, so
            # its width only matches the 2-player networks. The original
            # call also passed `training` positionally in place of the
            # Q-network and omitted the policy network (a TypeError).
            duel_nets = strategy_networks[2]
            suggested_actions = []
            for player in self.players:
                state = construct_input(
                    self.available,
                    self.available_prizes,
                    [player],
                    self.player_hands,
                    prize,
                )
                suggested_actions.append(
                    select_action(
                        state,
                        nfsp_q_net=duel_nets["q_net"],
                        nfsp_pi_net=duel_nets["pi_net"],
                        training=self.training,
                    ).item()
                )
            self.action = max(suggested_actions)
            self.available.remove(self.action + 1)
            return self.action + 1

        self.state = construct_input(
            self.available,
            self.available_prizes,
            self.players,
            self.player_hands,
            prize,
        )
        self.action = select_action(
            self.state,
            nfsp_q_net=self.nfsp_q_net,
            nfsp_pi_net=self.nfsp_pi_net,
            training=self.training,
        )
        card = self.action.item() + 1
        self.available.remove(card)
        return card

    def _discard_opponent_bids(self, p_bid, name) -> None:
        """Remove each opponent's played card from their tracked hand."""
        for player, card in p_bid.items():
            if player != name:
                self.player_hands[player].remove(card)

    def result_win(
        self,
        *,
        p_bid: Dict[str, int],
        winner: str,
        prize: int,
        name: str
    ) -> None:
        """Record a decided trick: +prize if we won it, -prize otherwise."""
        self._discard_opponent_bids(p_bid, name)
        if winner == name:
            reward = prize
            self.rounds_won += 1
        else:
            reward = -prize
            self.rounds_lost += 1
        self.round_reward += reward
        self.reward = torch.tensor([reward], device=device)

    def result_tie(
        self,
        *,
        p_bid: Dict[str, int],
        prize: int,
        name: str
    ) -> None:
        """Record a tied trick (reward 0)."""
        self.rounds_tied += 1
        self._discard_opponent_bids(p_bid, name)
        self.reward = torch.tensor([0], device=device)

    def end_round(
        self, *, p_score: Dict[str, int], name: str
    ) -> None:
        """Record the end-of-round reward.

        The reward is +100 when no other player scored strictly higher
        than us (so a shared top score counts as a win), else -100.
        """
        final_score = p_score[name]
        beaten = any(
            score > final_score
            for player_name, score in p_score.items()
            if player_name != name
        )
        reward = -100 if beaten else 100
        self.round_reward += reward
        self.reward = torch.tensor([reward], device=device)
if __name__ == "__main__":
    # Smoke check when run as a script: build the bot wired to the
    # 3-player networks.
    SonOfTBot(n_players=3)
John Lekberg
from collections import deque
from heapq import nlargest, nsmallest
from numbers import Real
from typing import Callable, Union
import random
class JohnBot(Player):
    """Goofspiel player that switches between fixed bidding strategies.

    Each finished round is logged as ``(strategy, delta)`` in a window of
    the 50 most recent rounds. At the start of a round, the strategy with
    the highest summed delta in that window is used; ties go to the
    strategy listed first in ``self.strategies``.
    """

    def __init__(self) -> None:
        self.past_results = deque(maxlen=50)
        self.past_results.append(
            ("random", 1)
        )  # Force start with random
        self.strategies = [
            "random",
            "at-bid",
            "counter-daniel",
            "S1",
        ]

    def start_round(
        self, *, player_names: Set[str], name: str
    ) -> None:
        """Pick this round's strategy and reset hand and prize tracking."""
        scores = dict.fromkeys(self.strategies, 0)
        for strat, result in self.past_results:
            scores[strat] += result
        self.strategy = max(scores, key=scores.get)
        self.available = list(range(1, 14))
        self.remaining_prizes = list(range(1, 14))

    def bid(self, *, prize: int) -> int:
        """Play the card in hand that minimises the strategy's objective."""
        bid = min(
            self.available, key=self._objective(prize)
        )
        self.available.remove(bid)
        self.remaining_prizes.remove(prize)
        return bid

    def end_round(
        self, *, p_score: Dict[str, int], name: str
    ) -> None:
        """Prepare to end the round.

        p_score -- dict. the end-of-round scores for each player.
        name -- str. Your name. (See p_score.)

        Logs +2 for the strategy used when we are the sole top scorer,
        and -1 otherwise (including shared first place).
        """
        max_score = max(p_score.values())
        n_max_score = sum(
            1 for score in p_score.values() if score == max_score
        )
        is_winner = (
            n_max_score == 1 and p_score[name] == max_score
        )
        delta = +2 if is_winner else -1
        self.past_results.append((self.strategy, delta))

    def _objective(
        self, prize: int
    ) -> Callable[[int], Real]:
        """Return the key function that ``bid`` minimises over the hand.

        Raises ValueError if ``self.strategy`` is not a known strategy.
        """

        def random_card(bid: int) -> Real:
            # Independent random key per card: a uniformly random pick.
            return random.random()

        def lowest_card(bid: int) -> Real:
            return bid

        def closest_to(target: int) -> Callable[[int], Real]:
            # Prefer the card nearest to `target`.
            return lambda bid: abs(bid - target)

        sname = self.strategy
        if sname == "random":
            return random_card
        if sname == "at-bid":
            return closest_to(prize)
        if sname == "counter-daniel":
            n_cards = len(self.available)
            if n_cards > 10:
                # Aim one above our lowest card; the hand does not change
                # while the key is evaluated, so compute the target once.
                return closest_to(min(self.available) + 1)
            # Match the prize only when it is among the top third of the
            # prizes still to come; otherwise play a random card.
            if prize in nlargest(
                n_cards // 3, self.remaining_prizes
            ):
                return closest_to(prize)
            return random_card
        if sname == "S1":
            # Dump the lowest card on the smallest and the two largest
            # prizes; otherwise aim three above the prize.
            if prize in (1, 2, 3, 4, 12, 13):
                return lowest_card
            return closest_to(prize + 3)
        raise ValueError(f"unknown strategy: {sname!r}")
Daniel Bassett
class DanielBot(Player):
    """Goofspiel player whose bid depends only on how many cards it holds."""

    def start_round(
        self, *, player_names: Set[str], name: str
    ):
        """Start the round with a full hand of cards 1..13."""
        self.available = list(range(1, 14))

    def bid(self, *, prize) -> int:
        """Remove and return the card to play; ``prize`` is ignored.

        More than 10 cards left: lowest card. 6 to 9 cards left: a random
        card. Otherwise (exactly 10, or 5 and fewer): highest card.
        """
        cards_left = len(self.available)
        if cards_left > 10:
            card = min(self.available)
        elif 5 < cards_left < 10:
            card = random.choice(self.available)
        else:
            # NOTE(review): a hand of exactly 10 cards lands here and
            # plays the highest card -- confirm that this is intended.
            card = max(self.available)
        self.available.remove(card)
        return card