JUNTO Practice: Goofspiel Bots
Discussed on December 15, 2020.
Create a bot that plays Goofspiel. (Refer to the Jupyter notebook that I emailed you.) We will have the bots compete during the next meeting.
Competition Results
After 5,000 rounds:
- Daniel won 2,896 rounds.
- John won 1,762 rounds.
- Oscar won 342 rounds.
Solutions
Click to see:
Oscar Martinez
from collections import namedtuple
import itertools
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as T
# Number of bots at the table; the network's layer sizes are derived from it.
n_players = 3

# Run on the GPU when one is visible to PyTorch, otherwise fall back to CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
from abc import ABC, abstractmethod
from typing import Dict, Set
class GoofDQN(nn.Module):
    """Fully connected Q-network that scores the 13 possible bids.

    The input is a ``(batch, 13 * suits + 1)`` tensor: one leading scalar
    followed by ``suits`` blocks of 13 indicators, where
    ``suits = n_players + 1``.  ``forward`` treats columns 1..13 as the
    indicator block of cards that may still be played and masks every other
    action out of the result.

    Args:
        n_players: Number of players in the game; fixes the input width.
    """

    def __init__(self, n_players):
        super(GoofDQN, self).__init__()
        suits = n_players + 1
        # Layer attribute names are kept as-is so saved state dicts still load.
        self.fc1 = nn.Linear((13 * suits) + 1, 26 * suits * 2)
        self.fc1_bn = nn.BatchNorm1d(26 * suits * 2)
        self.fc2 = nn.Linear(26 * suits * 2, 26 * suits * 2)
        self.fc2_bn = nn.BatchNorm1d(26 * suits * 2)
        self.fc3 = nn.Linear(26 * suits * 2, 26 * 2)
        self.fc3_bn = nn.BatchNorm1d(26 * 2)
        self.fc4 = nn.Linear(26 * 2, 26)
        self.fc4_bn = nn.BatchNorm1d(26)
        self.out = nn.Linear(26, 13)

    def forward(self, x):
        """Return per-action scores with unplayable actions set to ``-inf``.

        Args:
            x: Tensor of shape ``(batch, 13 * suits + 1)``; any numeric dtype
                (it is cast to float before the first layer).

        Returns:
            Float tensor of shape ``(batch, 13)``.  Entry ``[b, i]`` is
            ``-inf`` wherever ``x[b, 1 + i]`` is zero.
        """
        # Keep one mask row per sample.  Taking only row 0 would apply the
        # first sample's hand to every other sample in the batch.
        self.mask = x.detach().clone()[:, 1:14]
        x = F.relu(self.fc1_bn(self.fc1(x.float())))
        x = F.relu(self.fc2_bn(self.fc2(x)))
        x = F.relu(self.fc3_bn(self.fc3(x)))
        x = F.relu(self.fc4_bn(self.fc4(x)))
        # Mask by the indicator itself rather than by "score == 0", so a legal
        # action whose score happens to be exactly zero stays selectable.
        return self.out(x).masked_fill(self.mask == 0, float("-inf"))
# Build the network on the selected device and restore the trained weights.
policy_net = GoofDQN(n_players).to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# on a GPU can still be loaded on a CPU-only machine.
policy_net.load_state_dict(
    torch.load("GoofNet1_bs32", map_location=device)
)
# Inference only: eval() makes the BatchNorm layers use their running
# statistics instead of per-batch statistics.
policy_net.eval()
def construct_input(
    your_hand,
    available_prizes,
    players,
    player_hands,
    prize,
):
    """Encode the visible game state as a single-row feature tensor.

    Layout of the row: ``prize`` first, then a 13-wide 0/1 indicator block
    (card values 1..13) for ``your_hand``, one for ``available_prizes``, and
    one per entry of ``players`` taken from ``player_hands`` in that order.

    Args:
        your_hand: Container of card values still held by this player.
        available_prizes: Container of prize values not yet revealed.
        players: Opponent identifiers, in the order their blocks are emitted.
        player_hands: Mapping from opponent identifier to their remaining cards.
        prize: Value of the prize currently being bid on.

    Returns:
        Tensor of shape ``(1, 1 + 13 * (2 + len(players)))``.
    """
    card_values = range(1, 14)

    def indicator(cards):
        # 1 where the card value is present in `cards`, 0 otherwise.
        return [int(value in cards) for value in card_values]

    features = [prize, *indicator(your_hand), *indicator(available_prizes)]
    for opponent in players:
        features += indicator(player_hands[opponent])
    return torch.tensor(features).unsqueeze(0)
def select_action(state, training=False):
    """Pick the highest-scoring action for ``state`` using ``policy_net``.

    Args:
        state: Feature tensor accepted by ``policy_net`` (one row per sample).
        training: Accepted for interface compatibility; not used here.

    Returns:
        Integer tensor of shape ``(batch, 1)`` holding the zero-based index of
        the best-scoring output for each row.
    """
    with torch.no_grad():
        # The state is built on the CPU while the network lives on `device`;
        # move it first so the forward pass does not fail under CUDA.
        scores = policy_net(state.to(device))
        result = scores.max(1)[1].unsqueeze(1)
    return result
class OscarBot(Player):
    """Goofspiel player that bids the card chosen by the ``policy_net`` model.

    Each round it tracks its own remaining cards, the prizes not yet revealed
    and every opponent's remaining cards, encodes them with
    ``construct_input`` and plays the action returned by ``select_action``.
    Rewards are recorded on the instance (``reward``, ``round_reward``) but
    nothing in this class trains on them.
    """

    def __init__(self, training=False):
        """Initialise the win/tie/loss counters.

        Args:
            training: Flag forwarded to ``select_action`` on every bid.
        """
        self.rounds_won = 0
        self.rounds_tied = 0
        self.rounds_lost = 0
        self.round_reward = 0
        self.training = training

    def start_round(
        self, *, player_names: Set[str], name: str
    ):
        """Reset all per-round bookkeeping to a fresh 13-card game.

        Args:
            player_names: Names of every player in the round.
            name: This bot's own name; excluded from the opponent list.
        """
        self.round_reward = 0
        self.available = list(range(1, 14))
        self.available_prizes = set(range(1, 14))
        self.name = name
        # NOTE(review): the opponent order follows set iteration order, which
        # for strings is not guaranteed to be stable across interpreter runs.
        # The feature layout depends on it -- confirm this matches training.
        self.players = [
            pn for pn in player_names if pn != name
        ]
        self.player_hands = {
            pn: set(range(1, 14)) for pn in self.players
        }

    def bid(self, *, prize) -> int:
        """Choose and play a card for the revealed ``prize``.

        Args:
            prize: Value of the prize card being contested.

        Returns:
            The card value (1..13) played; it is removed from
            ``self.available``.
        """
        if hasattr(self, "state"):
            # Keep the previous encoded state around before overwriting it.
            self.previous_state = (
                self.state.detach().clone()
            )
        self.available_prizes.remove(prize)
        self.state = construct_input(
            self.available,
            self.available_prizes,
            self.players,
            self.player_hands,
            prize,
        )
        self.action = select_action(
            self.state, self.training
        )
        # The model's action index is zero-based; card values start at 1.
        card = self.action.item() + 1
        self.available.remove(card)
        return card

    def _record_opponent_bids(
        self, p_bid: Dict[str, int], name: str
    ) -> None:
        """Remove each opponent's played card from their tracked hand."""
        for player, card in p_bid.items():
            if player != name:
                self.player_hands[player].remove(card)

    def result_win(
        self,
        *,
        p_bid: Dict[str, int],
        winner: str,
        prize: int,
        name: str
    ) -> None:
        """Record a decided trick.

        Args:
            p_bid: Card played by each player this trick.
            winner: Name of the player who took the prize.
            prize: Value of the prize that was contested.
            name: This bot's own name.
        """
        self._record_opponent_bids(p_bid, name)
        if winner == name:
            reward = prize
            self.rounds_won += 1
        else:
            reward = -prize
            self.rounds_lost += 1
        self.round_reward += reward
        self.reward = torch.tensor([reward], device=device)

    def result_tie(
        self,
        *,
        p_bid: Dict[str, int],
        prize: int,
        name: str
    ) -> None:
        """Record a tied trick; the reward for a tie is zero.

        Args:
            p_bid: Card played by each player this trick.
            prize: Value of the prize that was contested.
            name: This bot's own name.
        """
        self.rounds_tied += 1
        self._record_opponent_bids(p_bid, name)
        self.reward = torch.tensor([0], device=device)

    def end_round(
        self, *, p_score: Dict[str, int], name: str
    ) -> None:
        """Record the end-of-round reward.

        The bot counts as the winner unless some other player scored strictly
        more, so a shared top score is rewarded as a win.

        Args:
            p_score: Final score of each player.
            name: This bot's own name.
        """
        final_score = p_score[name]
        beaten = any(
            player_name != name and score > final_score
            for player_name, score in p_score.items()
        )
        reward = -100 if beaten else 100
        self.round_reward += reward
        self.reward = torch.tensor([reward], device=device)
John Lekberg
class JohnBot(Player):
    """Bot that always bids the card whose value equals the prize."""

    def start_round(
        self, *, player_names: Set[str], name: str
    ) -> None:
        """Start the round holding every card from 1 to 13."""
        self.available = {card for card in range(1, 14)}

    def bid(self, *, prize: int) -> int:
        """Play the card matching ``prize`` and drop it from the hand.

        Raises:
            KeyError: If that card is no longer in ``self.available``.
        """
        self.available.remove(prize)
        return prize
Daniel Bassett
class DanielBot(Player):
    """Bot whose bid depends only on how many cards it still holds.

    It plays its lowest card while more than 10 cards remain, a random card
    while 6 to 9 remain, and its highest card otherwise.
    """

    def start_round(
        self, *, player_names: Set[str], name: str
    ):
        """Start the round holding every card from 1 to 13."""
        self.available = list(range(1, 14))

    def bid(self, *, prize) -> int:
        """Choose a card, remove it from the hand and return it.

        Args:
            prize: Value of the prize being contested; not used by this
                strategy.

        Returns:
            The card value played.
        """
        # Imported locally because no `random` import is visible at file level.
        import random

        remaining = len(self.available)
        if remaining > 10:
            card = min(self.available)
        elif 5 < remaining < 10:
            card = random.choice(self.available)
        else:
            # NOTE(review): exactly 10 cards also lands here and plays the
            # highest card. Preserved as-is -- confirm the gap is intended.
            card = max(self.available)
        self.available.remove(card)
        return card