JUNTO Practice: Goofspiel Bots

Discussed on December 15, 2020.

Create a bot that plays Goofspiel. (Refer to the Jupyter notebook that I emailed you.) We will have the bots compete during the next meeting.
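
The Player base class that the bots subclass comes from the shared notebook and is not reproduced here. As a rough sketch, inferred only from the hooks the solutions below implement (start_round, bid, result_win, result_tie, and end_round), the interface looks something like this:

from abc import ABC, abstractmethod
from typing import Dict, Set


class Player(ABC):
    """Rough sketch of the bot interface, inferred from the solutions below."""

    def start_round(self, *, player_names: Set[str], name: str) -> None:
        """Called once at the start of each round of 13 bids."""

    @abstractmethod
    def bid(self, *, prize: int) -> int:
        """Return a card value (1-13) to bid on the current prize."""

    def result_win(
        self, *, p_bid: Dict[str, int], winner: str, prize: int, name: str
    ) -> None:
        """Called when one player's bid won the prize."""

    def result_tie(
        self, *, p_bid: Dict[str, int], prize: int, name: str
    ) -> None:
        """Called when the highest bid was tied."""

    def end_round(self, *, p_score: Dict[str, int], name: str) -> None:
        """Called with everyone's score once the round is over."""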

Competition Results

After 5,000 rounds:

Solutions

Oscar Martinez

from collections import namedtuple
import itertools
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as T

n_players = 3
device = torch.device(
    "cuda" if torch.cuda.is_available() else "cpu"
)

from abc import ABC, abstractmethod
from typing import Dict, Set


class GoofDQN(nn.Module):
    def __init__(self, n_players):
        super(GoofDQN, self).__init__()
        suits = n_players + 1
        self.fc1 = nn.Linear(
            (13 * suits) + 1, 26 * suits * 2
        )
        self.fc1_bn = nn.BatchNorm1d(26 * suits * 2)
        self.fc2 = nn.Linear(26 * suits * 2, 26 * suits * 2)
        self.fc2_bn = nn.BatchNorm1d(26 * suits * 2)
        self.fc3 = nn.Linear(26 * suits * 2, 26 * 2)
        self.fc3_bn = nn.BatchNorm1d(26 * 2)
        self.fc4 = nn.Linear(26 * 2, 26)
        self.fc4_bn = nn.BatchNorm1d(26)

        self.out = nn.Linear(26, 13)

    def forward(self, x):
        # Input positions 1-13 are one-hot indicators for the cards still
        # in hand; save them as a mask so a played card can't be chosen.
        self.mask = x.detach().clone()[0][1:14]
        x = F.relu(self.fc1_bn(self.fc1(x.float())))
        x = F.relu(self.fc2_bn(self.fc2(x)))
        x = F.relu(self.fc3_bn(self.fc3(x)))
        x = F.relu(self.fc4_bn(self.fc4(x)))
        # Zero out Q-values for unavailable cards, then push them to -inf
        # so that argmax never selects them.
        output = self.out(x) * self.mask
        output[output == 0] = float("-inf")

        return output


policy_net = GoofDQN(n_players).to(device)
# Load the pre-trained weights; map them to the current device in case
# the model was trained on a GPU.
policy_net.load_state_dict(
    torch.load("GoofNet1_bs32", map_location=device)
)
policy_net.eval()


def construct_input(
    your_hand,
    available_prizes,
    players,
    player_hands,
    prize,
):
    # State vector: the current prize value, followed by one-hot
    # indicators for the cards left in your hand, the prizes still
    # undealt, and the cards each opponent has not yet played.
    tensor_input = [prize]
    tensor_input.extend(
        [1 if i in your_hand else 0 for i in range(1, 14)]
    )
    tensor_input.extend(
        [
            1 if i in available_prizes else 0
            for i in range(1, 14)
        ]
    )
    for player in players:
        tensor_input.extend(
            [
                1 if i in player_hands[player] else 0
                for i in range(1, 14)
            ]
        )

    return torch.tensor(tensor_input).unsqueeze(0)


def select_action(state, training=False):
    # Greedy policy: pick the card with the highest masked Q-value.
    # (The training flag is unused in this play-time version.)
    with torch.no_grad():
        result = policy_net(state).max(1)[1].unsqueeze(1)

    return result


class OscarBot(Player):
    """Bid using a trained deep Q-network (GoofDQN)."""

    def __init__(self, training=False):
        self.rounds_won = 0
        self.rounds_tied = 0
        self.rounds_lost = 0
        self.round_reward = 0
        self.training = training

    def start_round(
        self, *, player_names: Set[str], name: str
    ):
        self.round_reward = 0
        self.available = [
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
        ]
        self.available_prizes = {
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
        }
        self.name = name
        self.players = [
            pn for pn in player_names if pn != name
        ]
        self.player_hands = {
            pn: set(
                [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
            )
            for pn in self.players
        }

    def bid(self, *, prize) -> int:
        if hasattr(self, "state"):
            self.previous_state = (
                self.state.detach().clone()
            )

        self.available_prizes.remove(prize)

        self.state = construct_input(
            self.available,
            self.available_prizes,
            self.players,
            self.player_hands,
            prize,
        )

        self.action = select_action(
            self.state, self.training
        )
        self.available.remove(self.action.item() + 1)

        return self.action.item() + 1

    def result_win(
        self,
        *,
        p_bid: Dict[str, int],
        winner: str,
        prize: int,
        name: str
    ) -> None:
        # Track which cards the other players have used up.
        for player, card in p_bid.items():
            if player != name:
                self.player_hands[player].remove(card)
        if winner == name:
            reward = prize
            self.rounds_won += 1
        else:
            reward = -prize
            self.rounds_lost += 1
        self.round_reward += reward
        self.reward = torch.tensor([reward], device=device)

    def result_tie(
        self,
        *,
        p_bid: Dict[str, int],
        prize: int,
        name: str
    ) -> None:
        self.rounds_tied += 1
        for player, card in p_bid.items():
            if player != name:
                self.player_hands[player].remove(card)
        self.reward = torch.tensor([0], device=device)

    def end_round(
        self, *, p_score: Dict[str, int], name: str
    ) -> None:
        final_score = p_score[name]
        winner = True
        for player_name, score in p_score.items():
            if player_name != name and score > final_score:
                winner = False

        if winner:
            reward = 100
        else:
            reward = -100

        self.round_reward += reward
        self.reward = torch.tensor([reward], device=device)

John Lekberg

class JohnBot(Player):
    """Always bid exactly the prize's value."""

    def start_round(
        self, *, player_names: Set[str], name: str
    ) -> None:
        self.available = set(range(1, 14))

    def bid(self, *, prize: int) -> int:
        bid = prize
        self.available.remove(bid)
        return bid

Daniel Bassett

import random


class DanielBot(Player):
    def start_round(
        self, *, player_names: Set[str], name: str
    ):
        self.available = [
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
        ]

    def bid(self, *, prize) -> int:
        length = len(self.available)
        if length > 10:
            # First three bids: throw away the lowest cards.
            low = min(self.available)
            self.available.remove(low)
            return low
        elif 5 < length < 10:
            # Mid-round: bid a random card.
            card = random.choice(self.available)
            self.available.remove(card)
            return card
        else:
            # Otherwise (including exactly 10 cards left): bid the highest card.
            high = max(self.available)
            self.available.remove(high)
            return high