Return to JUNTO

JUNTO Practice: Battleship Bots (Part 3)

Discussed on February 20, 2021.

Improve your Battleship bots. (Refer to the Jupyter notebook that I emailed you.) We will have the bots compete at the next meeting.


Results (number of games won by each bot; each matchup totals 1,000 games, draws included):

ZeroPosition (John Random Arrangement)    817
VanillaNeeson (Horizontal trained, Oscar Random Arrangement)    159
GameResult.DRAW       24

ZeroPosition (John Random Arrangement)   509
JohnBot_D2 (John Random Arrangement)    448
GameResult.DRAW       43

JohnBot_D2 (John Random Arrangement)    653
VanillaNeeson (Horizontal trained, Oscar Random Arrangement)    308
GameResult.DRAW       39

Solutions

Click to see:

Oscar Martinez

class BattleShipVanillaCNNXL(nn.Module):
    """Small CNN that produces 100 scores per input board.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max-pooling, feed
    three fully connected layers. Every output position whose input value
    is non-zero is overwritten with ``-inf``.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 3, 5, padding=2)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(3, 5, 5, padding=2)
        self.fc1 = nn.Linear(5 * 2 * 2, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, 100)

    def forward(self, x):
        """Score the board(s) in ``x``.

        Assumes ``x`` is shaped (batch, 1, 10, 10) so that two poolings leave
        a 2x2 map per channel -- TODO confirm against the caller.
        """
        # Flattened per-sample mask of non-zero inputs; stored on the module.
        self.mask = x.detach().clone().flatten(1) != 0

        features = x
        for conv in (self.conv1, self.conv2):
            features = self.pool(F.relu(conv(features)))

        hidden = F.relu(self.fc1(features.view(-1, 5 * 2 * 2)))
        hidden = F.relu(self.fc2(hidden))
        scores = self.fc3(hidden)

        # Positions with a non-zero input can never win an argmax.
        scores[self.mask] = float("-inf")
        return scores

John Lekberg

Contents:

Player code

import numpy as np
import torch
import torch.nn as nn


class JohnBot_D2(Player):
    """Battleship player that chooses guesses with a single-layer network.

    The bot keeps a flat 100-cell view of the opponent's board (+1 for a hit
    or sink, -1 for a miss, 0 for a cell it has not guessed yet), feeds it
    through a linear layer followed by a sigmoid, and guesses the
    highest-scoring cell that is still available.
    """

    def __init__(self, path_state_dict):
        """Build the network and load weights from ``path_state_dict``.

        ``path_state_dict`` is passed straight to ``torch.load`` and the
        result to ``load_state_dict``.
        """
        state_dict = torch.load(path_state_dict)

        class Net(nn.Module):
            def __init__(self):
                super().__init__()
                self.fc1 = nn.Linear(100, 100)
                self.sigmoid = nn.Sigmoid()

            def forward(self, x):
                x = self.fc1(x)
                x = self.sigmoid(x)
                return x

        self.net = Net()
        self.net.load_state_dict(state_dict)
        # Inference only: switch the module to evaluation mode.
        self.net.eval()

    # -- Behavior --

    def arrange(self) -> Arrangement:
        """Return a randomly generated arrangement."""
        return random_arrangement()

    def guess(self) -> Position:
        """Return the still-available cell with the highest network score."""
        known_board = torch.tensor(self.board).float()
        with torch.no_grad():
            predicted_board = self.net(known_board)
        # Masked (already guessed) cells are filled with the minimum value
        # before argmax.  Convert to a plain int so that
        # Position receives built-in integers rather than NumPy scalars.
        guess_i = int(
            np.ma.array(
                predicted_board, mask=~self.can_guess
            ).argmax()
        )
        self.can_guess[guess_i] = False
        guess_pos = self._i_to_pos(guess_i)
        return guess_pos

    # -- Hooks --

    def hook_start(self):
        """Reset the known board and mark every cell as guessable."""
        self.board = np.zeros((100,))
        self.can_guess = np.full((100,), True)

    def hook_invalid_guess(self, *, guess):
        """Fail loudly: ``guess`` never repeats a cell, so this is a bug."""
        raise RuntimeError(
            f"JohnBot_D2 made an invalid guess: {guess!r}"
        )

    def hook_self_guess(self, *, guess, result):
        """Record the outcome of our own guess on the known board.

        Raises:
            ValueError: if ``result`` is not HIT, MISS or SINK.
        """
        i = self._pos_to_i(guess)

        if result is GuessResult.HIT or result is GuessResult.SINK:
            # A sink is also a hit on that cell.
            value = +1
        elif result is GuessResult.MISS:
            value = -1
        else:
            # Previously this fell through and hit an unbound local.
            raise ValueError(
                f"unexpected guess result: {result!r}"
            )

        self.board[i] = value

    # -- Utilities --

    @staticmethod
    def _pos_to_i(pos: Position) -> int:
        """Map a 1-based (row, col) position to a 0-based row-major index."""
        return (10 * (pos.row - 1)) + (pos.col - 1)

    @staticmethod
    def _i_to_pos(i: int) -> Position:
        """Map a 0-based row-major index back to a 1-based position."""
        assert 0 <= i < 100
        r, c = divmod(i, 10)
        return Position(r + 1, c + 1)

Training code

from contextlib import suppress
from itertools import product
import random
import time

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


def random_arrangement():
    """Return a random arrangement of five ships.

    Each attempt shuffles all 100 board positions, takes five of them as
    ship anchors and gives each a random direction.  Attempts that raise
    ``InvalidArrangementError`` are discarded and retried.
    """
    while True:
        try:
            cells = [
                Position(row, col)
                for row in range(1, 11)
                for col in range(1, 11)
            ]
            random.shuffle(cells)
            directions = [Direction.HORIZONTAL, Direction.VERTICAL]
            # The generator is consumed left to right, so anchors and
            # directions are drawn alternately, one pair per ship.
            return Arrangement(
                *(
                    ShipPlacement(
                        cells.pop(), random.choice(directions)
                    )
                    for _ in range(5)
                )
            )
        except InvalidArrangementError:
            continue


def random_board():
    """Return a flat boolean array of length 100 marking ship cells.

    A random arrangement is generated, the cells covered by each of its
    five ships are collected via ``position_set``, and those cells are set
    to ``True`` on a 10x10 grid that is then flattened row by row.
    """
    fleet = random_arrangement()
    ships_and_lengths = (
        (fleet.carrier, 5),
        (fleet.battleship, 4),
        (fleet.destroyer, 3),
        (fleet.submarine, 3),
        (fleet.patrol_boat, 2),
    )

    occupied = set()
    for placement, length in ships_and_lengths:
        occupied |= position_set(placement, length)

    grid = np.zeros((10, 10), dtype=bool)
    for cell in occupied:
        # Positions are 1-based; the grid is 0-based.
        grid[cell.row - 1, cell.col - 1] = True
    return grid.flatten()


def random_guess_mask():
    """Return 100 booleans, each drawn uniformly from {False, True}."""
    options = [False, True]
    n_cells = 100
    return np.random.choice(options, size=(n_cells,))


def random_X_y(*, n=1):
    """Generate ``n`` random training pairs.

    For every sample a random board and a random mask are drawn.  The input
    row holds 0 where the mask is set, otherwise +1 on ship cells and -1 on
    empty cells.  The target row holds 1.0 on ship cells and 0.0 elsewhere.

    Returns:
        A tuple ``(X, y)`` of float arrays, each stacked to shape (n, 100).
    """
    inputs = []
    targets = []
    for _ in range(n):
        ships = random_board()
        hidden = random_guess_mask()
        assert ships.shape == hidden.shape

        revealed = np.where(ships, 1.0, -1.0)
        inputs.append(np.where(hidden, 0.0, revealed))
        targets.append(ships.astype(float))
    return np.stack(inputs), np.stack(targets)


class Net(nn.Module):
    """One 100-to-100 linear layer followed by an element-wise sigmoid."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(100, 100)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid of the linear layer applied to ``x``."""
        return self.sigmoid(self.fc1(x))


# Train a fresh network on randomly generated boards, then save its weights.
net = Net()

# NOTE(review): L1 loss on sigmoid outputs is kept as-is; confirm it is the
# intended objective for estimating per-cell ship likelihood.
criterion = nn.L1Loss()
optimizer = optim.SGD(
    net.parameters(), lr=0.001, momentum=0.9
)

N_EPOCH = 50_000
PRINT_EVERY_N = 500

for epoch in range(N_EPOCH):
    # Every step trains on a brand-new batch of 400 random samples.
    X, y = random_X_y(n=400)
    X = torch.tensor(X).float()
    y = torch.tensor(y).float()

    optimizer.zero_grad()
    y_ = net(X)
    loss = criterion(y_, y)
    loss.backward()
    optimizer.step()

    # Report the latest batch loss every PRINT_EVERY_N steps so that a
    # long run shows progress.
    if (epoch + 1) % PRINT_EVERY_N == 0:
        print(
            f"epoch {epoch + 1:>6}/{N_EPOCH}  "
            f"loss {loss.item():.6f}"
        )

# The Unix timestamp in the file name keeps successive runs from
# overwriting each other.
name = f"jb4-net-{int(time.time())}.pt"
torch.save(net.state_dict(), name)

Daniel Bassett

class ZeroPosition(Player):
    """Player with a fixed layout that guesses cells in row-major order."""

    def hook_start(self) -> None:
        """Queue every cell from (1, 1) to (10, 10), row by row."""
        self.remaining_guesses = [
            Position(index // 10 + 1, index % 10 + 1)
            for index in range(100)
        ]

    def arrange(self) -> Arrangement:
        """Place all five ships horizontally at column 1 of rows 1-5."""
        placements = [
            ShipPlacement(Position(row, 1), Direction.HORIZONTAL)
            for row in range(1, 6)
        ]
        return Arrangement(*placements)

    def guess(self) -> Position:
        """Return and remove the next queued cell."""
        assert self.remaining_guesses
        return self.remaining_guesses.pop(0)