JUNTO Practice: Battleship Bots (Part 3)
Discussed on February 20, 2021.
Improve your Battleship bots. (Refer to the Jupyter notebook that I emailed you.) We will have the bots compete at the next meeting.
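Results (three head-to-head matchups of 1,000 games each; the number after each name is the count for that outcome):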
ZeroPosition (John Random Arrangement) 817
VanillaNeeson (Horizontal trained, Oscar Random Arrangement) 159
GameResult.DRAW 24
ZeroPosition (John Random Arrangement) 509
JohnBot_D2 (John Random Arrangement) 448
GameResult.DRAW 43
JohnBot_D2 (John Random Arrangement) 653
VanillaNeeson (Horizontal trained, Oscar Random Arrangement) 308
GameResult.DRAW 39
Solutions
Click to see:
Oscar Martinez
class BattleShipVanillaCNNXL(nn.Module):
    """Small CNN that produces 100 scores per input sample.

    Two 5x5 convolutions (each followed by ReLU and 2x2 max-pooling) feed
    three fully connected layers.  Output entries whose corresponding input
    value is non-zero are overwritten with ``-inf``.

    NOTE(review): the flatten to ``5 * 2 * 2`` features and the 100-wide mask
    suggest inputs shaped (batch, 1, 10, 10) -- confirm against the caller.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(
            in_channels=1, out_channels=3, kernel_size=5, padding=2
        )
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(
            in_channels=3, out_channels=5, kernel_size=5, padding=2
        )
        self.fc1 = nn.Linear(5 * 2 * 2, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, 100)

    def forward(self, x):
        """Return per-cell scores for ``x`` with non-zero input cells at -inf."""
        # The mask is kept as an attribute, so it stays readable after the call.
        self.mask = x.detach().clone().flatten(1) != 0

        features = self.pool(F.relu(self.conv1(x)))
        features = self.pool(F.relu(self.conv2(features)))
        flat = features.view(-1, 5 * 2 * 2)

        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        scores = self.fc3(hidden)

        # Cells with a non-zero input can never win an argmax over the scores.
        scores[self.mask] = float("-inf")
        return scores
John Lekberg
Contents:
- Player code.
- Training code.
Player code
import numpy as np
import torch
import torch.nn as nn
class JohnBot_D2(Player):
    """Battleship player that chooses guesses with a one-layer network.

    The player keeps a flat 100-element view of the opponent's board
    (``+1`` for a hit or sink, ``-1`` for a miss, ``0`` for a cell with no
    recorded result) and feeds it to the network.  The highest-scoring cell
    that has not been guessed yet becomes the next guess.
    """

    def __init__(self, path_state_dict):
        """Build the network and load its weights.

        Args:
            path_state_dict: Passed to ``torch.load``; must yield a state
                dict matching a single ``nn.Linear(100, 100)`` named ``fc1``.
        """
        # NOTE(review): torch.load unpickles the file; only load state dicts
        # that come from a trusted source.
        state_dict = torch.load(path_state_dict)

        class Net(nn.Module):
            """Linear 100 -> 100 layer followed by a sigmoid."""

            def __init__(self):
                super().__init__()
                self.fc1 = nn.Linear(100, 100)
                self.sigmoid = nn.Sigmoid()

            def forward(self, x):
                x = self.fc1(x)
                x = self.sigmoid(x)
                return x

        self.net = Net()
        self.net.load_state_dict(state_dict)
        self.net.eval()

    # -- Behavior --

    def arrange(self) -> Arrangement:
        """Return the ship layout produced by ``random_arrangement()``."""
        return random_arrangement()

    def guess(self) -> Position:
        """Return the not-yet-guessed cell with the highest network score."""
        known_board = torch.tensor(self.board).float()
        with torch.no_grad():
            predicted_board = self.net(known_board)

        # Convert explicitly to NumPy before masking, then hide the cells that
        # were already guessed so argmax only considers the remaining ones.
        scores = np.ma.array(
            predicted_board.numpy(), mask=~self.can_guess
        )
        # Plain int so the resulting Position holds Python integers.
        guess_i = int(scores.argmax())

        self.can_guess[guess_i] = False
        return self._i_to_pos(guess_i)

    # -- Hooks --

    def hook_start(self):
        """Reset the known board and mark every cell as guessable."""
        self.board = np.zeros((100,))
        self.can_guess = np.full((100,), True)

    def hook_invalid_guess(self, *, guess):
        """Fail loudly: this player is never expected to guess invalidly."""
        raise RuntimeError(f"invalid guess: {guess!r}")

    def hook_self_guess(self, *, guess, result):
        """Record the outcome of this player's own guess on the known board.

        Raises:
            ValueError: If ``result`` is not HIT, MISS, or SINK.
        """
        i = self._pos_to_i(guess)
        if result is GuessResult.HIT or result is GuessResult.SINK:
            value = +1
        elif result is GuessResult.MISS:
            value = -1
        else:
            # Without this branch ``value`` would be unbound below.
            raise ValueError(f"unexpected guess result: {result!r}")
        self.board[i] = value

    # -- Utilities --

    @staticmethod
    def _pos_to_i(pos: Position) -> int:
        """Map a 1-based (row, col) position to a 0-based flat index."""
        return (10 * (pos.row - 1)) + (pos.col - 1)

    @staticmethod
    def _i_to_pos(i: int) -> Position:
        """Map a 0-based flat index (0..99) to a 1-based position."""
        assert 0 <= i < 100
        r, c = divmod(i, 10)
        return Position(r + 1, c + 1)
Training code
from contextlib import suppress
from itertools import product
import random
import time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
def random_arrangement():
    """Return a random ``Arrangement`` of five ship placements.

    Candidates are drawn repeatedly; a candidate that raises
    ``InvalidArrangementError`` is discarded and another one is drawn.
    """
    orientations = [Direction.HORIZONTAL, Direction.VERTICAL]
    ship_count = 5

    def draw_candidate():
        # Shuffle every cell of the 10x10 grid and take anchors from the end,
        # so the five anchors within one candidate are always distinct.
        cells = [
            Position(row, col)
            for row in range(1, 11)
            for col in range(1, 11)
        ]
        random.shuffle(cells)

        placements = []
        for _ in range(ship_count):
            anchor = cells.pop()
            orientation = random.choice(orientations)
            placements.append(ShipPlacement(anchor, orientation))
        return Arrangement(*placements)

    while True:
        try:
            return draw_candidate()
        except InvalidArrangementError:
            continue
def random_board():
    """Return a flat 100-element boolean array marking occupied cells.

    A random arrangement is generated and every position covered by its
    five ships is set to ``True`` on a 10x10 grid, which is then flattened.
    """
    arrangement = random_arrangement()
    ships_and_lengths = (
        (arrangement.carrier, 5),
        (arrangement.battleship, 4),
        (arrangement.destroyer, 3),
        (arrangement.submarine, 3),
        (arrangement.patrol_boat, 2),
    )

    occupied = set()
    for placement, length in ships_and_lengths:
        occupied |= position_set(placement, length)

    grid = np.zeros((10, 10), dtype=bool)
    for cell in occupied:
        # Positions are 1-based; the grid is 0-based.
        grid[cell.row - 1, cell.col - 1] = True
    return grid.flatten()
def random_guess_mask():
    """Return 100 booleans, each drawn uniformly from ``{False, True}``."""
    options = [False, True]
    cell_count = 100
    return np.random.choice(options, size=(cell_count,))
def random_X_y(*, n=1):
    """Generate ``n`` random (input, target) training pairs.

    Each input row holds ``1`` for a ship cell, ``-1`` for an empty cell and
    ``0`` wherever the random guess mask is set.  Each target row holds ``1``
    for a ship cell and ``0`` otherwise.

    Returns:
        A tuple ``(X, y)`` of stacked float arrays, one row per sample.
    """
    inputs = []
    targets = []
    for _ in range(n):
        ships = random_board()
        zeroed = random_guess_mask()
        assert ships.shape == zeroed.shape

        observed = np.where(ships, 1.0, -1.0)
        # Masked cells carry no information in the input.
        observed[zeroed] = 0
        inputs.append(observed)

        targets.append(ships.astype(float))
    return np.stack(inputs), np.stack(targets)
class Net(nn.Module):
    """Single fully connected layer (100 -> 100) followed by a sigmoid."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=100, out_features=100)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``sigmoid(fc1(x))``."""
        logits = self.fc1(x)
        return self.sigmoid(logits)
# Train a fresh network on randomly generated boards, then save its weights.
net = Net()
criterion = nn.L1Loss()
optimizer = optim.SGD(
    net.parameters(), lr=0.001, momentum=0.9
)

N_EPOCH = 50_000
PRINT_EVERY_N = 500

for epoch in range(N_EPOCH):
    # Every epoch uses a newly generated batch of 400 samples.
    X, y = random_X_y(n=400)
    X = torch.tensor(X).float()
    y = torch.tensor(y).float()

    optimizer.zero_grad()
    y_ = net(X)
    loss = criterion(y_, y)
    loss.backward()
    optimizer.step()

    # Report progress periodically so a long run is not silent.
    if (epoch + 1) % PRINT_EVERY_N == 0:
        print(f"epoch {epoch + 1}/{N_EPOCH}  loss {loss.item():.6f}")

# Timestamped file name so successive runs do not overwrite each other.
name = f"jb4-net-{int(time.time())}.pt"
torch.save(net.state_dict(), name)
Daniel Bassett
class ZeroPosition(Player):
    """Player with a fixed ship layout that guesses cells in row-major order."""

    def hook_start(self) -> None:
        """Queue all 100 positions, row 1 first, columns 1..10 within a row."""
        rows = range(1, 11)
        cols = range(1, 11)
        self.remaining_guesses = [
            Position(row, col) for row in rows for col in cols
        ]

    def arrange(self) -> Arrangement:
        """Place five horizontal ships anchored at column 1 of rows 1..5."""
        placements = [
            ShipPlacement(Position(row, 1), Direction.HORIZONTAL)
            for row in range(1, 6)
        ]
        return Arrangement(*placements)

    def guess(self) -> Position:
        """Return and consume the next queued position."""
        assert len(self.remaining_guesses) > 0
        return self.remaining_guesses.pop(0)