Return to JUNTO

JUNTO Practice: Battleship Bots (Part 1)

Discussed on January 26, 2021.

Create a bot that plays Battleship. (Refer to the Jupyter notebook that I emailed you.) We will have the bots compete at the next meeting.

Results

P1: Dan
P2: John

GameResult.P2_WIN    464
GameResult.P1_WIN    444
GameResult.DRAW       92

P1: Dan
P2: Oscar

GameResult.P2_WIN    818
GameResult.P1_WIN    151
GameResult.DRAW       31

P1: John
P2: Oscar

GameResult.P2_WIN    808
GameResult.P1_WIN    163
GameResult.DRAW       29

Solutions

Click to see:

Oscar Martinez

class BattleShipCNN_BN_DOut5(nn.Module):
    """Small CNN that scores every cell of a board.

    Two 5x5 convolutions (1 -> 3 -> 5 channels, padding 2), each followed by
    ReLU and a 2x2 max-pool, feed a 100-unit hidden layer with batch norm and
    dropout (p=0.5), then a 100-way linear output.

    The flatten step hard-codes 5 * 2 * 2 features and the output has 100
    entries, which is consistent with a (batch, 1, 10, 10) input; other
    input sizes are not handled.
    """

    def __init__(self):
        super().__init__()
        # Attribute names double as state_dict keys, so they are kept stable.
        self.conv1 = nn.Conv2d(1, 3, 5, padding=2)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(3, 5, 5, padding=2)
        self.fc1 = nn.Linear(5 * 2 * 2, 100)
        self.fc1_bn = nn.BatchNorm1d(100)
        self.fc2 = nn.Linear(100, 100)

        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return one score per cell, with non-zero input cells set to -inf.

        The boolean mask of non-zero input cells is also stored on
        ``self.mask`` (shape: batch x flattened board).
        """
        # Any cell whose input value is non-zero is excluded from the output.
        self.mask = (x.detach().clone().flatten(1) != 0)

        features = self.pool(F.relu(self.conv1(x)))
        features = self.pool(F.relu(self.conv2(features)))
        features = features.view(-1, 5 * 2 * 2)

        hidden = self.fc1_bn(self.fc1(features))
        hidden = self.dropout(F.relu(hidden))

        scores = self.fc2(hidden)
        # In-place, like the indexed assignment it replaces.
        scores.masked_fill_(self.mask, float('-inf'))

        return scores

def random_place(board_dim,ship_size):
    """Draw a random placement for one ship that fits on the board.

    Args:
        board_dim: Side length of the square board; coordinates are 1-based.
        ship_size: Number of cells the ship occupies.

    Returns:
        A ShipPlacement whose anchor Position(row, col) and Direction are
        chosen uniformly among placements that keep the ship on the board.

    Raises:
        ValueError: From numpy when ship_size exceeds board_dim (empty range).

    NOTE(review): assumes the Position is the ship's first cell, that
    HORIZONTAL extends along increasing columns and VERTICAL along
    increasing rows -- confirm against the game library.
    """
    # randint's upper bound is exclusive: (0, 2) is required to ever draw 1.
    vertical = np.random.randint(0, 2) == 1

    # Along the ship's axis the last valid start is board_dim - ship_size + 1;
    # across it, every line 1..board_dim is valid. "+ 1" again because the
    # upper bound is exclusive.
    along_axis = int(np.random.randint(1, board_dim - ship_size + 2))
    across_axis = int(np.random.randint(1, board_dim + 1))

    # The length-limited coordinate must be the one the ship extends along.
    if vertical:
        return ShipPlacement(Position(along_axis, across_axis), Direction.VERTICAL)
    return ShipPlacement(Position(across_axis, along_axis), Direction.HORIZONTAL)

def random_arrangement(board_dim):
    """Return a random Arrangement of ships of sizes 5, 4, 3, 3 and 2.

    Each ship is placed independently with random_place; whenever
    Arrangement rejects the combination with InvalidArrangementError the
    whole fleet is redrawn, so this loops until a valid layout is found.
    """
    fleet_sizes = [5, 4, 3, 3, 2]
    while True:
        placements = [random_place(board_dim, size) for size in fleet_sizes]
        try:
            return Arrangement(*placements)
        except InvalidArrangementError:
            # Rejected layout: draw a completely new set of placements.
            continue

def extract_coord(flat_index, board_dim=10):
    """Convert a 0-based row-major flat index into a 1-based Position.

    Args:
        flat_index: Index into the flattened board (0 is row 1, column 1).
        board_dim: Number of columns per row; defaults to 10, the value
            that was previously hard-coded.

    Returns:
        Position(row, col) with both coordinates starting at 1.
    """
    row = (flat_index // board_dim) + 1
    col = (flat_index % board_dim) + 1

    return Position(row, col)

class NeesonBot(Player):
    """Player whose guesses come from the module-level ``select_action``.

    The bot keeps a 10x10 tensor of what it knows about the opponent's board
    (0 = not guessed, 1 = hit, -1 = miss) and records every transition in the
    module-level ``replay_memory``.
    """

    def __init__(self, training=False):
        # When true, optimize_model() runs after every recorded guess.
        self.training = training

    def hook_start(self):
        """Reset the per-game state: empty board, no sinks, no turns."""
        self.enemy_board = torch.zeros((10, 10), device=device)
        self.sunk = 0
        self.turns = 0

    def hook_self_guess(self, *, guess: Position, result: GuessResult):
        """Update the board with a guess outcome and store the transition.

        Rewards: +1 for a hit, +5 for a sink, -1 for a miss. On the fifth
        sink the reward is multiplied by 10 * hits / misses (misses counted
        as at least 1) and ``enemy_board`` is set to None so the stored next
        state is None.
        """
        self.previous_state = self.enemy_board.detach().clone()
        self.turns += 1

        # Positions are 1-based; the tensor is 0-based.
        cell = (guess.row - 1, guess.col - 1)

        if result == GuessResult.HIT:
            reward = 1
            self.enemy_board[cell] = 1
        elif result == GuessResult.SINK:
            self.sunk += 1
            reward = 5
            self.enemy_board[cell] = 1
            if self.sunk == 5:
                # Shift values from {-1, 0, 1} to {0, 1, 2} so bincount can
                # tally them: first bin = misses, last bin = hits.
                counts = (self.enemy_board + 1).flatten().int().bincount()
                hit_count = counts[-1]
                miss_count = counts[0]
                if miss_count == 0:
                    # Avoid dividing by zero after a game with no misses.
                    miss_count = miss_count + 1

                ratio = (hit_count / miss_count).item()
                reward = reward * (10 * ratio)
                # Experimental, terminate game:
                self.enemy_board = None
        elif result == GuessResult.MISS:
            reward = -1
            self.enemy_board[cell] = -1

        self.reward = torch.tensor([reward], device=device)

        # A None board marks the end of the game; it is stored as-is.
        if self.enemy_board is None:
            next_state = None
        else:
            next_state = self.enemy_board.detach().clone().unsqueeze(0).unsqueeze(0)

        replay_memory.push(
            self.previous_state.unsqueeze(0).unsqueeze(0),
            self.action,
            next_state,
            self.reward,
        )

        if self.training:
            optimize_model()

    def arrange(self):
        """Return a random ship arrangement for a 10x10 board."""
        return random_arrangement(10)

    def guess(self):
        """Ask select_action for a move, remember it, and return its Position."""
        # Two leading singleton dimensions are added before the board is
        # handed to select_action.
        state = self.enemy_board.unsqueeze(0).unsqueeze(0)
        chosen = select_action(state)

        self.action = chosen

        return extract_coord(chosen.item())

John Lekberg

import random

class RandomPlayer(Player):
    """Player that guesses uniformly at random without repeating a cell."""

    def hook_start(self) -> None:
        """Fill the pool of guesses with every cell of the 10x10 board."""
        axis = range(1, 11)
        self.remaining_guesses = [
            Position(row, col) for row in axis for col in axis
        ]

    def arrange(self) -> Arrangement:
        """Return a fixed layout: five horizontal ships anchored at column 1
        of rows 1 through 5."""
        placements = [
            ShipPlacement(Position(row, 1), Direction.HORIZONTAL)
            for row in range(1, 6)
        ]
        return Arrangement(*placements)

    def guess(self) -> Position:
        """Pick a random cell that has not been guessed yet and retire it."""
        assert self.remaining_guesses
        pick = random.choice(self.remaining_guesses)
        self.remaining_guesses.remove(pick)
        return pick

Daniel Bassett

import random

class RandomPlayer(Player):
    """Baseline bot: fixed ship layout, random non-repeating guesses."""

    def hook_start(self) -> None:
        """Reset the list of cells still available to guess (rows and
        columns 1 through 10)."""
        cells = []
        for row in range(1, 11):
            for col in range(1, 11):
                cells.append(Position(row, col))
        self.remaining_guesses = cells

    def arrange(self) -> Arrangement:
        """Place five horizontal ships, one per row, starting at column 1."""
        anchors = (Position(row, 1) for row in (1, 2, 3, 4, 5))
        return Arrangement(
            *[ShipPlacement(anchor, Direction.HORIZONTAL) for anchor in anchors]
        )

    def guess(self) -> Position:
        """Return a randomly chosen unguessed cell and remove it from the pool."""
        pool = self.remaining_guesses
        assert len(pool) > 0
        chosen = random.choice(pool)
        pool.remove(chosen)
        return chosen