
JUNTO Practice: Hotel Pricing (Part 1)

Discussed on April 10, 2021.

Create a bot to compete in the hotel pricing game. (Refer to the Jupyter notebook that I emailed you.) We will have the bots compete at the next meeting.
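
For reference, the interface the bots implement looks roughly like the sketch below. This is reconstructed from the solutions that follow, not copied from the notebook, so treat the exact signatures and defaults as assumptions.

from dataclasses import dataclass
from typing import Mapping, Sequence


@dataclass
class Location:
    x: int
    y: int


@dataclass
class Plan:
    loc: Location
    price: float


class Player:
    def action_plan(self) -> Plan:
        """Return this round's location and price."""
        raise NotImplementedError

    # Optional lifecycle hooks; all are no-ops by default.
    def hook_game_start(
        self, *, player_names: Sequence[str], self_name: str
    ) -> None:
        pass

    def hook_round_start(self) -> None:
        pass

    def hook_round_end(
        self,
        *,
        locations: Mapping[str, Location],
        round_profit: float,
        n_customer: int,
        self_name: str,
    ) -> None:
        pass

    def hook_game_end(
        self, *, profits: Mapping[str, float], self_name: str
    ) -> None:
        pass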

Solutions


Oscar Martinez

from typing import Mapping, Sequence

import torch

# `Player`, `Plan`, `Location`, the helpers (`select_action`,
# `p_select_action`, `extract_coord`, `construct_state`,
# `get_input_dims`), and the globals (`device`, `replay_memory`,
# `p_replay_memory`, `optimize_model`, `p_optimize_model`) come from
# the shared notebook.


class WynnerBot(Player):
    def __init__(
        self,
        memory,
        n_players,
        grid_size=10000,
        training=True,
    ):
        self.memory = memory
        self.n_players = n_players
        self.training = training
        feature_length = get_input_dims(n_players)
        # Keep a sliding window of the last `memory` observations as
        # the network input.
        self.state = torch.zeros(
            (memory, feature_length), device=device
        )
        self.current_observation = torch.zeros(
            (1, feature_length), device=device
        )
        self.grid_size = grid_size

    def action_plan(self) -> Plan:
        """Generate your plan for the round."""
        # Query the two policy networks: one picks a grid cell, the
        # other picks a price.
        self.action = select_action(
            self.state, self.training
        )
        self.price = p_select_action(
            self.state, self.training
        )
        self.x, self.y = extract_coord(self.action.item())

        return Plan(
            loc=Location(x=self.x, y=self.y),
            price=self.price.item(),
        )

    # -- Hooks --

    def hook_game_start(
        self, *, player_names: Sequence[str], self_name: str
    ) -> None:
        """Triggered when the whole game starts."""
        self.self_name = self_name
        self.players = [
            name
            for name in player_names
            if name != self_name
        ]

    def hook_game_end(
        self,
        *,
        profits: Mapping[str, float],
        self_name: str
    ) -> None:
        """Triggered when the whole game ends."""
        pass

    def hook_round_start(self):
        """Triggered when a round starts, before `action_plan`."""
        pass

    def hook_round_end(
        self,
        *,
        locations: Mapping[str, Location],
        round_profit: float,
        n_customer: int,
        self_name: str
    ) -> None:
        """Triggered when a round ends."""
        self.previous_state = self.state.detach().clone()
        self.current_observation = construct_state(
            self_location=locations[self.self_name],
            self_profit=round_profit,
            self_price=self.price,
            self_customers=n_customer,
            locations=locations,
            players=self.players,
            grid_size=self.grid_size,
        )

        # Slide the fixed-length memory window: append the newest
        # observation and drop the oldest row.
        self.state = torch.cat(
            [
                self.state,
                self.current_observation.detach().clone(),
            ]
        )[1:]

        self.reward = torch.tensor(
            [round_profit], dtype=torch.float64, device=device
        )

        # Store the transition (state, action, next_state, reward) in
        # both replay buffers: one for the location network, one for
        # the pricing network.
        replay_memory.push(
            self.previous_state.unsqueeze(0),
            self.action,
            self.state.detach().clone().unsqueeze(0),
            self.reward,
        )
        p_replay_memory.push(
            self.previous_state.unsqueeze(0),
            self.price,
            self.state.detach().clone().unsqueeze(0),
            self.reward,
        )

        # One optimization step per round for each network.
        if self.training:
            optimize_model()
            p_optimize_model()

Daniel Bassett

import random
from math import sqrt


class MiddleMan(Player):
    def action_plan(self):
        # Pick a uniformly random cell near the middle of the grid and
        # price at the square root of the coordinate sum.
        horiz_bound = random.randrange(25, 75)
        vert_bound = random.randrange(25, 75)
        return Plan(
            loc=Location(x=horiz_bound, y=vert_bound),
            price=sqrt(horiz_bound + vert_bound),
        )
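
Since random.randrange(25, 75) draws integers from 25 through 74, the coordinate sum falls between 50 and 148, so MiddleMan always charges between sqrt(50) ≈ 7.07 and sqrt(148) ≈ 12.17: a random central location paired with a narrow band of prices.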