JUNTO Practice: Hotel Pricing (Part 1)

Discussed on April 10, 2021.

Create a bot to compete in the hotel pricing game. (Refer to the Jupyter notebook that I emailed you.) We will have the bots compete at the next meeting.

Solutions

Click to see:

Oscar Martinez

``````python
class WynnerBot(Player):
def __init__(
    self,
    memory,
    n_players,
    grid_size=10000,
    training=True,
):
    """Store the bot's settings and allocate zero-filled state buffers.

    ``memory`` is the number of rows allocated for ``self.state``, and
    ``n_players`` is passed to ``get_input_dims`` to obtain the width of
    each row. ``grid_size`` and ``training`` are stored unchanged for use
    by the other methods.
    """
    self.memory, self.n_players, self.training = memory, n_players, training

    # Width of a single observation row, as reported by the external helper.
    n_features = get_input_dims(n_players)

    # History buffer: `memory` rows of observations, all zeros to begin with.
    history_shape = (memory, n_features)
    self.state = torch.zeros(history_shape, device=device)

    # Placeholder for the latest single-row observation.
    latest_shape = (1, n_features)
    self.current_observation = torch.zeros(latest_shape, device=device)

    self.grid_size = grid_size

def action_plan(self) -> Plan:
    """Choose this round's location and price from the current state.

    ``select_action`` supplies the location action and ``p_select_action``
    supplies the price; both results are kept on the instance so that
    ``hook_round_end`` can record them afterwards.
    """
    state, is_training = self.state, self.training

    # Location is selected before price; the call order is kept as-is.
    self.action = select_action(state, is_training)
    self.price = p_select_action(state, is_training)

    # Turn the scalar action value into an (x, y) pair.
    coords = extract_coord(self.action.item())
    self.x, self.y = coords

    spot = Location(x=self.x, y=self.y)
    return Plan(loc=spot, price=self.price.item())

# -- Hooks --

def hook_game_start(
    self, *, player_names: Sequence[str], self_name: str
) -> None:
    """Remember this bot's name and its opponents when the game begins.

    ``self.players`` receives every entry of ``player_names`` that differs
    from ``self_name``, in the order given.
    """
    self.self_name = self_name
    opponents = [
        candidate for candidate in player_names if candidate != self_name
    ]
    self.players = opponents

def hook_game_end(
    self,
    *,
    profits: Mapping[str, float],
    self_name: str
) -> None:
    """Called once the whole game is over; this bot takes no action."""
    return None

def hook_round_start(self):
    """Called at the start of each round, ahead of `action_plan`; no-op."""
    return None

def hook_round_end(
    self,
    *,
    locations: Mapping[str, Location],
    round_profit: float,
    n_customer: int,
    self_name: str
) -> None:
    """Fold the finished round into the state and record the transitions.

    Steps, in order:

    1. Snapshot the current state as ``self.previous_state``.
    2. Build a new observation with ``construct_state`` and slide it into
       ``self.state`` (appended at the end, first row dropped).
    3. Push ``(previous state, choice, new state, reward)`` to both
       ``replay_memory`` (location action) and ``p_replay_memory`` (price).
    4. When ``self.training`` is true, call ``optimize_model`` and
       ``p_optimize_model`` once each.

    Args:
        locations: Locations for this round keyed by player name; this
            bot's entry is looked up with the name stored at game start.
        round_profit: This bot's profit for the round; also used as the
            reward.
        n_customer: Customer count reported for this bot this round.
        self_name: Name of this bot. Not read here -- ``self.self_name``
            (set in ``hook_game_start``) is used instead.
    """
    # Independent copy of the state as it was when the action was chosen.
    self.previous_state = self.state.detach().clone()

    self.current_observation = construct_state(
        self_location=locations[self.self_name],
        self_profit=round_profit,
        # NOTE(review): this is the raw result of `p_select_action`, not the
        # `.item()` value sent in the plan -- confirm `construct_state`
        # expects that.
        self_price=self.price,
        self_customers=n_customer,
        locations=locations,
        players=self.players,
        grid_size=self.grid_size,
    )

    # Append the new observation and drop the first row; with a single-row
    # observation the state keeps the same number of rows.
    self.state = torch.cat(
        [
            self.state,
            self.current_observation.detach().clone(),
        ]
    )[1:]

    # NOTE(review): `dtype=float` is Python's float, which torch should map
    # to float64 -- confirm this matches what the optimisers expect.
    self.reward = torch.tensor(
        [round_profit], dtype=float, device=device
    )

    # Each memory gets its own clone of the new state.
    replay_memory.push(
        self.previous_state.unsqueeze(0),
        self.action,
        self.state.detach().clone().unsqueeze(0),
        self.reward,
    )
    p_replay_memory.push(
        self.previous_state.unsqueeze(0),
        self.price,
        self.state.detach().clone().unsqueeze(0),
        self.reward,
    )

    # Transitions are recorded either way; learning happens only in training.
    if self.training:
        optimize_model()
        p_optimize_model()
``````

Daniel Bassett

``````python
from math import sqrt

class MiddleMan(Player):
    """Bot that picks a random location and prices from its coordinates."""

    def action_plan(self):
        """Return a plan with a random location and a derived price.

        Each coordinate is drawn independently with
        ``random.randrange(25, 75)``, i.e. an integer from 25 to 74
        inclusive. The price is the square root of the sum of the two
        coordinates.
        """
        horiz_bound = random.randrange(25, 75)
        vert_bound = random.randrange(25, 75)
        return Plan(
            loc=Location(x=horiz_bound, y=vert_bound),
            # Computed directly rather than through a name-bound lambda.
            price=sqrt(horiz_bound + vert_bound),
        )
``````