Files
exercise-00/pig_lite/problem/simple_2d.py
2025-10-07 18:22:35 +02:00

530 lines
16 KiB
Python

from pig_lite.problem.base import Problem, Node
from pig_lite.instance_generation import enc
import json
import numpy as np
from collections import OrderedDict
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import TABLEAU_COLORS, XKCD_COLORS
class BaseLevel:
    """Common scaffolding for generated levels.

    The constructor stores the random number generator and the level size,
    resets the four result attributes to ``None`` and then calls
    ``initialize_level``, which concrete subclasses must override to fill
    in ``field``, ``costs``, ``start`` and ``end``.
    """

    def __init__(self, rng, size) -> None:
        self.rng, self.size = rng, size
        # Placeholders; ``initialize_level`` is expected to populate them.
        self.field = self.costs = None
        self.start = self.end = None
        self.initialize_level()

    def initialize_level(self):
        """Build the level; the base class has no implementation."""
        raise NotImplementedError()

    def get_field(self):
        """Return the ``field`` attribute."""
        return self.field

    def get_costs(self):
        """Return the ``costs`` attribute."""
        return self.costs

    def get_start(self):
        """Return the ``start`` attribute."""
        return self.start

    def get_end(self):
        """Return the ``end`` attribute."""
        return self.end
class MazeLevel(BaseLevel):
    """Level whose walkable area is carved out of a solid block of walls.

    The maze is generated with randomized Prim's algorithm, see
    http://en.wikipedia.org/wiki/Maze_generation_algorithm#Randomized_Prim.27s_algorithm
    """

    def __init__(self, rng, size):
        super().__init__(rng, size)

    def initialize_level(self):
        """Create the field, the per-tile costs, and the start/end positions."""
        shape = (self.size, self.size)
        self.field = np.full(shape, enc.WALL, dtype=np.int8)
        # randint's upper bound is exclusive: base costs are drawn from 1..4
        self.costs = self.rng.randint(1, 5, self.field.shape, dtype=np.int8)
        self.start = (0, 0)
        # the four axis-aligned step directions used while carving
        self.deltas = [(0, 1), (0, -1), (1, 0), (-1, 0)]
        self.random_walk()

        # the end is the last SPACE cell in np.where's (row-major) order;
        # it is picked *before* extra walls are opened below
        rows, cols = np.where(self.field == enc.SPACE)
        self.end = (int(rows[-1]), int(cols[-1]))
        self.replace_walls_with_high_cost_tiles()

    def replace_walls_with_high_cost_tiles(self):
        """Turn some of the remaining walls into walkable tiles of cost 9."""
        wall_rows, wall_cols = np.where(self.field == enc.WALL)
        n_walls = len(wall_rows)
        # n_walls // 9 indices are drawn *with* replacement, so the number of
        # distinct walls that get opened can be smaller than that
        picks = self.rng.randint(0, n_walls, n_walls // 9)
        # the opened tiles are walkable, but expensive to step on
        self.field[wall_rows[picks], wall_cols[picks]] = enc.SPACE
        self.costs[wall_rows[picks], wall_cols[picks]] = 9

    def random_walk(self):
        """Carve passages starting at ``self.start``.

        The frontier holds ``(wall, beyond)`` pairs: a wall cell next to an
        already carved cell, and the cell directly behind that wall.
        """
        start_x, start_y = self.start
        self.field[start_x, start_y] = enc.SPACE
        frontier = list(self.get_walls(self.start))

        while frontier:
            pick = frontier[self.rng.randint(len(frontier))]
            (wall_x, wall_y), (far_x, far_y) = pick
            if self.field[far_x, far_y] != enc.WALL:
                # the cell behind the wall is already open: drop this entry
                frontier.remove(pick)
                continue
            # open the wall and the cell behind it, then grow the frontier;
            # the picked entry stays in the list and is discarded by the
            # branch above once it is drawn again
            self.field[wall_x, wall_y] = enc.SPACE
            self.field[far_x, far_y] = enc.SPACE
            frontier.extend(self.get_walls((far_x, far_y)))

    def in_bounds(self, position):
        """Return True if ``position`` lies inside the size x size grid."""
        x, y = position
        return 0 <= x < self.size and 0 <= y < self.size

    def get_walls(self, position):
        """Return the ``(wall, beyond)`` pairs around ``position``.

        A pair is reported for every direction in ``self.deltas`` where the
        adjacent cell is an in-bounds wall and the cell two steps away is
        in bounds as well.
        """
        px, py = position
        candidates = []
        for dx, dy in self.deltas:
            wall = (px + dx, py + dy)
            beyond = (px + 2 * dx, py + 2 * dy)
            if not self.in_bounds(wall):
                continue
            if self.field[wall] != enc.WALL:
                continue
            if self.in_bounds(beyond):
                candidates.append((wall, beyond))
        return candidates
# this is code taken from
# https://github.com/dandrino/terrain-erosion-3-ways/blob/master/util.py
# Copyright (c) 2018 Daniel Andrino
# (project is MIT licensed)
def fbm(shape, p, lower=-np.inf, upper=np.inf, rng=None):
    """Generate 2D noise by filtering random phases in the frequency domain.

    Random unit-magnitude complex values are transformed with a 2D FFT,
    scaled by a radial power-law envelope ``|f| ** p`` (restricted to
    ``lower < |f| < upper``), and transformed back. The zero-frequency
    component is forced to 0.

    Parameters
    ----------
    shape : tuple of two ints
        Shape of the returned array. It does not have to be square.
    p : float
        Exponent of the power-law envelope.
    lower, upper : float, optional
        Exclusive bounds on the radial frequency; everything outside is
        filtered out.
    rng : optional
        Object with a ``rand(*shape)`` method, e.g. a
        ``numpy.random.RandomState``. When omitted the global
        ``numpy.random`` state is used, as before.

    Returns
    -------
    numpy.ndarray
        Real-valued array of shape ``shape``.
    """
    freqs = tuple(np.fft.fftfreq(n, d=1.0 / n) for n in shape)
    # 'ij' indexing keeps the frequency grid in the same (axis 0, axis 1)
    # layout as `shape`. The default 'xy' layout is transposed, which only
    # works by accident for square shapes and fails to broadcast otherwise.
    freq_radial = np.hypot(*np.meshgrid(*freqs, indexing='ij'))
    # Write into a zeroed array so the entries skipped by `where` are
    # well defined instead of uninitialised memory.
    power = np.power(
        freq_radial, p,
        out=np.zeros_like(freq_radial),
        where=freq_radial != 0,
    )
    envelope = power * (freq_radial > lower) * (freq_radial < upper)
    envelope[0][0] = 0.0
    uniform = np.random.rand(*shape) if rng is None else rng.rand(*shape)
    phase_noise = np.exp(2j * np.pi * uniform)
    return np.real(np.fft.ifft2(np.fft.fft2(phase_noise) * envelope))
class TerrainLevel(BaseLevel):
    """Open level with noise-based costs and a wall along the anti-diagonal."""

    def __init__(self, rng, size):
        super().__init__(rng, size)

    def initialize_level(self):
        """Create the field, the cost map, and the start/end positions."""
        n = self.size
        self.field = np.full((n, n), enc.SPACE, dtype=np.int8)

        # rescale the noise to [1, 10] and truncate it to integers
        noise = fbm(self.field.shape, -2)
        noise -= noise.min()
        noise /= noise.max()
        noise *= 9
        noise += 1
        self.costs = noise.astype(int)

        self.start = (0, 0)
        self.end = (n - 1, n - 1)

        # wall from (0, n - 1) down to (n - 1, 0); it lies between the start
        # corner and the end corner
        diagonal = np.arange(n)
        self.field[diagonal, n - 1 - diagonal] = enc.WALL
        self.replace_one_or_more_walls()

    def replace_one_or_more_walls(self):
        """Open at least one wall cell again."""
        wall_rows, wall_cols = np.where(self.field == enc.WALL)
        n_walls = len(wall_rows)
        # at least one index is drawn; indices are sampled with replacement,
        # so duplicates are possible
        n_replace = self.rng.randint(1, max(2, n_walls // 5))
        picks = self.rng.randint(0, n_walls, n_replace)
        self.field[wall_rows[picks], wall_cols[picks]] = enc.SPACE
class RoomLevel(BaseLevel):
    """Level made of rooms created by recursively splitting the field.

    Costs are raised around a few randomly chosen "danger" cells and then
    rescaled to integers in [1, 10].
    """

    def __init__(self, rng, size):
        super().__init__(rng, size)

    def initialize_level(self):
        """Create the room layout, the cost map, and the start/end positions.

        Raises
        ------
        RuntimeError
            If start and end turn out to be the same cell.
        """
        self.field = np.full((self.size, self.size), enc.SPACE, dtype=np.int8)
        self.costs = np.ones_like(self.field, dtype=np.float32)
        k = 1
        self.subdivide(self.field.view(), self.costs.view(), k, 0, 0)
        self._repair_dead_ends()

        spaces = np.where(self.field == enc.SPACE)
        self._add_danger_costs(spaces)

        # rescale to [1, 10] and truncate to integers
        self.costs = self.costs - self.costs.min()
        self.costs = self.costs / self.costs.max()
        self.costs = self.costs * 9
        self.costs = self.costs + 1
        self.costs = self.costs.astype(int)

        # first and last SPACE cell in np.where's (row-major) order
        start_choice = 0
        end_choice = -1
        self.start = (int(spaces[0][start_choice]), int(spaces[1][start_choice]))
        self.end = (int(spaces[0][end_choice]), int(spaces[1][end_choice]))
        if self.start == self.end:
            raise RuntimeError('should never happen')

    def _repair_dead_ends(self):
        """Open the four neighbours of enclosed interior SPACE cells.

        This is the crutch from the original code: it 'repairs' dead ends
        left behind by ``subdivide``. The field is modified while it is
        scanned, so the result depends on the scan order.
        """
        for x in range(1, self.size - 1):
            for y in range(1, self.size - 1):
                # sum of the four neighbours' field values
                # NOTE(review): `s >= 3` counts wall neighbours only if
                # enc.WALL is 1 and enc.SPACE is 0 - confirm against `enc`
                s = 0
                s += self.field[x - 1, y]
                s += self.field[x + 1, y]
                s += self.field[x, y - 1]
                s += self.field[x, y + 1]
                if self.field[x, y] == enc.SPACE and s >= 3:
                    self.field[x - 1, y] = enc.SPACE
                    self.field[x + 1, y] = enc.SPACE
                    self.field[x, y - 1] = enc.SPACE
                    self.field[x, y + 1] = enc.SPACE

    def _add_danger_costs(self, spaces):
        """Raise ``self.costs`` around randomly chosen SPACE cells.

        ``spaces`` is the ``np.where`` result listing all SPACE cells.
        Every danger centre adds ``1 / (const + distance)`` to every tile,
        where ``distance`` is the Euclidean distance to the centre.
        """
        n_spaces = len(spaces[0])
        n_danger = self.rng.randint(3, 7)
        dangers = self.rng.choice(range(n_spaces), n_danger, replace=False)
        xs, ys = np.indices(self.field.shape)
        for di in dangers:
            # `di` indexes the list of SPACE cells, so the coordinates have
            # to be looked up there. Decoding it with np.unravel_index (as
            # was done before) treats it as a flat index into the whole
            # grid and yields an unrelated cell.
            rx, ry = spaces[0][di], spaces[1][di]
            const = max(1., self.rng.randint(self.size // 5, self.size // 2))
            distance = np.sqrt((rx - xs) ** 2 + (ry - ys) ** 2)
            self.costs += 1. / (const + distance)

    def subdivide(self, current, costs, k, d, previous_door):
        """Recursively split ``current`` with walls that each have one door.

        Parameters
        ----------
        current : numpy.ndarray
            View into the field; it is modified in place.
        costs : numpy.ndarray
            Matching view into the costs. It is only passed along to the
            recursive calls and never read or written here.
        k : int
            Margin kept between a wall or door and the border of ``current``.
        d : int
            Current recursion depth.
        previous_door : int
            Door index of the parent split; the new wall avoids this index.
        """
        w, h = current.shape
        # the random draw happens on every call, even when `d <= 2`
        random_stop = self.rng.randint(0, 10) == 0 and d > 2
        if w <= 2 * k + 1 or h <= 2 * k + 1 or random_stop:
            return

        # redraw until the wall is not at the index of the parent's door
        split = previous_door
        while split == previous_door:
            split = self.rng.randint(k, w - k)
        current[split, :] = enc.WALL
        door = self.rng.randint(k, h - k)
        current[split, door] = enc.SPACE

        # both halves are transposed, so the split axis alternates with
        # every level of the recursion
        self.subdivide(
            current[:split, :].T,
            costs[:split, :].T,
            k,
            d + 1,
            door
        )
        self.subdivide(
            current[split + 1:, :].T,
            costs[split + 1:, :].T,
            k,
            d + 1,
            door
        )
class Simple2DProblem(Problem):
    """
    the states are the positions on the board that the agent can walk on

    A state is an ``(x, y)`` tuple that indexes ``board`` and ``costs`` as
    ``array[x, y]``. Both arrays are transposed for plotting, so ``x`` runs
    along the horizontal image axis.
    """
    # action name -> (dx, dy); the order of the entries is the order in
    # which `successors` generates nodes
    ACTIONS_DELTA = OrderedDict([
        ('R', (+1, 0)),
        ('U', (0, -1)),
        ('D', (0, +1)),
        ('L', (-1, 0)),
    ])

    def __init__(self, board, costs, start, end):
        self.board = board
        self.costs = costs
        self.start_state = start
        self.end_state = end
        self.n_expands = 0

    def get_start_node(self):
        """Return a parentless node at the start state with cost and depth 0."""
        return Node(None, self.start_state, None, 0, 0)

    def get_end_node(self):
        """Return a parentless node at the end state with cost and depth 0."""
        return Node(None, self.end_state, None, 0, 0)

    def is_end(self, node):
        """Return True if ``node`` is at the end state."""
        return node.state == self.end_state

    def action_cost(self, state, action):
        """Return the cost of taking ``action`` in ``state``.

        for the MazeProblem, the cost of any action is stored at the
        coordinates of the successor state, and represents the cost of
        'stepping onto' this position on the board. For a blocked move the
        successor state is ``state`` itself.
        """
        sx, sy = self.__delta_state(state, action)
        cost = self.costs[sx, sy]
        # Hand out a native Python number. Accumulating NumPy scalars keeps
        # the array's dtype (e.g. int8 for generated mazes), and path costs
        # then silently wrap around once they exceed the dtype's range.
        return cost.item() if isinstance(cost, np.generic) else cost

    def successor(self, node, action):
        """Return the node reached by applying ``action`` to ``node``.

        A move that is blocked (off the board or into a wall) yields a node
        at the *same* state, with the cost of the current tile added.
        """
        # determine the next state
        successor_state = self.__delta_state(node.state, action)
        # determine what it would cost to take this action in this state
        cost = self.action_cost(node.state, action)
        return Node(
            node,
            successor_state,
            action,
            node.cost + cost,
            node.depth + 1
        )

    def get_number_of_expanded_nodes(self):
        """Return how often ``successors`` was called since the last reset."""
        return self.n_expands

    def reset(self):
        """Reset the expansion counter to 0."""
        self.n_expands = 0

    def successors(self, node):
        """Expand ``node`` and return its successor nodes.

        Every call counts as one expansion. Successors that compare equal
        to ``node`` are dropped.
        """
        self.n_expands += 1
        successor_nodes = []
        for action in self.ACTIONS_DELTA:
            succ = self.successor(node, action)
            # NOTE(review): filtering self-edges relies on how `Node`
            # implements comparison - presumably by state; verify in
            # pig_lite.problem.base
            if succ is not None and succ != node:
                successor_nodes.append(succ)
        return successor_nodes

    def to_json(self):
        """Serialize the problem to a JSON string."""
        return json.dumps(dict(
            type=self.__class__.__name__,
            board=self.board.tolist(),
            costs=self.costs.tolist(),
            start_state=self.start_state,
            end_state=self.end_state
        ))

    @staticmethod
    def draw_nodes(fig, ax, name, node_collection, color, marker):
        """Scatter-plot the states of ``node_collection`` on ``ax``."""
        states = np.array([node.state for node in node_collection])
        if len(states) > 0:
            ax.scatter(states[:, 0], states[:, 1], color=color, label=name, marker=marker)

    @staticmethod
    def plot_nodes(fig, ax, nodes):
        """Plot several named node collections and add a legend.

        Each entry of ``nodes`` is either ``(name, marker, collection)``, in
        which case colors are taken from ``TABLEAU_COLORS``, or
        ``(name, marker, collection, color)``. The first entry decides which
        of the two layouts is assumed for all entries.
        """
        if len(nodes) > 0:
            if len(nodes[0]) == 3:
                for (name, marker, node_collection), color in zip(nodes, TABLEAU_COLORS):
                    if len(node_collection) > 0:
                        Simple2DProblem.draw_nodes(fig, ax, name, node_collection, color, marker)
            else:
                for name, marker, node_collection, color in nodes:
                    if len(node_collection) > 0:
                        Simple2DProblem.draw_nodes(fig, ax, name, node_collection, color, marker)
            ax.legend(
                bbox_to_anchor=(0.5, -0.03),
                loc='upper center',
            )

    def plot_sequences(self, fig, ax, sequences):
        """Draw every ``(name, action_sequence)`` pair as a path from the start."""
        start_node = self.get_start_node()
        for (name, action_sequence), color in zip(sequences, XKCD_COLORS):
            self.draw_path(fig, ax, name, start_node, action_sequence, color)
        ax.legend(
            bbox_to_anchor=(0.5, -0.03),
            loc='upper center',
        )

    def draw_path(self, fig, ax, name, start_node, action_sequence, color):
        """Draw ``action_sequence`` as arrows, starting at ``start_node``.

        The legend label shows the name, the number of actions, and the
        cost of the node reached after the last action (0 for an empty
        sequence). Returns the object created by ``ax.quiver``.
        """
        current = start_node
        # the first entry is a zero-length arrow at the start position
        xs = [current.state[0]]
        ys = [current.state[1]]
        us = [0]
        vs = [0]
        length = len(action_sequence)
        cost = 0
        for action in action_sequence:
            # an arrow starts at the position *before* the action is applied
            xs.append(current.state[0])
            ys.append(current.state[1])
            current = self.successor(current, action)
            dx, dy = self.ACTIONS_DELTA[action]
            us.append(dx)
            # the sign of dy is flipped for drawing
            # (presumably because imshow's y axis points downwards)
            vs.append(-dy)
            cost = current.cost
        quiv = ax.quiver(
            xs, ys, us, vs,
            color=color,
            label='{} l:{} c:{}'.format(name, length, cost),
            scale_units='xy',
            units='xy',
            scale=1,
            headwidth=1,
            headlength=1,
            linewidth=1,
            picker=5
        )
        return quiv

    def plot_field_and_costs_aux(self, fig, show_coordinates, show_grid,
                                 field_ax=None, costs_ax=None):
        """Plot the board and the costs side by side.

        Axes that are not passed in are created on ``fig``. Returns the
        tuple ``(field_ax, costs_ax)``.
        """
        if field_ax is None:
            # create the axes on `fig` itself rather than on whatever figure
            # pyplot currently considers active
            field_ax = fig.add_subplot(121)
        ax = field_ax
        ax.set_title('The field')
        im = ax.imshow(self.board.T, cmap='gray_r')
        divider = make_axes_locatable(ax)
        cax = divider.append_axes('right', size='5%', pad=0)
        cbar = fig.colorbar(im, cax=cax, orientation='vertical')
        cbar.set_ticks([0, 1])
        cbar.set_ticklabels([0, 1])

        if costs_ax is None:
            costs_ax = fig.add_subplot(122, sharex=ax, sharey=ax)
        ax = costs_ax
        ax.set_title('The costs (for stepping on a tile)')
        im = ax.imshow(self.costs.T, cmap='viridis')
        divider = make_axes_locatable(ax)
        cax = divider.append_axes('right', size='5%', pad=0)
        cbar = fig.colorbar(im, cax=cax, orientation='vertical')
        ticks = np.arange(self.costs.min(), self.costs.max() + 1)
        cbar.set_ticks(ticks)
        cbar.set_ticklabels(ticks)

        s = self.board.shape[0]
        for ax in [field_ax, costs_ax]:
            ax.tick_params(
                top=show_coordinates,
                left=show_coordinates,
                labelleft=show_coordinates,
                labeltop=show_coordinates,
                right=False,
                bottom=False,
                labelbottom=False
            )
            # Major ticks
            ax.set_xticks(np.arange(0, s, 1))
            ax.set_yticks(np.arange(0, s, 1))
            # Minor ticks
            ax.set_xticks(np.arange(-.5, s, 1), minor=True)
            ax.set_yticks(np.arange(-.5, s, 1), minor=True)

        if show_grid:
            for color, ax in zip(['m', 'w'], [field_ax, costs_ax]):
                # Gridlines based on minor ticks
                ax.grid(which='minor', color=color, linestyle='-', linewidth=1)
        return field_ax, costs_ax

    def visualize(self, sequences=None, show_coordinates=False, show_grid=False, plot_filename=None):
        """Plot the problem and, optionally, action sequences on top of it.

        The figure is written to ``plot_filename`` and closed when a file
        name is given; otherwise it is shown interactively.
        """
        nodes = [
            ('start', 'o', [self.get_start_node()]),
            ('end', 'o', [self.get_end_node()])
        ]
        fig = plt.figure(figsize=(10, 7))
        field_ax, costs_ax = self.plot_field_and_costs_aux(fig, show_coordinates, show_grid)
        if sequences is not None and len(sequences) > 0:
            self.plot_sequences(fig, field_ax, sequences)
            self.plot_sequences(fig, costs_ax, sequences)
        Simple2DProblem.plot_nodes(fig, field_ax, nodes)
        fig.tight_layout()
        if plot_filename is not None:
            fig.savefig(plot_filename)
            plt.close(fig)
        else:
            plt.show()

    @staticmethod
    def from_json(jsonstring):
        """Create a problem from the JSON string produced by ``to_json``."""
        return Simple2DProblem.from_dict(json.loads(jsonstring))

    @staticmethod
    def from_dict(data):
        """Create a problem from a dict with the keys written by ``to_json``."""
        return Simple2DProblem(
            np.array(data['board']),
            np.array(data['costs']),
            tuple(data['start_state']),
            tuple(data['end_state'])
        )

    def __delta_state(self, state, action):
        """Return the state reached from ``state`` by ``action``.

        If the target is off the board or a wall, ``state`` is returned
        unchanged, i.e. the move becomes a self-edge.
        """
        # the old state's coordinates
        x, y = state
        # the deltas for each coordinates
        dx, dy = self.ACTIONS_DELTA[action]
        # compute the coordinates of the next state
        sx = x + dx
        sy = y + dy
        if self.__on_board(sx, sy) and self.__walkable(sx, sy):
            # (sx, sy) is a *valid* state if it is on the board
            # and there is no wall where we want to go
            return sx, sy
        # up until assignment 1, this returned None. that had no
        # consequences on the correctness of the algorithms, but the
        # explanations, and the self-edges were wrong
        return x, y

    def __on_board(self, x, y):
        """Return True if ``(x, y)`` lies on the board."""
        size = len(self.board)  # all boards are quadratic
        return 0 <= x < size and 0 <= y < size

    def __walkable(self, x, y):
        """Return True if the tile at ``(x, y)`` is not a wall."""
        return self.board[x, y] != enc.WALL