import gymnasium as gym
import numpy as np

import edugrid  # registers the environment
from edugrid.envs import Action

if __name__ == "__main__":
    env = gym.make(
        "philsteg/EduGrid-v0",
        size=(3, 3),
        agent_location=(0, 0),
        wall_locations=[(2, slice(None))],
        sink_locations=[(0, 1), (1, 0)],
        target_locations=[(0, 2)],
        slip_prob=0.5,
    )
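    # Quick interface check via the standard Gymnasium attributes that any
    # gym.make result exposes (not specific to EduGrid).
    print(f"observation space: {env.observation_space}")
    print(f"action space: {env.action_space}")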
    # Inspect the transition matrix shape
    print(
        f"transition matrix shape: {env.unwrapped.transition_matrix.shape}"
    )  # (rows, columns, actions, rows, columns) = (3, 3, 4, 3, 3)
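    # Sanity-check sketch (assuming transition_matrix is a NumPy array, as the
    # indexing below suggests): each (row, column, action) slice should be a
    # probability distribution over next states and therefore sum to 1.
    # Checked here for the agent's start cell only.
    start_probs = env.unwrapped.transition_matrix[0, 0]  # shape (4, 3, 3)
    print(
        "start-cell distributions sum to 1: "
        f"{np.allclose(start_probs.sum(axis=(1, 2)), 1.0)}"
    )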
    # Modify the transition matrix so that the agent always transitions from
    # (0, 1) to (0, 2) if the action "down" is chosen.
    new_transition_prob = np.zeros((3, 3))
    new_transition_prob[0, 2] = 1
    env.unwrapped.transition_matrix[0, 1, Action.DOWN] = new_transition_prob
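    # The overridden slice should remain a valid distribution; printing it
    # confirms that all probability mass now sits on (0, 2).
    print(
        "P(. | s=(0, 1), a=down):\n"
        f"{env.unwrapped.transition_matrix[0, 1, Action.DOWN]}"
    )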
    # Inspect the reward matrix shape
    print(
        f"reward matrix shape: {env.unwrapped.reward_matrix.shape}"
    )  # (rows, columns, actions, rows, columns) = (3, 3, 4, 3, 3)
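    # Individual rewards are indexed as (row, column, action, next_row,
    # next_column); e.g. the reward for reaching the target (0, 2) from
    # (0, 1) with the "down" action:
    print(
        "reward (0, 1) -> (0, 2) via down: "
        f"{env.unwrapped.reward_matrix[0, 1, Action.DOWN, 0, 2]}"
    )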
    # Modify the reward matrix so that the agent receives a penalty of -10
    # if the sink at (1, 0) is reached.
    env.unwrapped.reward_matrix[:, :, :, 1, 0] = -10
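    # Every transition ending in the sink at (1, 0) now carries the -10
    # penalty, regardless of the origin cell and action.
    print(
        "unique rewards into (1, 0): "
        f"{np.unique(env.unwrapped.reward_matrix[:, :, :, 1, 0])}"
    )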
    # Inspect the terminal matrix shape
    print(
        f"terminal matrix shape: {env.unwrapped.terminal_matrix.shape}"
    )  # (rows, columns) = (3, 3)
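    # Printing the boolean terminal grid shows which cells already end an
    # episode (presumably the target and the sink cells).
    print(env.unwrapped.terminal_matrix)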
    # Modify the terminal matrix so that (1, 2) is terminal.
    env.unwrapped.terminal_matrix[1, 2] = True
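    # List the terminal cells after the modification, then run a minimal
    # rollout sketch. This assumes the standard Gymnasium reset/step API and
    # that the discrete action space accepts the Action values used for
    # indexing above.
    print(
        f"terminal cells: {np.argwhere(env.unwrapped.terminal_matrix).tolist()}"
    )
    obs, info = env.reset(seed=0)
    obs, reward, terminated, truncated, info = env.step(Action.DOWN)
    print(f"obs={obs}, reward={reward}, terminated={terminated}")
    env.close()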