Skip to content

Commit

Permalink
Update catanatron_gym version and deps (#266)
Browse files Browse the repository at this point in the history
* Update catanatron_gym version and deps

* Attempt to create a GitHub Actions check

* Update gymnasium in reqs.txt

* Debug

* Sample.py

* Remove Debug

* Sample.py

* catanatron-v1

* Update sample.py

* Update readme.md
  • Loading branch information
bcollazo authored Mar 5, 2024
1 parent 07c1ee2 commit 27474b3
Show file tree
Hide file tree
Showing 10 changed files with 65 additions and 52 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,10 @@ jobs:
pip install -e catanatron_gym
- name: Inline test
run: |
python -c "import gymnasium as gym; env = gym.make('catanatron_gym:catanatron-v0')"
python -c "import gymnasium as gym; env = gym.make('catanatron_gym:catanatron-v1')"
- name: Test README.md sample (and fail even on warnings)
run: |
python -W error catanatron_gym/sample.py
build-ui:
runs-on: ubuntu-latest
Expand Down
3 changes: 2 additions & 1 deletion all-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@ colorama==0.4.6
coverage==6.5.0
coveralls==3.3.1
docopt==0.6.2
Farama-Notifications==0.0.4
Flask==2.2.3
Flask-Cors==3.0.10
Flask-SQLAlchemy==3.0.3
gunicorn==20.1.0
gymnasium==0.27.1
gymnasium==0.29.1
gymnasium-notices==0.0.1
idna==3.4
iniconfig==2.0.0
Expand Down
13 changes: 7 additions & 6 deletions catanatron_gym/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,13 @@ Make your training loop, ensuring to respect `env.get_valid_actions()`.
import random
import gymnasium as gym

env = gym.make("catanatron_gym:catanatron-v0")
env = gym.make("catanatron_gym:catanatron-v1")
observation, info = env.reset()
for _ in range(1000):
action = random.choice(env.get_valid_actions()) # your agent here (this takes random actions)

observation, reward, done, info = env.step(action)
# your agent here (this takes random actions)
action = random.choice(env.unwrapped.get_valid_actions())
observation, reward, terminated, truncated, info = env.step(action)
done = terminated or truncated
if done:
observation, info = env.reset()
env.close()
Expand Down Expand Up @@ -49,7 +50,7 @@ def mask_fn(env) -> np.ndarray:


# Init Environment and Model
env = gym.make("catanatron_gym:catanatron-v0")
env = gym.make("catanatron_gym:catanatron-v1")
env = ActionMasker(env, mask_fn) # Wrap to enable masking
model = MaskablePPO(MaskableActorCriticPolicy, env, verbose=1)

Expand Down Expand Up @@ -78,7 +79,7 @@ def my_reward_function(game, p0_color):

# 3-player catan on a "Mini" map (7 tiles) until 6 points.
env = gym.make(
"catanatron_gym:catanatron-v0",
"catanatron_gym:catanatron-v1",
config={
"map_type": "MINI",
"vps_to_win": 6,
Expand Down
2 changes: 1 addition & 1 deletion catanatron_gym/catanatron_gym/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from gymnasium.envs.registration import register

register(
id="catanatron-v0",
id="catanatron-v1",
entry_point="catanatron_gym.envs:CatanatronEnv",
)
16 changes: 11 additions & 5 deletions catanatron_gym/catanatron_gym/envs/catanatron_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from gymnasium import spaces
import numpy as np

from catanatron.game import Game
from catanatron.game import Game, TURNS_LIMIT
from catanatron.models.player import Color, Player, RandomPlayer
from catanatron.models.map import BASE_MAP_TEMPLATE, NUM_NODES, LandTile, build_map
from catanatron.models.enums import RESOURCES, Action, ActionType
Expand Down Expand Up @@ -123,7 +123,7 @@ def simple_reward(game, p0_color):


class CatanatronEnv(gym.Env):
metadata = {"render.modes": []}
metadata = {"render_modes": []}

action_space = spaces.Discrete(ACTION_SPACE_SIZE)
# TODO: This could be smaller (there are many binary features). float b.c. TILE0_PROBA
Expand Down Expand Up @@ -195,8 +195,13 @@ def step(self, action):
winning_color is not None
or self.invalid_actions_count > self.max_invalid_actions
)
terminated = winning_color is not None
truncated = (
self.invalid_actions_count > self.max_invalid_actions
or self.game.state.num_turns >= TURNS_LIMIT
)
info = dict(valid_actions=self.get_valid_actions())
return observation, self.invalid_action_reward, done, info
return observation, self.invalid_action_reward, terminated, truncated, info

self.game.execute(catan_action)
self._advance_until_p0_decision()
Expand All @@ -205,10 +210,11 @@ def step(self, action):
info = dict(valid_actions=self.get_valid_actions())

winning_color = self.game.winning_color()
done = winning_color is not None
terminated = winning_color is not None
truncated = self.game.state.num_turns >= TURNS_LIMIT
reward = self.reward_function(self.game, self.p0.color)

return observation, reward, done, info
return observation, reward, terminated, truncated, info

def reset(
self,
Expand Down
14 changes: 14 additions & 0 deletions catanatron_gym/sample.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import random
import gymnasium as gym


env = gym.make("catanatron_gym:catanatron-v1")
observation, info = env.reset()
for _ in range(1000):
# your agent here (this takes random actions)
action = random.choice(env.unwrapped.get_valid_actions())
observation, reward, terminated, truncated, info = env.step(action)
done = terminated or truncated
if done:
observation, info = env.reset()
env.close()
4 changes: 2 additions & 2 deletions catanatron_gym/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

setuptools.setup(
name="catanatron_gym",
version="3.2.1",
version="4.0.0",
author="Bryan Collazo",
author_email="bcollazo2010@gmail.com",
description="Open AI Gym to play 1v1 Catan against a random bot",
Expand All @@ -22,5 +22,5 @@
"Operating System :: OS Independent",
],
python_requires=">=3.6",
install_requires=["catanatron", "gymnasium", "numpy"],
install_requires=["catanatron", "gymnasium==0.29.1", "numpy"],
)
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@
-e catanatron_experimental
click==8.1.3
cloudpickle==2.2.1
Farama-Notifications==0.0.4
Flask==2.2.3
Flask-Cors==3.0.10
Flask-SQLAlchemy==3.0.3
gunicorn==20.1.0
gymnasium==0.27.1
gymnasium==0.29.1
gymnasium-notices==0.0.1
itsdangerous==2.1.2
jax-jumpy==1.0.0
Expand Down
18 changes: 0 additions & 18 deletions sample.py

This file was deleted.

39 changes: 22 additions & 17 deletions tests/test_gym.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,11 @@ def test_gym():
assert get_p0_num_settlements(first_observation) == 0

action = random.choice(env.get_valid_actions())
second_observation, reward, done, info = env.step(action)
second_observation, reward, terminated, truncated, info = env.step(action)
assert (first_observation != second_observation).any()
assert reward == 0
assert not done
assert not terminated
assert not truncated
assert len(env.get_valid_actions()) in [2, 3]

assert second_observation[features.index("BANK_DEV_CARDS")] == 25
Expand All @@ -51,48 +52,51 @@ def test_gym():


def test_gym_registration_and_api_works():
env = gym.make("catanatron_gym:catanatron-v0")
observation, _ = env.reset()
env = gym.make("catanatron_gym:catanatron-v1")
observation, info = env.reset()
done = False
reward = 0
while not done:
action = random.choice(env.get_valid_actions()) # type: ignore
observation, reward, done, info = env.step(action)
action = env.action_space.sample()
observation, reward, terminated, truncated, info = env.step(action)
done = terminated or truncated
env.close()
assert reward in [-1, 1]


def test_invalid_action_reward():
env = gym.make(
"catanatron_gym:catanatron-v0", config={"invalid_action_reward": -1234}
"catanatron_gym:catanatron-v1", config={"invalid_action_reward": -1234}
)
first_obs, _ = env.reset()
invalid_action = next(filter(lambda i: i not in env.get_valid_actions(), range(1000))) # type: ignore
observation, reward, done, info = env.step(invalid_action)
observation, reward, terminated, truncated, info = env.step(invalid_action)
assert reward == -1234
assert not done
assert not terminated
assert not truncated
assert (observation == first_obs).all()
for _ in range(500):
observation, reward, done, info = env.step(invalid_action)
observation, reward, terminated, truncated, info = env.step(invalid_action)
assert (observation == first_obs).all()
assert done
assert not terminated
assert truncated


def test_custom_reward():
def custom_reward(game, p0_color):
return 123

env = gym.make(
"catanatron_gym:catanatron-v0", config={"reward_function": custom_reward}
"catanatron_gym:catanatron-v1", config={"reward_function": custom_reward}
)
observation, info = env.reset()
action = random.choice(env.get_valid_actions()) # type: ignore
observation, reward, done, info = env.step(action)
observation, reward, terminated, truncated, info = env.step(action)
assert reward == 123


def test_custom_map():
env = gym.make("catanatron_gym:catanatron-v0", config={"map_type": "MINI"})
env = gym.make("catanatron_gym:catanatron-v1", config={"map_type": "MINI"})
observation, info = env.reset()
assert len(env.get_valid_actions()) < 50 # type: ignore
assert len(observation) < 614
Expand All @@ -101,7 +105,7 @@ def test_custom_map():

def test_enemies():
env = gym.make(
"catanatron_gym:catanatron-v0",
"catanatron_gym:catanatron-v1",
config={
"enemies": [
ValueFunctionPlayer(Color.RED),
Expand All @@ -117,7 +121,8 @@ def test_enemies():
reward = 0
while not done:
action = random.choice(env.get_valid_actions()) # type: ignore
observation, reward, done, info = env.step(action)
observation, reward, terminated, truncated, info = env.step(action)
done = terminated or truncated

# Virtually impossible for a Random bot to beat Value Function Player
assert env.game.winning_color() == Color.RED # type: ignore
Expand All @@ -127,7 +132,7 @@ def test_enemies():

def test_mixed_rep():
env = gym.make(
"catanatron_gym:catanatron-v0",
"catanatron_gym:catanatron-v1",
config={"representation": "mixed"},
)
observation, info = env.reset()
Expand Down

0 comments on commit 27474b3

Please sign in to comment.