diff --git a/catanatron_core/catanatron/models/actions.py b/catanatron_core/catanatron/models/actions.py index 9bb414dc..553cbcf2 100644 --- a/catanatron_core/catanatron/models/actions.py +++ b/catanatron_core/catanatron/models/actions.py @@ -298,25 +298,24 @@ def inner_maritime_trade_possibilities(hand_freqdeck, bank_freqdeck, port_resour trade_offers = set() # Get lowest rate per resource - rates: Dict[FastResource, int] = {WOOD: 4, BRICK: 4, SHEEP: 4, WHEAT: 4, ORE: 4} + rates = {res: 4 for res in RESOURCES} if None in port_resources: - rates = {WOOD: 3, BRICK: 3, SHEEP: 3, WHEAT: 3, ORE: 3} + rates = {res: 3 for res in RESOURCES} for resource in port_resources: - if resource != None: + if resource is not None: rates[resource] = 2 - # For resource in hand - for index, resource in enumerate(RESOURCES): - amount = hand_freqdeck[index] - if amount >= rates[resource]: - resource_out: List[Any] = [resource] * rates[resource] - resource_out += [None] * (4 - rates[resource]) - for j_resource in RESOURCES: - if ( - resource != j_resource - and freqdeck_count(bank_freqdeck, j_resource) > 0 - ): - trade_offer = tuple(resource_out + [j_resource]) - trade_offers.add(trade_offer) + # For each resource in hand + for give_idx, amount in enumerate(hand_freqdeck): + rate = rates[RESOURCES[give_idx]] + if amount >= rate: + # Try trading for each other resource the bank has + for receive_idx, bank_amount in enumerate(bank_freqdeck): + if receive_idx != give_idx and bank_amount > 0: + # Create concatenated freqdeck [give_5 + receive_5] + freqdeck = [0] * 10 + freqdeck[give_idx] = rate + freqdeck[5 + receive_idx] = 1 + trade_offers.add(tuple(freqdeck)) return trade_offers diff --git a/catanatron_core/catanatron/state.py b/catanatron_core/catanatron/state.py index 665eab8c..a12a343b 100644 --- a/catanatron_core/catanatron/state.py +++ b/catanatron_core/catanatron/state.py @@ -589,19 +589,24 @@ def apply_action(state: State, action: Action): state.current_prompt = ActionPrompt.PLAY_TURN state.playable_actions = generate_playable_actions(state) elif action.action_type == ActionType.MARITIME_TRADE: - trade_offer = action.value - offering = freqdeck_from_listdeck( - filter(lambda r: r is not None, trade_offer[:-1]) - ) - asking = freqdeck_from_listdeck(trade_offer[-1:]) - if not player_resource_freqdeck_contains(state, action.color, offering): + # action.value is now a 10-length tuple of integers [give_5 + receive_5] + giving_freqdeck = list(action.value[:5]) # First 5 are resources given + receiving_freqdeck = list(action.value[5:]) # Last 5 are resources received + + # Validate player has resources + if not player_resource_freqdeck_contains(state, action.color, giving_freqdeck): raise ValueError("Trying to trade without money") - if not freqdeck_contains(state.resource_freqdeck, asking): - raise ValueError("Bank doenst have those cards") - player_freqdeck_subtract(state, action.color, offering) - state.resource_freqdeck = freqdeck_add(state.resource_freqdeck, offering) - player_freqdeck_add(state, action.color, asking) - state.resource_freqdeck = freqdeck_subtract(state.resource_freqdeck, asking) + # Validate bank has resources + if not freqdeck_contains(state.resource_freqdeck, receiving_freqdeck): + raise ValueError("Bank doesn't have those cards") + + # Execute trade + player_freqdeck_subtract(state, action.color, giving_freqdeck) + state.resource_freqdeck = freqdeck_add(state.resource_freqdeck, giving_freqdeck) + player_freqdeck_add(state, action.color, receiving_freqdeck) + state.resource_freqdeck = freqdeck_subtract( + state.resource_freqdeck, receiving_freqdeck + ) # state.current_player_index stays the same state.current_prompt = ActionPrompt.PLAY_TURN diff --git a/catanatron_gym/catanatron_gym/envs/catanatron_env.py b/catanatron_gym/catanatron_gym/envs/catanatron_env.py index a5c76d03..a62653a7 100644 --- a/catanatron_gym/catanatron_gym/envs/catanatron_env.py +++ b/catanatron_gym/catanatron_gym/envs/catanatron_env.py @@ -20,14 +20,59 @@ BASE_TOPOLOGY = BASE_MAP_TEMPLATE.topology TILE_COORDINATES = [x for x, y in BASE_TOPOLOGY.items() if y == LandTile] + + +def generate_trade_actions(): + """Generates all possible maritime trade actions in freqdeck format""" + trade_actions = [] + + # 4:1 trades + for give_resource in RESOURCES: + give_idx = RESOURCES.index(give_resource) + for receive_resource in RESOURCES: + if give_resource != receive_resource: + receive_idx = RESOURCES.index(receive_resource) + # Create 10-length freqdeck [giving_5 + receiving_5] + freqdeck = [0] * 10 + freqdeck[give_idx] = 4 # Give 4 resources + freqdeck[5 + receive_idx] = 1 # Receive 1 resource + trade_actions.append((ActionType.MARITIME_TRADE, tuple(freqdeck))) + + # 3:1 port trades + for give_resource in RESOURCES: + give_idx = RESOURCES.index(give_resource) + for receive_resource in RESOURCES: + if give_resource != receive_resource: + receive_idx = RESOURCES.index(receive_resource) + freqdeck = [0] * 10 + freqdeck[give_idx] = 3 # Give 3 resources + freqdeck[5 + receive_idx] = 1 # Receive 1 resource + trade_actions.append((ActionType.MARITIME_TRADE, tuple(freqdeck))) + + # 2:1 port trades + for give_resource in RESOURCES: + give_idx = RESOURCES.index(give_resource) + for receive_resource in RESOURCES: + if give_resource != receive_resource: + receive_idx = RESOURCES.index(receive_resource) + freqdeck = [0] * 10 + freqdeck[give_idx] = 2 # Give 2 resources + freqdeck[5 + receive_idx] = 1 # Receive 1 resource + trade_actions.append((ActionType.MARITIME_TRADE, tuple(freqdeck))) + + return trade_actions + + ACTIONS_ARRAY = [ (ActionType.ROLL, None), - # TODO: One for each tile (and abuse 1v1 setting). + # Move robber actions *[(ActionType.MOVE_ROBBER, tile) for tile in TILE_COORDINATES], (ActionType.DISCARD, None), + # Build actions *[(ActionType.BUILD_ROAD, tuple(sorted(edge))) for edge in get_edges()], *[(ActionType.BUILD_SETTLEMENT, node_id) for node_id in range(NUM_NODES)], *[(ActionType.BUILD_CITY, node_id) for node_id in range(NUM_NODES)], + # Development card actions (ActionType.BUY_DEVELOPMENT_CARD, None), (ActionType.PLAY_KNIGHT_CARD, None), *[ @@ -38,27 +83,8 @@ *[(ActionType.PLAY_YEAR_OF_PLENTY, (first_card,)) for first_card in RESOURCES], (ActionType.PLAY_ROAD_BUILDING, None), *[(ActionType.PLAY_MONOPOLY, r) for r in RESOURCES], - # 4:1 with bank - *[ - (ActionType.MARITIME_TRADE, tuple(4 * [i] + [j])) - for i in RESOURCES - for j in RESOURCES - if i != j - ], - # 3:1 with port - *[ - (ActionType.MARITIME_TRADE, tuple(3 * [i] + [None, j])) # type: ignore - for i in RESOURCES - for j in RESOURCES - if i != j - ], - # 2:1 with port - *[ - (ActionType.MARITIME_TRADE, tuple(2 * [i] + [None, None, j])) # type: ignore - for i in RESOURCES - for j in RESOURCES - if i != j - ], + # Maritime trade actions + *generate_trade_actions(), (ActionType.END_TURN, None), ] ACTION_SPACE_SIZE = len(ACTIONS_ARRAY) @@ -70,6 +96,7 @@ def to_action_type_space(action): def normalize_action(action): + """Normalize action value to ensure it can be found in ACTIONS_ARRAY""" normalized = action if normalized.action_type == ActionType.ROLL: return Action(action.color, action.action_type, None) diff --git a/tests/test_gym.py b/tests/test_gym.py index 472bce14..91a9f74d 100644 --- a/tests/test_gym.py +++ b/tests/test_gym.py @@ -126,7 +126,7 @@ def test_enemies(): # Virtually impossible for a Random bot to beat Value Function Player assert env.game.winning_color() == Color.RED # type: ignore - assert reward - 1 + assert reward == -1 env.close() diff --git a/tests/test_state.py b/tests/test_state.py index cb4414f8..5dc658d0 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -76,11 +76,21 @@ def test_trade_execution(): state = State(players) player_deck_replenish(state, players[0].color, BRICK, 4) - trade_offer = tuple([BRICK] * 4 + [ORE]) - action = Action(players[0].color, ActionType.MARITIME_TRADE, trade_offer) + + # Create trade freqdeck + brick_idx = RESOURCES.index(BRICK) + ore_idx = RESOURCES.index(ORE) + give_freqdeck = [0] * 5 + receive_freqdeck = [0] * 5 + give_freqdeck[brick_idx] = 4 # Give 4 BRICK + receive_freqdeck[ore_idx] = 1 # Get 1 ORE + trade_freqdeck = tuple(give_freqdeck + receive_freqdeck) + + action = Action(players[0].color, ActionType.MARITIME_TRADE, trade_freqdeck) apply_action(state, action) - assert player_num_resource_cards(state, players[0].color) == 1 + assert player_num_resource_cards(state, players[0].color, BRICK) == 0 + assert player_num_resource_cards(state, players[0].color, ORE) == 1 assert sum(state.resource_freqdeck) == 19 * 5 + 4 - 1 diff --git a/ui/src/components/Prompt.js b/ui/src/components/Prompt.js index d7eeab52..e3a3e80e 100644 --- a/ui/src/components/Prompt.js +++ b/ui/src/components/Prompt.js @@ -90,8 +90,29 @@ export function humanizeAction(gameState, action) { } export function humanizeTradeAction(action) { - const out = action[2].slice(0, 4).filter((resource) => resource !== null); - return `${out.length} ${out[0]} => ${action[2][4]}`; + const freqdeck = action[2]; + const RESOURCES = ['WOOD', 'BRICK', 'SHEEP', 'WHEAT', 'ORE']; + + const resourcesGiven = []; + const resourcesReceived = []; + + // Parse resources given (indices 0-4) + for (let i = 0; i < 5; i++) { + const amount = freqdeck[i]; + if (amount > 0) { + resourcesGiven.push(`${amount} ${RESOURCES[i]}`); + } + } + + // Parse resources received (indices 5-9) + for (let i = 5; i < 10; i++) { + const amount = freqdeck[i]; + if (amount > 0) { + resourcesReceived.push(`${amount} ${RESOURCES[i - 5]}`); + } + } + + return `${resourcesGiven.join(', ')} => ${resourcesReceived.join(', ')}`; } function humanizePrompt(current_prompt) {