@@ -1959,15 +1959,12 @@ def process_for_player(primary_agent_idx):
1959
1959
return final_obs_for_players
1960
1960
1961
1961
@property
def featurize_state_shape(self):
    """Deprecated property alias for `get_featurize_state_shape`.

    Emits a DeprecationWarning and returns the result of
    `self.get_featurize_state_shape(2)`, i.e. the shape computed for two
    pots per player (the default of the previous in-line computation).
    A property cannot receive arguments, so callers that need a different
    number of pots must call `get_featurize_state_shape(num_pots)` directly.

    Returns:
        Whatever `get_featurize_state_shape(2)` returns.
    """
    warnings.warn(
        "Using the `featurize_state_shape` property is deprecated. Please use `get_featurize_state_shape` method instead",
        DeprecationWarning,
        # Attribute the warning to the code accessing the property rather
        # than to this line, so users can locate the call site to migrate.
        stacklevel=2,
    )
    return self.get_featurize_state_shape(2)
1971
1968
1972
1969
def get_featurize_state_shape (self , num_pots = 2 ):
1973
1970
num_pot_features = 10
@@ -1978,6 +1975,41 @@ def get_featurize_state_shape(self, num_pots=2):
1978
1975
def featurize_state (self , overcooked_state , mlam , num_pots = 2 , ** kwargs ):
1979
1976
"""
1980
1977
Encode state with some manually designed features. Works for arbitrary number of players
1978
+
1979
+ Arguments:
1980
+ overcooked_state (OvercookedState): state we wish to featurize
1981
+ mlam (MediumLevelActionManager): to be used for distance computations necessary for our higher-level feature encodings
1982
+ num_pots (int): Encode the state (ingredients, whether cooking or not, etc) of the 'num_pots' closest pots to each player.
1983
+ If only k < num_pots pots are reachable by player i, then pots [k+1, num_pots] are encoded as all zeros. Changing this
1984
+ impacts the shape of the feature encoding
1985
+
1986
+ Returns:
1987
+ ordered_features (list[np.Array]): The ith element contains a player-centric featurized view for the ith player
1988
+
1989
+ The encoding for player i is as follows:
1990
+
1991
+ [player_i_features, other_player_features, player_i_dist_to_other_players, player_i_position]
1992
+
1993
+ player_{i}_features (length num_pots*10 + 24):
1994
+ pi_orientation: length 4 one-hot-encoding of direction currently facing
1995
+ pi_obj: length 4 one-hot-encoding of object currently being held (all 0s if no object held)
1996
+ pi_wall_{j}: {0, 1} boolean value of whether player i has wall immediately in direction j
1997
+ pi_closest_{onion|tomato|dish|soup|serving|empty_counter}: (dx, dy) where dx = x dist to item, dy = y dist to item. (0, 0) if item is currently held
1998
+ pi_closest_soup_n_{onions|tomatoes}: int value for number of this ingredient in closest soup
1999
+ pi_closest_pot_{j}_exists: {0, 1} depending on whether jth closest pot found. If 0, then all other pot features are 0. Note: can
2000
+ be 0 even if there are more than j pots on layout, if the pot is not reachable by player i
2001
+ pi_closest_pot_{j}_{is_empty|is_full|is_cooking|is_ready}: {0, 1} depending on boolean value for jth closest pot
2002
+ pi_closest_pot_{j}_{num_onions|num_tomatoes}: int value for number of this ingredient in jth closest pot
2003
+ pi_closest_pot_{j}_cook_time: int value for seconds remaining on soup. -1 if no soup is cooking
2004
+ pi_closest_pot_{j}: (dx, dy) to jth closest pot from player i location
2005
+
2006
+ other_player_features (length (num_players - 1)*(num_pots*10 + 24)):
2007
+ ordered concatenation of player_{j}_features for j != i
2008
+
2009
+ player_i_dist_to_other_players (length (num_players - 1)*2):
2010
+ [player_j.pos - player_i.pos for j != i]
2011
+
2012
+ player_i_position (length 2)
1981
2013
"""
1982
2014
1983
2015
all_features = {}
0 commit comments