File tree Expand file tree Collapse file tree 1 file changed +4
-3
lines changed
compose_rl/reward_learning Expand file tree Collapse file tree 1 file changed +4
-3
lines changed Original file line number Diff line number Diff line change @@ -144,9 +144,10 @@ class BadGenerationEndReward(Reward):
144
144
145
145
Args:
146
146
reward (float): The reward to apply.
147
- eos_penalty (float): The penalty to apply if the response does not end with an EOS.
148
- extra_special_tokens (Optional[list[str]]): The extra special tokens to check for.
147
+ eos_penalty (bool): Whether to apply a penalty if the response does not end with an EOS.
149
148
tokenizer (Tokenizer): The tokenizer to use for the reward.
149
+ extra_special_tokens (list[str] | None): The extra special tokens to check for.
150
+ Defaults to `None`.
150
151
"""
151
152
152
153
# This can be run async
@@ -155,7 +156,7 @@ class BadGenerationEndReward(Reward):
155
156
def __init__ (
156
157
self ,
157
158
reward : float ,
158
- eos_penalty : float ,
159
+ eos_penalty : bool ,
159
160
tokenizer : Tokenizer ,
160
161
extra_special_tokens : list [str ] | None = None ,
161
162
):
You can’t perform that action at this time.
0 commit comments