Skip to content

Commit d65335f

Browse files
authored
Minor changes for dtype and docstrings (#62)
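- Annotate `eos_penalty` as `bool` instead of `float`, and move the `extra_special_tokens` docstring entry after `tokenizer` (matching the signature order), documenting its `None` default.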
1 parent 279e588 commit d65335f

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

compose_rl/reward_learning/functional.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -144,9 +144,10 @@ class BadGenerationEndReward(Reward):
144144
145145
Args:
146146
reward (float): The reward to apply.
147-
eos_penalty (float): The penalty to apply if the response does not end with an EOS.
148-
extra_special_tokens (Optional[list[str]]): The extra special tokens to check for.
147+
eos_penalty (bool): Whether to apply the penalty if the response does not end with an EOS.
149148
tokenizer (Tokenizer): The tokenizer to use for the reward.
149+
extra_special_tokens (list[str] | None): The extra special tokens to check for.
150+
Defaults to `None`.
150151
"""
151152

152153
# This can be run async
@@ -155,7 +156,7 @@ class BadGenerationEndReward(Reward):
155156
def __init__(
156157
self,
157158
reward: float,
158-
eos_penalty: float,
159+
eos_penalty: bool,
159160
tokenizer: Tokenizer,
160161
extra_special_tokens: list[str] | None = None,
161162
):

0 commit comments

Comments
 (0)