Minor changes for dtype and docstrings (#62)

gupta-abhay · web-flow · commit d65335f0dbea · 2025-05-15T00:24:14.000Z
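- As the title says: change the `eos_penalty` type of `BadGenerationEndReward` from `float` to `bool` (annotation and docstring), write the `extra_special_tokens` docstring type as `list[str] | None` with its `None` default, and reorder the `Args` entries to match the `__init__` signature.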
diff --git a/compose_rl/reward_learning/functional.py b/compose_rl/reward_learning/functional.py
@@ -144,9 +144,10 @@ class BadGenerationEndReward(Reward):
 
     Args:
         reward (float): The reward to apply.
-        eos_penalty (float): The penalty to apply if the response does not end with an EOS.
-        extra_special_tokens (Optional[list[str]]): The extra special tokens to check for.
+        eos_penalty (bool): The penalty to apply if the response does not end with an EOS.
         tokenizer (Tokenizer): The tokenizer to use for the reward.
+        extra_special_tokens (list[str] | None): The extra special tokens to check for.
+            Defaults to `None`.
     """
 
     # This can be run async
@@ -155,7 +156,7 @@ class BadGenerationEndReward(Reward):
     def __init__(
         self,
         reward: float,
-        eos_penalty: float,
+        eos_penalty: bool,
         tokenizer: Tokenizer,
         extra_special_tokens: list[str] | None = None,
     ):