
Commit ec281ef

Stop generation with Continuation when a specific string was generated

1 parent 34bc2fb commit ec281ef

3 files changed: +66 -18 lines

outlines/text/generate/continuation.py (+32 -5)

@@ -17,11 +17,12 @@ class Continuation(Sequence):
 
     """
 
-    def __init__(self, model, max_tokens: Optional[int]):
+    def __init__(self, model, stop: List[str], max_tokens: Optional[int]):
         super().__init__(model, max_tokens)
         self.eos_token_id = torch.tensor(
             [self.model.tokenizer.eos_token_id], device=self.device
         )
+        self.stop_sequences = stop
 
     def is_finished(self, token_ids: torch.LongTensor) -> torch.BoolTensor:
         """Determine whether the sequences reached maximum length of end with
@@ -38,15 +39,41 @@ def is_finished(self, token_ids: torch.LongTensor) -> torch.BoolTensor:
             The input sequences.
 
         """
-        return token_ids[:, -1] == self.model.tokenizer.eos_token_id
+
+        sequences = self.model.tokenizer.decode(token_ids)
+        is_stop_sequence_found = []
+        for sequence in sequences:
+            found = False
+            for stop_str in self.stop_sequences:
+                if stop_str in sequence:
+                    found = True
+
+            is_stop_sequence_found.append(found)
+
+        is_stop_sequence_found = torch.tensor(is_stop_sequence_found, dtype=torch.bool)
+        is_eos_found = token_ids[:, -1] == self.model.tokenizer.eos_token_id
+
+        return torch.logical_or(is_eos_found, is_stop_sequence_found)
 
     def postprocess_completions(self, completions: List[str]) -> List[str]:
         """Remove the EOS token from the completion."""
-        return [
+        without_eos = [
             completion.replace(self.model.tokenizer.eos_token, "")
             for completion in completions
         ]
 
+        completions = []
+        for completion in without_eos:
+            for stop_str in self.stop_sequences:
+                idx = completion.find(stop_str)
+                if idx > 0:
+                    completions.append(completion[:idx])
+                    break
+
+            completions.append(completion)
+
+        return completions
+
 
-def continuation(model, max_tokens: Optional[int] = None):
-    return Continuation(model, max_tokens)
+def continuation(model, *, stop: List[str] = [], max_tokens: Optional[int] = None):
+    return Continuation(model, stop, max_tokens)
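
Taken together, these changes let callers end generation on arbitrary strings as well as on the EOS token: is_finished decodes each sequence and checks it against every stop string, and postprocess_completions trims the decoded output. A minimal usage sketch, assuming the library's transformers wrapper from the same era of the codebase; the "gpt2" checkpoint, the prompt, and max_tokens value are illustrative, not part of this commit:

    import outlines.models as models
    from outlines.text.generate.continuation import continuation

    # Assumption: any model whose tokenizer exposes `decode`,
    # `eos_token` and `eos_token_id` fits the interface used by
    # Continuation; "gpt2" is only an example checkpoint.
    model = models.transformers("gpt2")

    # Generation halts once "\n" (or the EOS token) appears in the
    # decoded continuation; postprocessing strips the stop string.
    generate = continuation(model, stop=["\n"], max_tokens=64)
    answer = generate("Q: Name one prime number.\nA:")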

outlines/text/generate/sequence.py (+3 -1)

@@ -229,7 +229,9 @@ def __call__(
             )
             token_ids = self.update_token_ids(is_finished, token_ids, updated_token_ids)
             attention_mask = self.expand_attention_mask(attention_mask)
-            is_finished[~is_finished] = self.is_finished(updated_token_ids).flatten()
+            is_finished[~is_finished] = self.is_finished(
+                updated_token_ids[:, num_prompt_tokens:]
+            ).flatten()
 
         result = self.model.tokenizer.decode(token_ids)
         result = self.postprocess_completions(result)
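
The slice matters because is_finished now decodes the token ids and searches for stop strings: if the prompt itself contains a stop string (the "\n" in a few-shot prompt, say), decoding the full sequence would mark every generation finished on the first step. Passing only the tokens generated after the prompt avoids that. A small self-contained illustration of the indexing, with all token values hypothetical:

    import torch

    # Three prompt tokens followed by two generated tokens.
    token_ids = torch.tensor([[101, 11, 102, 103, 12]])
    num_prompt_tokens = 3

    # Only the generated suffix is handed to `is_finished`, so a stop
    # string occurring in the prompt cannot end generation early.
    generated = token_ids[:, num_prompt_tokens:]
    assert generated.tolist() == [[103, 12]]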
tests/text/generate/test_continuation.py (+31 -12)

@@ -1,5 +1,4 @@
-import numpy as np
-from numpy.testing import assert_array_equal
+import torch
 
 from outlines.text.generate.continuation import Continuation, continuation
 
@@ -9,35 +8,55 @@ class Tokenizer:
     eos_token_id = 0
     pad_token_id = -1
 
+    def decode(self, token_ids):
+        return ["Test"] * token_ids.shape[0]
+
 
 class Model:
     tokenizer = Tokenizer()
     device = "cpu"
 
 
-def test_continuation_is_finished():
-    model = continuation(Model(), 10)
+def test_continuation_eos_is_finished():
+    model = continuation(Model())
     assert isinstance(model, Continuation)
 
-    token_ids = np.array([[3, 2]])
+    token_ids = torch.tensor([[3, 2]])
     result = model.is_finished(token_ids)
-    assert_array_equal(result, [False])
+    assert torch.equal(result, torch.tensor([False]))
 
-    token_ids = np.array([[3, 2, 0]])
+    token_ids = torch.tensor([[3, 2, 0]])
     result = model.is_finished(token_ids)
-    assert_array_equal(result, [True])
+    assert torch.equal(result, torch.tensor([True]))
 
-    token_ids = np.array([[3, 2, 1], [3, 2, 0]])
+    token_ids = torch.tensor([[3, 2, 1], [3, 2, 0]])
    result = model.is_finished(token_ids)
-    assert_array_equal(result, [False, True])
+    assert torch.equal(result, torch.tensor([False, True]))
 
-    token_ids = np.array([[3, 2, 1, 0], [3, 2, 0, -1]])
+    token_ids = torch.tensor([[3, 2, 1, 0], [3, 2, 0, -1]])
     result = model.is_finished(token_ids)
-    assert_array_equal(result, [True, False])
+    assert torch.equal(result, torch.tensor([True, False]))
 
 
 def test_continuation_postprocess():
     model = continuation(Model())
     result = model.postprocess_completions(["Here<EOS>"])
     assert len(result) == 1
     assert result[0] == "Here"
+
+
+def test_continuation_stop_is_finished():
+    tokenizer = Tokenizer()
+    tokenizer.decode = lambda x: ["finished \n", "not_finished"]
+    model = Model()
+    model.tokenizer = tokenizer
+
+    model = continuation(model, stop=["\n"])
+
+    token_ids = torch.tensor([[2, 3]])
+    result = model.is_finished(token_ids)
+    assert torch.equal(result, torch.tensor([True, False]))
+
+
+def test_continuation_stop_postprocess():
+    assert False
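
The final test is an intentional placeholder (assert False). A plausible shape for it, once postprocess_completions trims everything from the first stop string onward; the expected strings are assumptions about the intended behavior, not part of this commit:

    def test_continuation_stop_postprocess():
        model = continuation(Model(), stop=["\n"])

        # Assumed behavior: the completion is cut at the first stop
        # string; completions without one pass through unchanged.
        result = model.postprocess_completions(["Stop\nhere", "no stop"])
        assert result == ["Stop", "no stop"]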
