From d90cc6dc5f243bc6b9e3de9380c7f90f9a36f28e Mon Sep 17 00:00:00 2001
From: Gabriele Sarti
Date: Wed, 19 Jun 2024 14:19:40 +0200
Subject: [PATCH 1/3] Fix skip_special_tokens when special tokens are pre-specified in the prompt

---
 inseq/models/attribution_model.py | 4 +++-
 inseq/utils/misc.py               | 7 +++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/inseq/models/attribution_model.py b/inseq/models/attribution_model.py
index 42d72b3b..08e645de 100644
--- a/inseq/models/attribution_model.py
+++ b/inseq/models/attribution_model.py
@@ -411,7 +411,9 @@ def attribute(
                 "Step scores are not supported for final step methods since they do not iterate over the full"
                 " sequence. Please remove the step scores and compute them separately passing method='dummy'."
             )
-        input_texts, generated_texts = format_input_texts(input_texts, generated_texts)
+        input_texts, generated_texts = format_input_texts(
+            input_texts, generated_texts, skip_special_tokens, self.special_tokens
+        )
         has_generated_texts = generated_texts is not None
         if not self.is_encoder_decoder:
             for i in range(len(input_texts)):
diff --git a/inseq/utils/misc.py b/inseq/utils/misc.py
index 628995bc..2f0c178e 100644
--- a/inseq/utils/misc.py
+++ b/inseq/utils/misc.py
@@ -202,6 +202,8 @@ def isnotebook():
 def format_input_texts(
     texts: TextInput,
     ref_texts: Optional[TextInput] = None,
+    skip_special_tokens: bool = False,
+    special_tokens: Optional[list[str]] = None,
 ) -> tuple[list[str], list[str]]:
     texts = [texts] if isinstance(texts, str) else texts
     reference_texts = [ref_texts] if isinstance(ref_texts, str) else ref_texts
@@ -211,6 +213,11 @@ def format_input_texts(
             len(texts), len(reference_texts)
         )
     )
+    if skip_special_tokens:
+        for special_token in special_tokens or []:
+            texts = [text.replace(special_token, "") for text in texts]
+            if reference_texts is not None:
+                reference_texts = [text.replace(special_token, "") for text in reference_texts]
     return texts, reference_texts
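A minimal standalone sketch of the stripping behavior this patch introduces; the special-token strings below are illustrative assumptions, since in Inseq the list comes from the loaded model's `special_tokens`, not from hardcoded values:

```python
# Hypothetical illustration of the new skip_special_tokens branch in
# format_input_texts: each pre-specified special token is removed verbatim
# from every input text before attribution.
def strip_special_tokens(texts: list[str], special_tokens: list[str]) -> list[str]:
    for special_token in special_tokens:
        texts = [text.replace(special_token, "") for text in texts]
    return texts

# Token strings here are examples only, not taken from a specific tokenizer.
print(strip_special_tokens(["<s>Hello world</s>"], ["<s>", "</s>", "<pad>"]))
# ['Hello world']
```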
"decoder_self_attentions": torch.stack(output.attentions, dim=1), } From 0e318ceab94a52bc10276f72cdd43d4c3b7df7f4 Mon Sep 17 00:00:00 2001 From: Gabriele Sarti Date: Thu, 27 Jun 2024 16:38:40 +0200 Subject: [PATCH 3/3] Add rescale and readme changes --- CHANGELOG.md | 8 ++++++-- README.md | 7 +++++-- inseq/commands/attribute/attribute.py | 3 +++ inseq/commands/attribute/attribute_args.py | 8 ++++++++ inseq/commands/attribute_context/attribute_context.py | 1 + 5 files changed, 23 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b4a36ee2..9ef5672e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,15 +9,19 @@ ## 🔧 Fixes and Refactoring - Fix the issue in the attention implementation from [#268](https://github.com/inseq-team/inseq/issues/268) where non-terminal position in the tensor were set to nan if they were 0s ([#269](https://github.com/inseq-team/inseq/pull/269)). - + - Fix the pad token in cases where it is not specified by default in the loaded model (e.g. for Qwen models) ([#269](https://github.com/inseq-team/inseq/pull/269)). - Fix bug reported in [#266](https://github.com/inseq-team/inseq/issues/266) making `value_zeroing` unusable for SDPA attention. This enables using the method on models using SDPA attention as default (e.g. `GemmaForCausalLM`) without passing `model_kwargs={'attn_implementation': 'eager'}` ([#267](https://github.com/inseq-team/inseq/pull/267)). +- Fix multi-device support and duplicate BOS for chat template models ([#280](https://github.com/inseq-team/inseq/pull/280)). + +- Add `rescale_attributions` to Inseq CLI commands for `rescale=True` ([#280](https://github.com/inseq-team/inseq/pull/280)). + ## 📝 Documentation and Tutorials *No changes* ## 💥 Breaking Changes -*No changes* \ No newline at end of file +*No changes* diff --git a/README.md b/README.md index 6f20a99e..7757aa7d 100644 --- a/README.md +++ b/README.md @@ -280,7 +280,7 @@ Our vision for Inseq is to create a centralized, comprehensive and robust set of ## Citing Inseq -If you use Inseq in your research we suggest to include a mention to the specific release (e.g. v0.4.0) and we kindly ask you to cite our reference paper as: +If you use Inseq in your research we suggest to include a mention to the specific release (e.g. v0.6.0) and we kindly ask you to cite our reference paper as: ```bibtex @inproceedings{sarti-etal-2023-inseq, @@ -308,7 +308,7 @@ If you use Inseq in your research we suggest to include a mention to the specifi Inseq has been used in various research projects. A list of known publications that use Inseq to conduct interpretability analyses of generative models is shown below. > [!TIP] -> Last update: May 2024. Please open a pull request to add your publication to the list. +> Last update: June 2024. Please open a pull request to add your publication to the list.
From 0e318ceab94a52bc10276f72cdd43d4c3b7df7f4 Mon Sep 17 00:00:00 2001
From: Gabriele Sarti
Date: Thu, 27 Jun 2024 16:38:40 +0200
Subject: [PATCH 3/3] Add rescale option and README changes

---
 CHANGELOG.md                                          | 8 ++++++--
 README.md                                             | 7 +++++--
 inseq/commands/attribute/attribute.py                 | 3 +++
 inseq/commands/attribute/attribute_args.py            | 8 ++++++++
 inseq/commands/attribute_context/attribute_context.py | 1 +
 5 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b4a36ee2..9ef5672e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,15 +9,19 @@
 ## 🔧 Fixes and Refactoring

 - Fix the issue in the attention implementation from [#268](https://github.com/inseq-team/inseq/issues/268) where non-terminal positions in the tensor were set to nan if they were 0s ([#269](https://github.com/inseq-team/inseq/pull/269)).
- 
+
 - Fix the pad token in cases where it is not specified by default in the loaded model (e.g. for Qwen models) ([#269](https://github.com/inseq-team/inseq/pull/269)).

 - Fix bug reported in [#266](https://github.com/inseq-team/inseq/issues/266) making `value_zeroing` unusable for SDPA attention. This enables using the method on models using SDPA attention as default (e.g. `GemmaForCausalLM`) without passing `model_kwargs={'attn_implementation': 'eager'}` ([#267](https://github.com/inseq-team/inseq/pull/267)).

+- Fix multi-device support and duplicate BOS for chat template models ([#280](https://github.com/inseq-team/inseq/pull/280)).
+
+- Add a `rescale_attributions` option to Inseq CLI commands, enabling `rescale=True` during aggregation ([#280](https://github.com/inseq-team/inseq/pull/280)).
+
 ## 📝 Documentation and Tutorials

 *No changes*

 ## 💥 Breaking Changes

-*No changes*
\ No newline at end of file
+*No changes*
diff --git a/README.md b/README.md
index 6f20a99e..7757aa7d 100644
--- a/README.md
+++ b/README.md
@@ -280,7 +280,7 @@ Our vision for Inseq is to create a centralized, comprehensive and robust set of

 ## Citing Inseq

-If you use Inseq in your research we suggest to include a mention to the specific release (e.g. v0.4.0) and we kindly ask you to cite our reference paper as:
+If you use Inseq in your research, we suggest including a mention of the specific release (e.g. v0.6.0), and we kindly ask you to cite our reference paper as:

 ```bibtex
 @inproceedings{sarti-etal-2023-inseq,
@@ -308,7 +308,7 @@ If you use Inseq in your research we suggest to include a mention to the specifi
 Inseq has been used in various research projects. A list of known publications that use Inseq to conduct interpretability analyses of generative models is shown below.

 > [!TIP]
-> Last update: May 2024. Please open a pull request to add your publication to the list.
+> Last update: June 2024. Please open a pull request to add your publication to the list.

 <details>
   <summary><b>2023</b></summary>
@@ -331,6 +331,9 @@ Inseq has been used in various research projects. A list of known publications t
     <li>ReAGent: A Model-agnostic Feature Attribution Method for Generative Language Models (Zhao et al., 2024)</li>
     <li>Revisiting subword tokenization: A case study on affixal negation in large language models (Truong et al., 2024)</li>
     <li>Exploring NMT Explainability for Translators Using NMT Visualising Tools (Gonzalez-Saez et al., 2024)</li>
+    <li>DETAIL: Task DEmonsTration Attribution for Interpretable In-context Learning (Zhou et al., 2024)</li>
+    <li>Should We Fine-Tune or RAG? Evaluating Different Techniques to Adapt LLMs for Dialogue (Alghisi et al., 2024)</li>
+    <li>Model Internals-based Answer Attribution for Trustworthy Retrieval-Augmented Generation (Qi, Sarti et al., 2024)</li>
   </ol>
 </details>
diff --git a/inseq/commands/attribute/attribute.py b/inseq/commands/attribute/attribute.py
index 4869b846..95c4e5d7 100644
--- a/inseq/commands/attribute/attribute.py
+++ b/inseq/commands/attribute/attribute.py
@@ -11,12 +11,14 @@ def aggregate_attribution_scores(
     selectors: Optional[list[int]] = None,
     aggregators: Optional[list[str]] = None,
     normalize_attributions: bool = False,
+    rescale_attributions: bool = False,
 ) -> FeatureAttributionOutput:
     if selectors is not None and aggregators is not None:
         for select_idx, aggregator_fn in zip(selectors, aggregators):
             out = out.aggregate(
                 aggregator=aggregator_fn,
                 normalize=normalize_attributions,
+                rescale=rescale_attributions,
                 select_idx=select_idx,
                 do_post_aggregation_checks=False,
             )
@@ -79,6 +81,7 @@ def attribute(input_texts, generated_texts, args: AttributeExtendedArgs):
         selectors=args.attribution_selectors,
         aggregators=args.attribution_aggregators,
         normalize_attributions=args.normalize_attributions,
+        rescale_attributions=args.rescale_attributions,
     )
     print(f"Saving {'aggregated ' if args.aggregate_output else ''}attributions to {args.save_path}")
     out.save(args.save_path, overwrite=True)
diff --git a/inseq/commands/attribute/attribute_args.py b/inseq/commands/attribute/attribute_args.py
index ee707b5f..dfca76dd 100644
--- a/inseq/commands/attribute/attribute_args.py
+++ b/inseq/commands/attribute/attribute_args.py
@@ -61,6 +61,14 @@ class AttributeBaseArgs:
             "for each context are normalized to sum up to 1, providing a relative notion of input salience."
         ),
     )
+    rescale_attributions: bool = cli_arg(
+        default=False,
+        help=(
+            "Whether to rescale the attribution scores for each context. If ``True``, the attribution scores "
+            "for each context are rescaled to sum up to the number of tokens in the input, providing an absolute"
+            " notion of input salience."
+        ),
+    )
     model_kwargs: dict = cli_arg(
         default_factory=dict,
         help="Additional keyword arguments passed to the model constructor in JSON format.",
diff --git a/inseq/commands/attribute_context/attribute_context.py b/inseq/commands/attribute_context/attribute_context.py
index 1eb72126..e2bafe7e 100644
--- a/inseq/commands/attribute_context/attribute_context.py
+++ b/inseq/commands/attribute_context/attribute_context.py
@@ -211,6 +211,7 @@ def attribute_context_with_model(args: AttributeContextArgs, model: HuggingfaceM
         selectors=args.attribution_selectors,
         aggregators=args.attribution_aggregators,
         normalize_attributions=args.normalize_attributions,
+        rescale_attributions=args.rescale_attributions,
     )[0]
     if args.show_intermediate_outputs:
         cci_attrib_out.show(do_aggregation=False)
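A minimal numeric sketch of the semantics described by the two help texts above (`normalize_attributions` vs. the new `rescale_attributions`); the scores are made-up values, and the sketch implements the documented behavior rather than Inseq's internal aggregator:

```python
import torch

# Made-up attribution scores for a 3-token input, for illustration only.
scores = torch.tensor([0.2, 0.6, 1.2])

# normalize_attributions: scores sum to 1 per context (relative salience).
normalized = scores / scores.sum()

# rescale_attributions: scores sum to the number of input tokens (absolute
# salience); a uniformly important input would score 1.0 everywhere.
rescaled = normalized * scores.numel()

print(normalized.sum().item())  # ~1.0 (up to float precision)
print(rescaled.sum().item())    # ~3.0
```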