
Commit eb07373

Merge pull request huggingface#165 from huggingface/smangrul/add-trl-example-in-readme
minor changes
2 parents 8777b56 + f1980e9 commit eb07373

2 files changed (+14 / -9 lines)


README.md

+5 / -4
@@ -125,14 +125,15 @@ Try out the 🤗 Gradio Space which should run seamlessly on a T4 instance:
 
 ![peft lora dreambooth gradio space](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/peft/peft_lora_dreambooth_gradio_space.png)
 
-### Parameter Efficient Tuning of LLMs for RLHF components such as Ranker and Policy [ToDo]
-Here is an exmaple in trl library on using PEFT+INT8 for tuning policy model: [gpt2-sentiment_peft.py](https://github.com/lvwerra/trl/blob/main/examples/sentiment/scripts/gpt2-sentiment_peft.py)
+### Parameter Efficient Tuning of LLMs for RLHF components such as Ranker and Policy
+- Here is an example in the [trl](https://github.com/lvwerra/trl) library using PEFT+INT8 for tuning the policy model: [gpt2-sentiment_peft.py](https://github.com/lvwerra/trl/blob/main/examples/sentiment/scripts/gpt2-sentiment_peft.py)
+- Example using PEFT for both reward model and policy [ToDo]
 
 ### INT8 training of large models in Colab using PEFT LoRA and bits_and_bytes
 
-Here is now a demo on how to fine tune [OPT-6.7b](https://huggingface.co/facebook/opt-6.7b) (14GB in fp16) in a Google colab: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1jCkpikz0J2o20FBQmYmAGdiKmJGOMo-o?usp=sharing)
+- Here is a demo on how to fine-tune [OPT-6.7b](https://huggingface.co/facebook/opt-6.7b) (14GB in fp16) in a Google Colab: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1jCkpikz0J2o20FBQmYmAGdiKmJGOMo-o?usp=sharing)
 
-Here is now a demo on how to fine tune [whishper-large](openai/whisper-large-v2) (1.5B params) (14GB in fp16) in a Google colab: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1DOkD_5OUjFa0r5Ik3SgywJLJtEo2qLxO?usp=sharing) and [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1vhF8yueFqha3Y3CpTHN6q9EVcII9EYzs?usp=sharing)
+- Here is a demo on how to fine-tune [whisper-large](https://huggingface.co/openai/whisper-large-v2) (1.5B params) (14GB in fp16) in a Google Colab: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1DOkD_5OUjFa0r5Ik3SgywJLJtEo2qLxO?usp=sharing) and [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1vhF8yueFqha3Y3CpTHN6q9EVcII9EYzs?usp=sharing)
 
 ### Save compute and storage even for medium and small models
 
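Both README bullets added above rely on the same recipe: load the base model in 8-bit with bitsandbytes and attach a small LoRA adapter via PEFT, so only the adapter weights are trained. Below is a minimal sketch of that setup; the gpt2 model name, the LoRA hyperparameters, and the use of `prepare_model_for_int8_training` are illustrative assumptions, not the exact code of the linked trl script or Colab notebooks.

```python
# Hedged sketch: 8-bit base model + LoRA adapter, the pattern the linked examples build on.
# Assumes bitsandbytes and accelerate are installed and a CUDA GPU is available.
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model, prepare_model_for_int8_training

model_name = "gpt2"  # assumed; the trl sentiment example tunes a gpt2-family policy model
model = AutoModelForCausalLM.from_pretrained(model_name, load_in_8bit=True, device_map="auto")

# Freeze the int8 base weights and prepare the model for training (e.g. layer norms in fp32).
model = prepare_model_for_int8_training(model)

# Attach a LoRA adapter; the hyperparameters below are illustrative, not the examples' values.
lora_config = LoraConfig(r=16, lora_alpha=32, lora_dropout=0.05, bias="none", task_type="CAUSAL_LM")
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # only the LoRA matrices require gradients
```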

examples/int8_training/peft_bnb_whisper_large_v2_training.ipynb

+9 / -5
@@ -1295,13 +1295,16 @@
 "from transformers import Seq2SeqTrainer, TrainerCallback, TrainingArguments, TrainerState, TrainerControl\n",
 "from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR\n",
 "\n",
+"\n",
 "class SavePeftModelCallback(TrainerCallback):\n",
 "    def on_save(\n",
-"        self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs,\n",
+"        self,\n",
+"        args: TrainingArguments,\n",
+"        state: TrainerState,\n",
+"        control: TrainerControl,\n",
+"        **kwargs,\n",
 "    ):\n",
-"        checkpoint_folder = os.path.join(\n",
-"            args.output_dir, f\"{PREFIX_CHECKPOINT_DIR}-{state.global_step}\"\n",
-"        ) \n",
+"        checkpoint_folder = os.path.join(args.output_dir, f\"{PREFIX_CHECKPOINT_DIR}-{state.global_step}\")\n",
 "\n",
 "        peft_model_path = os.path.join(checkpoint_folder, \"adapter_model\")\n",
 "        kwargs[\"model\"].save_pretrained(peft_model_path)\n",
@@ -1311,6 +1314,7 @@
 "        os.remove(pytorch_model_path)\n",
 "        return control\n",
 "\n",
+"\n",
 "trainer = Seq2SeqTrainer(\n",
 "    args=training_args,\n",
 "    model=model,\n",
@@ -1319,7 +1323,7 @@
 "    data_collator=data_collator,\n",
 "    # compute_metrics=compute_metrics,\n",
 "    tokenizer=processor.feature_extractor,\n",
-"    callbacks=[SavePeftModelCallback]\n",
+"    callbacks=[SavePeftModelCallback],\n",
 ")\n",
 "model.config.use_cache = False  # silence the warnings. Please re-enable for inference!"
 ]
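For context on the callback being reformatted in this diff: SavePeftModelCallback writes only the PEFT adapter weights into an adapter_model subfolder at each checkpoint and removes the full pytorch_model.bin, keeping checkpoints small. A minimal sketch of reloading such a checkpoint for inference follows; the output directory and step number are hypothetical, and the 8-bit loading simply mirrors the notebook's training setup.

```python
# Hedged sketch: reload an adapter checkpoint produced by SavePeftModelCallback.
from transformers import WhisperForConditionalGeneration
from peft import PeftModel

base_model = WhisperForConditionalGeneration.from_pretrained(
    "openai/whisper-large-v2", load_in_8bit=True, device_map="auto"
)
# "output_dir/checkpoint-1000" is a hypothetical Trainer checkpoint folder;
# the callback stores the adapter under its "adapter_model" subfolder.
model = PeftModel.from_pretrained(base_model, "output_dir/checkpoint-1000/adapter_model")
model.eval()  # only the small adapter weights were saved; the base model stays frozen
```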
