"""
Prompt strategy for finetuning Orca Mini (v2) models.
See https://huggingface.co/psmathur/orca_mini_v2_7b for more information.

Use dataset type "orcamini" in config.yml to use this prompt style.

Compared to the alpaca_w_system.open_orca dataset type,
this one specifies the system prompt with "### System:".

Not suited/tested for multi-turn conversations without further adjustments.
"""
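
# Illustrative config snippet (an assumption about your setup, not part of this
# module): selecting this strategy in an axolotl config.yml would look roughly
# like the following; the dataset path is a hypothetical placeholder.
#
#   datasets:
#     - path: your/orca-mini-style-dataset  # hypothetical path
#       type: orcamini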
from typing import Generator, Union

from axolotl.prompt_strategies.alpaca_w_system import OpenOrcaPromptTokenizingStrategy
from axolotl.prompters import AlpacaPrompter


class OrcaMiniPrompter(AlpacaPrompter):
    """Adjusted Prompter for Orca Mini (v2) datasets"""

    def match_prompt_style(self):
        self.turn_no_input_format = (
            "### System:\n{system}\n\n### User:\n{instruction}\n\n### Response:\n"
        )
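        # For example, with system="You are an AI assistant." and
        # instruction="Hello", this template renders as:
        #   "### System:\nYou are an AI assistant.\n\n### User:\nHello\n\n### Response:\n"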

    def build_prompt_w_system(
        self,
        system: str,
        instruction: str,
        output: Union[None, str] = None,
    ) -> Generator[str, None, None]:
        # Yields the full prompt built from the system prompt and the instruction;
        # if a label (= response, = output) is provided, it is appended as well.
        res = self.turn_no_input_format.format(system=system, instruction=instruction)
        if output:
            res = f"{res}{output}"
        yield res


def load(tokenizer, cfg):
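    """Entry point used by axolotl for the "orcamini" dataset type: wires the
    OrcaMini prompter into the OpenOrca prompt tokenizing strategy."""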
    return OpenOrcaPromptTokenizingStrategy(
        OrcaMiniPrompter(),
        tokenizer,
        cfg.train_on_inputs,
        cfg.sequence_len,
    )
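

# Minimal usage sketch (illustrative only, not part of the module): building a
# raw prompt directly with the prompter defined above. The example system
# prompt and instruction are placeholders.
#
#   prompter = OrcaMiniPrompter()
#   prompt = next(
#       prompter.build_prompt_w_system(
#           system="You are an AI assistant that follows instructions well.",
#           instruction="Name the capital of France.",
#       )
#   )
#   print(prompt)
#
# For training, axolotl calls load(tokenizer, cfg) itself once dataset type
# "orcamini" is set in config.yml; cfg is expected to provide train_on_inputs
# and sequence_len, as passed to OpenOrcaPromptTokenizingStrategy above.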