@@ -66,15 +66,34 @@ def build_prompt_w_system(
     ) -> Generator[str, None, None]:
         # returns the full prompt from instruction and optional input
         # if a label (=response, =output) is provided, it's also appended.
+        formatted_sys_prompt = f"### System:\n{system}\n\n" if system else ""
         if input:
-            res = system + self.turn_format.format(instruction=instruction, input=input)
+            res = formatted_sys_prompt + self.turn_format.format(
+                instruction=instruction, input=input
+            )
         else:
-            res = system + self.turn_no_input_format.format(instruction=instruction)
+            res = formatted_sys_prompt + self.turn_no_input_format.format(
+                instruction=instruction
+            )
         if output:
             res = f"{res}{output}"
         yield res


+class OpenOrcaSystemDataPrompter(SystemDataPrompter):
+    """
+    Alpaca Style Prompter that uses system prompts from the dataset, with OpenOrca prompts
+    """
+
+    def match_prompt_style(self):
+        if self.prompt_style == PromptStyle.INSTRUCT.value:
+            self.turn_format = "### User:\n{instruction}\n\n### Additional Context:\n{input}\n\n### Assistant:\n"
+            self.turn_no_input_format = "### User:\n{instruction}\n\n### Assistant:\n"
+        if self.prompt_style == PromptStyle.CHAT.value:
+            self.turn_format = "USER: {instruction}\n{input}\nASSISTANT:"
+            self.turn_no_input_format = "USER: {instruction}\nASSISTANT:"
+
+
 class OpenOrcaPromptTokenizingStrategy(InstructionWSystemPromptTokenizingStrategy):
     """
     Tokenizing strategy for OpenOrca datasets
@@ -113,7 +132,7 @@ def load_chat(tokenizer, cfg):


 def load_open_orca(tokenizer, cfg):
     return OpenOrcaPromptTokenizingStrategy(
-        SystemDataPrompter(PromptStyle.INSTRUCT.value),
+        OpenOrcaSystemDataPrompter(PromptStyle.INSTRUCT.value),
         tokenizer,
         cfg.train_on_inputs,
         cfg.sequence_len,
     )
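For context, here is a minimal standalone sketch (not the library code itself) that mirrors the template logic in the diff above. The constant names, the simplified build_prompt_w_system signature, and the sample strings are illustrative assumptions; it only shows the prompt text the new system-prompt handling would produce with the INSTRUCT-style OpenOrca templates.

# Standalone sketch mirroring the diff's INSTRUCT-style templates; TURN_FORMAT,
# TURN_NO_INPUT_FORMAT, and this build_prompt_w_system are illustrative, not
# imports from the library.
TURN_FORMAT = "### User:\n{instruction}\n\n### Additional Context:\n{input}\n\n### Assistant:\n"
TURN_NO_INPUT_FORMAT = "### User:\n{instruction}\n\n### Assistant:\n"


def build_prompt_w_system(system, instruction, input=None, output=None):
    # Wrap the dataset's system message in its own header; empty when absent.
    formatted_sys_prompt = f"### System:\n{system}\n\n" if system else ""
    if input:
        res = formatted_sys_prompt + TURN_FORMAT.format(instruction=instruction, input=input)
    else:
        res = formatted_sys_prompt + TURN_NO_INPUT_FORMAT.format(instruction=instruction)
    if output:
        res = f"{res}{output}"  # append the label/response when provided
    return res


print(build_prompt_w_system("You are a helpful assistant.", "Summarize the passage.", "Some passage."))
# ### System:
# You are a helpful assistant.
#
# ### User:
# Summarize the passage.
#
# ### Additional Context:
# Some passage.
#
# ### Assistant: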