@@ -89,8 +89,6 @@ def sample_sharegpt_requests(
89
89
tokenizer : PreTrainedTokenizerBase ,
90
90
fixed_output_len : Optional [int ] = None ,
91
91
) -> List [Tuple [str , int , int , None ]]:
92
- if fixed_output_len is not None and fixed_output_len < 4 :
93
- raise ValueError ("output_len too small" )
94
92
# Load the dataset.
95
93
with open (dataset_path ) as f :
96
94
dataset = json .load (f )
@@ -117,7 +115,7 @@ def sample_sharegpt_requests(
117
115
prompt_len = len (prompt_token_ids )
118
116
output_len = len (completion_token_ids
119
117
) if fixed_output_len is None else fixed_output_len
120
- if prompt_len < 4 or output_len < 4 :
118
+ if prompt_len < 4 or ( fixed_output_len is None and output_len < 4 ) :
121
119
# Prune too short sequences.
122
120
continue
123
121
if prompt_len > 1024 or prompt_len + output_len > 2048 :
@@ -228,10 +226,11 @@ def sample_hf_requests(
228
226
prompt_len = len (prompt_token_ids )
229
227
output_len = len (completion_token_ids
230
228
) if fixed_output_len is None else fixed_output_len
231
- if prompt_len < 4 or output_len < 4 :
229
+ if fixed_output_len is None and ( prompt_len < 4 or output_len < 4 ) :
232
230
# Prune too short sequences.
233
231
continue
234
- if prompt_len > 1024 or prompt_len + output_len > 2048 :
232
+ if fixed_output_len is None and \
233
+ (prompt_len > 1024 or prompt_len + output_len > 2048 ):
235
234
# Prune too long sequences.
236
235
continue
237
236
0 commit comments