Skip to content

Commit 1800a80

Browse files
committed
add random uuid
1 parent 6084145 commit 1800a80

File tree

1 file changed

+2
-3
lines changed

1 file changed

+2
-3
lines changed

src/genesys/generate.py

+2-3
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
from tqdm import tqdm
77
from transformers import AutoTokenizer
88
from genesys.utils import GcpBucket, repeat_elements, save_batch_results
9+
import uuid
910

1011
SYSTEM_PROMPT = "Solve the following math problem efficiently and clearly. Think carefully and step by step about your response and reason before providing a final response. Conclude your response with: \n\nTherefore, the final answer is: $\\boxed{answer}$. I hope it is correct.\n\nWhere [answer] is just the final number or expression that solves the problem. If the question is a multiple choice question, [answer] should be the letter indicating your correct response (e.g. \\text{A} or \\text{B})."
1112

@@ -44,7 +45,6 @@ def main(config: Config):
4445
max_samples = config.max_samples if config.max_samples is not None else len(math_dataset)
4546

4647
all_results = []
47-
file_counter = 0
4848

4949
for i in tqdm(range(0, min(max_samples, len(math_dataset)), config.batch_size), desc="Generating data"):
5050
batch = math_dataset[i : min(i + config.batch_size, len(math_dataset))]
@@ -73,10 +73,9 @@ def main(config: Config):
7373
all_results.append(result)
7474

7575
if len(all_results) >= config.sample_per_file:
76-
file_name = f"out_{file_counter}.jsonl"
76+
file_name = f"out_{uuid.uuid4()}.jsonl"
7777
save_batch_results(all_results, file_name, gcp_bucket)
7878
all_results = []
79-
file_counter += 1
8079

8180

8281
if __name__ == "__main__":

0 commit comments

Comments
 (0)