v0.3.3
Model download: https://huggingface.co/unit-mesh/autodev-deepseek-6.7b-finetunes
# Merge and shuffle records from different files.
# Each row pairs one input file with the value passed for it in
# `lines_per_file` (presumably the number of records drawn from that
# file -- TODO confirm against merge_jsonl's implementation).
_merge_plan = [
    (oss_instruction, 4000),
    (python_instruction, 4000),
    ('code_bugfix_cleaned_5K.json', 4000),
    ('codeGPT_CN_cleaned_20K.json', 15000),
    ('code_summarization_CN_cleaned_10K.json', 8000),
    ('code_generation_CN_cleaned_5K.json', 4000),
    ('summary.jsonl', 25000),
]

# Split the table back into the two parallel lists the call expects;
# order is preserved, so file i still lines up with count i.
merge_jsonl(
    output_file=merged_file,
    input_files=[path for path, _ in _merge_plan],
    lines_per_file=[count for _, count in _merge_plan],
)