@@ -78,7 +78,8 @@ std::vector<uint8_t> encode(llama_context *ctx, std::vector<llama_token> inp, gp
78
78
break ;
79
79
}
80
80
}
81
- if (match<0 ){
81
+ if (match < 0 )
82
+ {
82
83
LOG_ERR (" \n couldn't match %s" , llama_token_to_piece (ctx, inp[index]).c_str ());
83
84
exit (1 );
84
85
}
@@ -133,14 +134,13 @@ std::vector<uint8_t> encode(llama_context *ctx, std::vector<llama_token> inp, gp
133
134
int block_size = (bit_offset + PAD) / 8 - block_start;
134
135
if (block_size >= 256 )
135
136
{
136
- // TODO: handle more than 256 bytes of block data
137
+ // TODO: handle more than 256 bytes of block data
137
138
// (maybe allow multiple blocks in a row)
138
139
LOG_ERR (" Block too big %d >= 256" , block_size);
139
140
exit (-1 );
140
141
}
141
142
sample_ids_bitpacked[block_start + 1 ] = block_size & 0xff ;
142
143
143
-
144
144
// put last bytes
145
145
if (PAD)
146
146
{
@@ -212,7 +212,7 @@ std::vector<uint8_t> encode(llama_context *ctx, std::vector<llama_token> inp, gp
212
212
int block_size = (bit_offset + PAD) / 8 - block_start;
213
213
// endianness: big endian
214
214
sample_ids_bitpacked[block_start + 1 ] = block_size & 0xff ;
215
- total_pad+= PAD;
215
+ total_pad += PAD;
216
216
}
217
217
llama_batch_free (batch);
218
218
return sample_ids_bitpacked;
@@ -330,7 +330,7 @@ std::vector<llama_token> decode(llama_context *ctx, gpt_sampler *smpl, std::vect
330
330
331
331
auto &cur_p = smpl->cur_p ; // initialized by set_logits
332
332
llama_sampler_apply (smpl->chain , &cur_p);
333
-
333
+
334
334
auto token_id = cur_p.data [sample_id].id ;
335
335
out.push_back (token_id);
336
336
if (!inp.size () || token_id == inp[num_raw_tokens_header + index])
@@ -482,7 +482,7 @@ int main(int argc, char **argv)
482
482
params.sparams .top_p = 1 ;
483
483
params.sparams .top_k = -1 ;
484
484
// Avoid temp=0 because greedy sampling breaks stuff
485
- params.sparams .temp = 1 .;
485
+ params.sparams .temp = 1 .;
486
486
487
487
gpt_init ();
488
488
@@ -544,38 +544,43 @@ int main(int argc, char **argv)
544
544
auto t_enc_end = ggml_time_us ();
545
545
546
546
LOG (" \n " );
547
- if (!params.no_perf ){
547
+ if (!params.no_perf )
548
+ {
548
549
LOG (" \n Input: %d characters (%d tokens)" , params.prompt .length (), inp.size ());
549
550
550
551
float compressed_bits_per_token = 8 * (float )sample_ids_bitpacked.size () / (float )inp.size ();
551
552
float compressed_bits_per_char = 8 * (float )sample_ids_bitpacked.size () / (float )params.prompt .length ();
552
553
553
554
LOG (" \n %d compressed bytes,(%04f bits per token, %04f bits per character)\n " , (int )sample_ids_bitpacked.size (), compressed_bits_per_token, compressed_bits_per_char);
554
- LOG (" \n %d padding bits, (%04f bits per character without padding)" , total_pad, compressed_bits_per_char - total_pad/ (float )params.prompt .length ());
555
- LOG (" \n PPL (over)estimation: %04f (%04f with padding)" , exp2 (compressed_bits_per_token- total_pad/ (float )inp.size ()),exp2 (compressed_bits_per_token));
555
+ LOG (" \n %d padding bits, (%04f bits per character without padding)" , total_pad, compressed_bits_per_char - total_pad / (float )params.prompt .length ());
556
+ LOG (" \n PPL (over)estimation: %04f (%04f with padding)" , exp2 (compressed_bits_per_token - total_pad / (float )inp.size ()), exp2 (compressed_bits_per_token));
556
557
}
557
- // maybe this needs to be changed
558
- if (params.out_file != " imatrix.dat" ){
558
+ // maybe this needs to be changed
559
+ if (params.out_file != " imatrix.dat" )
560
+ {
559
561
// dump uint8array to bin file
560
562
std::ofstream ofs (params.out_file .c_str (), std::ios::binary);
561
- ofs.write ((char *)&sample_ids_bitpacked[0 ], sample_ids_bitpacked.size ());
563
+ ofs.write ((char *)&sample_ids_bitpacked[0 ], sample_ids_bitpacked.size ());
562
564
ofs.close ();
563
- }else {
565
+ }
566
+ else
567
+ {
564
568
LOG (" \n ------------\n " );
565
- // print as hex to stdout
566
- for (int i = 0 ; i < sample_ids_bitpacked.size (); i++){
569
+ // print as hex to stdout
570
+ for (int i = 0 ; i < sample_ids_bitpacked.size (); i++)
571
+ {
567
572
LOG (" %02X " , sample_ids_bitpacked[i]);
568
573
}
569
574
}
570
-
571
575
}
572
576
else if (params.compress_mode == 2 )
573
577
{
574
- // decompress mode
575
- // load sample_ids_bitpacked from params.prompt_file
578
+ // decompress mode
579
+ // load sample_ids_bitpacked from params.prompt_file
576
580
std::ifstream ifs (params.prompt_file .c_str (), std::ios::binary);
577
581
578
- if (!ifs) {
582
+ if (!ifs)
583
+ {
579
584
LOG_ERR (" %s: failed to open file\n " , __func__);
580
585
return -1 ;
581
586
}
@@ -588,14 +593,16 @@ int main(int argc, char **argv)
588
593
std::vector<uint8_t > sample_ids_bitpacked (fileSize);
589
594
590
595
// Read the ifs into the vector
591
- if (!ifs.read (reinterpret_cast <char *>(sample_ids_bitpacked.data ()), fileSize)) {
596
+ if (!ifs.read (reinterpret_cast <char *>(sample_ids_bitpacked.data ()), fileSize))
597
+ {
592
598
LOG_ERR (" %s: failed to read file\n " , __func__);
593
599
return -1 ;
594
600
}
595
601
ifs.close ();
596
602
597
- // Debug: print as hex
598
- for (int i = 0 ; i < sample_ids_bitpacked.size (); i++){
603
+ // Debug: print as hex
604
+ for (int i = 0 ; i < sample_ids_bitpacked.size (); i++)
605
+ {
599
606
LOG (" %02X " , sample_ids_bitpacked[i]);
600
607
}
601
608
LOG (" \n " );
@@ -612,23 +619,22 @@ int main(int argc, char **argv)
612
619
613
620
std::vector<llama_token> out = decode (ctx, smpl, sample_ids_bitpacked);
614
621
615
-
616
622
gpt_sampler_free (smpl);
617
623
auto t_dec_end = ggml_time_us ();
618
624
619
- // maybe this needs to be changed
620
- if (params.out_file != " imatrix.dat" ){
625
+ // maybe this needs to be changed
626
+ if (params.out_file != " imatrix.dat" )
627
+ {
621
628
// dump as string to file
622
629
std::string out_str = ::llama_detokenize (ctx, out);
623
630
624
631
std::ofstream ofs (params.out_file .c_str (), std::ios::binary);
625
- ofs.write ((char *)&out_str[0 ], out_str.size ());
632
+ ofs.write ((char *)&out_str[0 ], out_str.size ());
626
633
ofs.close ();
627
634
}
628
635
629
636
llama_free (ctx);
630
637
llama_free_model (model);
631
-
632
638
}
633
639
634
640
llama_backend_free ();
0 commit comments