@@ -296,6 +296,7 @@ enum llm_kv {
     LLM_KV_EXPERT_WEIGHTS_SCALE,
     LLM_KV_POOLING_TYPE,
     LLM_KV_LOGIT_SCALE,
+    LLM_KV_DECODER_START_TOKEN_ID,
 
     LLM_KV_ATTENTION_HEAD_COUNT,
     LLM_KV_ATTENTION_HEAD_COUNT_KV,
@@ -384,6 +385,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_EXPERT_WEIGHTS_SCALE,          "%s.expert_weights_scale"          },
     { LLM_KV_POOLING_TYPE,                  "%s.pooling_type"                  },
     { LLM_KV_LOGIT_SCALE,                   "%s.logit_scale"                   },
+    { LLM_KV_DECODER_START_TOKEN_ID,        "%s.decoder_start_token_id"        },
 
     { LLM_KV_ATTENTION_HEAD_COUNT,          "%s.attention.head_count"          },
     { LLM_KV_ATTENTION_HEAD_COUNT_KV,       "%s.attention.head_count_kv"       },
@@ -1908,6 +1910,7 @@ struct llama_hparams {
     uint32_t n_expert_used  = 0;
     uint32_t n_vocab_type   = 0; // for BERT-style token types
     uint32_t n_rel_attn_bkts = 0;
+    int32_t  decoder_start_token_id = -1;
 
     uint32_t n_layer_dense_lead = 0;
     uint32_t n_lora_q = 0;
@@ -4606,6 +4609,10 @@ static void llm_load_hparams(
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,      hparams.f_norm_rms_eps);
                 ml.get_key(LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, hparams.n_rel_attn_bkts);
+                uint32_t decoder_start_token_id;
+                if (ml.get_key(LLM_KV_DECODER_START_TOKEN_ID, decoder_start_token_id, false)) {
+                    hparams.decoder_start_token_id = decoder_start_token_id;
+                }
                 model.type = e_model::MODEL_UNKNOWN;
             } break;
         default: (void)0;
@@ -17872,6 +17879,17 @@ struct ggml_tensor * llama_get_model_tensor(struct llama_model * model, const ch
     return it->second;
 }
 
+bool llama_model_has_encoder(const struct llama_model * model) {
+    switch (model->arch) {
+        case LLM_ARCH_T5: return true;
+        default:          return false;
+    }
+}
+
+llama_token llama_model_decoder_start_token(const struct llama_model * model) {
+    return model->hparams.decoder_start_token_id;
+}
+
 uint32_t llama_model_quantize(
         const char * fname_inp,
         const char * fname_out,
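
Usage note (not part of the diff): a minimal sketch of how a caller might combine the two new public functions added above. llama_token_bos() is the existing vocab accessor from llama.h; falling back to BOS when the GGUF does not store a decoder start token (the hparams default of -1) is an assumption made for this example, and the helper name is hypothetical.

// Hypothetical helper: pick the token that seeds the decoder sequence.
// Assumes BOS is an acceptable fallback when decoder_start_token_id was
// not present in the GGUF metadata (i.e. it is still -1).
static llama_token pick_decoder_start_token(const struct llama_model * model) {
    if (!llama_model_has_encoder(model)) {
        // decoder-only model: no separate encoder pass, just start from BOS
        return llama_token_bos(model);
    }
    llama_token id = llama_model_decoder_start_token(model);
    if (id == -1) {
        id = llama_token_bos(model);
    }
    return id;
}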