@@ -299,6 +299,7 @@ struct vk_device_struct {
299
299
vk_pipeline pipeline_cpy_f32_f32, pipeline_cpy_f32_f16, pipeline_cpy_f16_f16;
300
300
vk_pipeline pipeline_contig_cpy_f32_f32, pipeline_contig_cpy_f32_f16, pipeline_contig_cpy_f16_f16;
301
301
vk_pipeline pipeline_cpy_f32_quant[GGML_TYPE_COUNT];
302
+ vk_pipeline pipeline_cpy_f32_quant_rte[GGML_TYPE_COUNT];
302
303
vk_pipeline pipeline_cpy_quant_f32[GGML_TYPE_COUNT];
303
304
vk_pipeline pipeline_norm_f32;
304
305
vk_pipeline pipeline_group_norm_f32;
@@ -2281,13 +2282,21 @@ static void ggml_vk_load_shaders(vk_device& device) {
2281
2282
ggml_vk_create_pipeline (device, device->pipeline_contig_cpy_f32_f32 , " contig_cpy_f32_f32" , contig_cpy_f32_f32_len, contig_cpy_f32_f32_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {512 , 1 , 1 }, {}, 1 );
2282
2283
ggml_vk_create_pipeline (device, device->pipeline_contig_cpy_f32_f16 , " contig_cpy_f32_f16" , contig_cpy_f32_f16_len, contig_cpy_f32_f16_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {512 , 1 , 1 }, {}, 1 );
2283
2284
ggml_vk_create_pipeline (device, device->pipeline_contig_cpy_f16_f16 , " contig_cpy_f16_f16" , contig_cpy_f16_f16_len, contig_cpy_f16_f16_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {512 , 1 , 1 }, {}, 1 );
2284
-
2285
- ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q4_0], " cpy_f32_q4_0" , cpy_f32_q4_0_len, cpy_f32_q4_0_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_0), 1 , 1 }, {}, 1 );
2286
- ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q4_1], " cpy_f32_q4_1" , cpy_f32_q4_1_len, cpy_f32_q4_1_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_1), 1 , 1 }, {}, 1 );
2287
- ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q5_0], " cpy_f32_q5_0" , cpy_f32_q5_0_len, cpy_f32_q5_0_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q5_0), 1 , 1 }, {}, 1 );
2288
- ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q5_1], " cpy_f32_q5_1" , cpy_f32_q5_1_len, cpy_f32_q5_1_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q5_1), 1 , 1 }, {}, 1 );
2289
- ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q8_0], " cpy_f32_q8_0" , cpy_f32_q8_0_len, cpy_f32_q8_0_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q8_0), 1 , 1 }, {}, 1 );
2290
- ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_IQ4_NL], " cpy_f32_iq4_nl" , cpy_f32_iq4_nl_len, cpy_f32_iq4_nl_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_IQ4_NL), 1 , 1 }, {}, 1 );
2285
+ if (device->float_controls_rte_fp16 ) {
2286
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant_rte [GGML_TYPE_Q4_0], " cpy_f32_q4_0" , cpy_f32_q4_0_rte_len, cpy_f32_q4_0_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_0), 1 , 1 }, {}, 1 ); // RTE branch: use the _rte shader variant, same as the other quant types below
2287
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant_rte [GGML_TYPE_Q4_1], " cpy_f32_q4_1" , cpy_f32_q4_1_rte_len, cpy_f32_q4_1_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_1), 1 , 1 }, {}, 1 );
2288
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant_rte [GGML_TYPE_Q5_0], " cpy_f32_q5_0" , cpy_f32_q5_0_rte_len, cpy_f32_q5_0_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q5_0), 1 , 1 }, {}, 1 );
2289
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant_rte [GGML_TYPE_Q5_1], " cpy_f32_q5_1" , cpy_f32_q5_1_rte_len, cpy_f32_q5_1_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q5_1), 1 , 1 }, {}, 1 );
2290
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant_rte [GGML_TYPE_Q8_0], " cpy_f32_q8_0" , cpy_f32_q8_0_rte_len, cpy_f32_q8_0_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q8_0), 1 , 1 }, {}, 1 );
2291
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant_rte [GGML_TYPE_IQ4_NL], " cpy_f32_iq4_nl" , cpy_f32_iq4_nl_rte_len, cpy_f32_iq4_nl_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_IQ4_NL), 1 , 1 }, {}, 1 );
2292
+ } else {
2293
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q4_0], " cpy_f32_q4_0" , cpy_f32_q4_0_len, cpy_f32_q4_0_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_0), 1 , 1 }, {}, 1 );
2294
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q4_1], " cpy_f32_q4_1" , cpy_f32_q4_1_len, cpy_f32_q4_1_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_1), 1 , 1 }, {}, 1 );
2295
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q5_0], " cpy_f32_q5_0" , cpy_f32_q5_0_len, cpy_f32_q5_0_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q5_0), 1 , 1 }, {}, 1 );
2296
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q5_1], " cpy_f32_q5_1" , cpy_f32_q5_1_len, cpy_f32_q5_1_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q5_1), 1 , 1 }, {}, 1 );
2297
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q8_0], " cpy_f32_q8_0" , cpy_f32_q8_0_len, cpy_f32_q8_0_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q8_0), 1 , 1 }, {}, 1 );
2298
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_IQ4_NL], " cpy_f32_iq4_nl" , cpy_f32_iq4_nl_len, cpy_f32_iq4_nl_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_IQ4_NL), 1 , 1 }, {}, 1 );
2299
+ }
2291
2300
2292
2301
ggml_vk_create_pipeline (device, device->pipeline_cpy_quant_f32 [GGML_TYPE_Q4_0], " cpy_q4_0_f32" , cpy_q4_0_f32_len, cpy_q4_0_f32_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_0), 1 , 1 }, {}, 1 );
2293
2302
ggml_vk_create_pipeline (device, device->pipeline_cpy_quant_f32 [GGML_TYPE_Q4_1], " cpy_q4_1_f32" , cpy_q4_1_f32_len, cpy_q4_1_f32_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_1), 1 , 1 }, {}, 1 );
@@ -4133,16 +4142,30 @@ static vk_pipeline ggml_vk_get_cpy_pipeline(ggml_backend_vk_context * ctx, const
4133
4142
}
4134
4143
}
4135
4144
if (src->type == GGML_TYPE_F32) {
4136
- switch (to) {
4137
- case GGML_TYPE_Q4_0:
4138
- case GGML_TYPE_Q4_1:
4139
- case GGML_TYPE_Q5_0:
4140
- case GGML_TYPE_Q5_1:
4141
- case GGML_TYPE_Q8_0:
4142
- case GGML_TYPE_IQ4_NL:
4143
- return ctx->device ->pipeline_cpy_f32_quant [to];
4144
- default :
4145
- break ;
4145
+ if (ctx->device ->float_controls_rte_fp16 ) {
4146
+ switch (to) {
4147
+ case GGML_TYPE_Q4_0:
4148
+ case GGML_TYPE_Q4_1:
4149
+ case GGML_TYPE_Q5_0:
4150
+ case GGML_TYPE_Q5_1:
4151
+ case GGML_TYPE_Q8_0:
4152
+ case GGML_TYPE_IQ4_NL:
4153
+ return ctx->device ->pipeline_cpy_f32_quant_rte [to];
4154
+ default :
4155
+ break ;
4156
+ }
4157
+ } else {
4158
+ switch (to) {
4159
+ case GGML_TYPE_Q4_0:
4160
+ case GGML_TYPE_Q4_1:
4161
+ case GGML_TYPE_Q5_0:
4162
+ case GGML_TYPE_Q5_1:
4163
+ case GGML_TYPE_Q8_0:
4164
+ case GGML_TYPE_IQ4_NL:
4165
+ return ctx->device ->pipeline_cpy_f32_quant [to];
4166
+ default :
4167
+ break ;
4168
+ }
4146
4169
}
4147
4170
}
4148
4171
0 commit comments