@@ -9790,8 +9790,8 @@ static void ggml_cuda_mul_mat_id(const ggml_tensor * src0, const ggml_tensor * s
9790
9790
// TODO: mmq/mmv support
9791
9791
#endif
9792
9792
9793
- const int64_t nb11 = src1->nb [1 ];
9794
- const int64_t nb1 = dst->nb [1 ];
9793
+ const size_t nb11 = src1->nb[1];
9794
+ const size_t nb1 = dst->nb[1];
9795
9795
9796
9796
const struct ggml_tensor * ids = src0;
9797
9797
const int32_t id = ((int32_t *) dst->op_params)[0];
@@ -10304,15 +10304,11 @@ GGML_CALL static void ggml_backend_cuda_buffer_init_tensor(ggml_backend_buffer_t
10304
10304
10305
10305
if (ggml_is_quantized(tensor->type)) {
10306
10306
// initialize padding to 0 to avoid possible NaN values
10307
- int64_t row_low = 0 ;
10308
- int64_t row_high = ggml_nrows (tensor);
10309
- int64_t nrows_split = row_high - row_low;
10310
-
10311
- size_t original_size = ggml_nbytes_split (tensor, nrows_split);
10307
+ size_t original_size = ggml_nbytes(tensor);
10312
10308
size_t padded_size = ggml_backend_buft_get_alloc_size(buffer->buft, tensor);
10313
10309
10314
10310
if (padded_size > original_size && tensor->view_src == nullptr) {
10315
- CUDA_CHECK (cudaMemsetAsync ((char *)tensor->data + original_size, 0 , padded_size - original_size, g_cudaStreams[ctx-> device ][ 0 ] ));
10311
+ CUDA_CHECK(cudaMemset ((char *)tensor->data + original_size, 0, padded_size - original_size));
10316
10312
}
10317
10313
}
10318
10314
}
@@ -10415,12 +10411,7 @@ GGML_CALL static size_t ggml_backend_cuda_buffer_type_get_alignment(ggml_backend
10415
10411
}
10416
10412
10417
10413
GGML_CALL static size_t ggml_backend_cuda_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) {
10418
- int64_t row_low = 0 ;
10419
- int64_t row_high = ggml_nrows (tensor);
10420
- int64_t nrows_split = row_high - row_low;
10421
-
10422
- size_t size = ggml_nbytes_split (tensor, nrows_split);
10423
-
10414
+ size_t size = ggml_nbytes(tensor);
10424
10415
int64_t ne0 = tensor->ne[0];
10425
10416
10426
10417
if (ggml_is_quantized(tensor->type)) {
0 commit comments