@@ -346,14 +346,14 @@ mha_fwd_sparse(at::Tensor &q, // batch_size x seqlen_q x num_heads x hea
346
346
const at::Tensor &block_offset,
347
347
const at::Tensor &column_count,
348
348
const at::Tensor &column_index,
349
- const c10 ::optional<at::Tensor> &out_, // batch_size x seqlen_q x num_heads x head_size
350
- const c10 ::optional<at::Tensor> &alibi_slopes_, // num_heads or batch_size x num_heads
349
+ const std ::optional<at::Tensor> &out_, // batch_size x seqlen_q x num_heads x head_size
350
+ const std ::optional<at::Tensor> &alibi_slopes_, // num_heads or batch_size x num_heads
351
351
const double p_dropout,
352
352
const double softmax_scale,
353
353
bool is_causal,
354
354
const double softcap,
355
355
const bool return_softmax,
356
- c10 ::optional<at::Generator> gen_) {
356
+ std ::optional<at::Generator> gen_) {
357
357
358
358
auto dprops = at::cuda::getCurrentDeviceProperties ();
359
359
// bool is_sm75 = dprops->major == 7 && dprops->minor == 5;
@@ -515,11 +515,11 @@ mha_varlen_fwd_sparse(at::Tensor &q, // total_q x num_heads x head_size, total_
515
515
const at::Tensor &block_offset,
516
516
const at::Tensor &column_count,
517
517
const at::Tensor &column_index,
518
- const c10 ::optional<at::Tensor> &out_, // total_q x num_heads x head_size, total_k := \sum_{i=0}^{b} s_i
518
+ const std ::optional<at::Tensor> &out_, // total_q x num_heads x head_size, total_k := \sum_{i=0}^{b} s_i
519
519
const at::Tensor &cu_seqlens_q, // b+1
520
520
const at::Tensor &cu_seqlens_k, // b+1
521
- const c10 ::optional<at::Tensor> &seqused_k, // b. If given, only this many elements of each batch element's keys are used.
522
- const c10 ::optional<at::Tensor> &alibi_slopes_, // num_heads or b x num_heads
521
+ const std ::optional<at::Tensor> &seqused_k, // b. If given, only this many elements of each batch element's keys are used.
522
+ const std ::optional<at::Tensor> &alibi_slopes_, // num_heads or b x num_heads
523
523
int64_t max_seqlen_q,
524
524
const int64_t max_seqlen_k,
525
525
const double p_dropout,
@@ -528,7 +528,7 @@ mha_varlen_fwd_sparse(at::Tensor &q, // total_q x num_heads x head_size, total_
528
528
bool is_causal,
529
529
const double softcap,
530
530
const bool return_softmax,
531
- c10 ::optional<at::Generator> gen_) {
531
+ std ::optional<at::Generator> gen_) {
532
532
533
533
auto dprops = at::cuda::getCurrentDeviceProperties ();
534
534
// bool is_sm75 = dprops->major == 7 && dprops->minor == 5;
0 commit comments