Skip to content

Use one matrix per load for BT crashes #4100

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
alexbaden opened this issue May 5, 2025 · 0 comments
Open

Use one matrix per load for BT crashes #4100

alexbaden opened this issue May 5, 2025 · 0 comments

Comments

@alexbaden
Copy link
Contributor

// COM: transpose 2D block load reduced to be <= block size
// CHECK: llvm.func spir_funccc @_Z51intel_sub_group_2d_block_read_transpose_32b_32r8x1cPU3AS1viiiDv2_iPj(!llvm.ptr<1> {llvm.nonnull, llvm.readonly}, i32, i32, i32, vector<2xi32>, !llvm.ptr {llvm.nonnull, llvm.writeonly}) attributes {no_unwind, will_return}
#dpas = #triton_intel_gpu.dpas<{repeatCount = 8, systolicDepth = 8, executionSize = 16, opsPerChan = 1, threadsPerWarp = 16, warpsPerCTA = [8, 4], repCluster = [4, 2], A = [32, 8], B = [8, 32], C = [32, 32]}>
#dot1 = #ttg.dot_op<{opIdx = 1, parent = #dpas, kWidth=1}>
module attributes {"ttg.num-warps" = 8 : i32, "ttg.threads-per-warp" = 16 : i32} {
  tt.func public @matmul_no_scf_with_advance_kernel(%arg0: !tt.ptr<f32>, %arg1: !tt.ptr<f32>, %arg2: i64, %arg3: i64, %arg4: i64, %arg5: i64, %arg7: i64) {
    %c0_i32 = arith.constant 0 : i32
    %c1_i64 = arith.constant 1 : i64
    %ptrB = tt.make_tensor_ptr %arg1, [%arg4, %arg3], [%c1_i64, %arg7], [%c0_i32, %c0_i32] {order = array<i32: 1, 0>} : <tensor<16x128xf32, #dot1>>
    // CHECK-COUNT-2: llvm.call spir_funccc @_Z51intel_sub_group_2d_block_read_transpose_32b_32r8x1cPU3AS1viiiDv2_iPj({{.*}}) {{.*}} : (!llvm.ptr<1>{{.*}}, i32, i32, i32, vector<2xi32>, !llvm.ptr{{.*}}) -> ()
    // CHECK-NOT: llvm.call spir_funccc @_Z51intel_sub_group_2d_block_read_transpose_32b_32r8x1cPU3AS1viiiDv2_iPj({{.*}}) {{.*}} : (!llvm.ptr<1>{{.*}}, i32, i32, i32, vector<2xi32>, !llvm.ptr{{.*}}) -> ()
    %B = tt.load %ptrB {boundaryCheck = array<i32: 0>, padding = 1 : i32, triton_intel_gpu.block_io = "column_major"} : !tt.ptr<tensor<16x128xf32, #dot1>>
    tt.return
  }
}

Compiling this test case crashes with the following assertion failure:

triton-opt: /home/runner/work/triton/triton/llvm-project/mlir/lib/IR/SymbolTable.cpp:137: mlir::SymbolTable::SymbolTable(mlir::Operation *): Assertion `inserted.second && "expected region to contain uniquely named symbol operations"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.

The call stack at the point of failure shows:

#0  __pthread_kill_implementation (no_tid=0, signo=6, threadid=140737348066304) at ./nptl/pthread_kill.c:44
#1  __pthread_kill_internal (signo=6, threadid=140737348066304) at ./nptl/pthread_kill.c:78
#2  __GI___pthread_kill (threadid=140737348066304, signo=signo@entry=6) at ./nptl/pthread_kill.c:89
#3  0x00007ffff7a7c476 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
#4  0x00007ffff7a627f3 in __GI_abort () at ./stdlib/abort.c:79
#5  0x00007ffff7a6271b in __assert_fail_base (fmt=0x7ffff7c17130 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=0x55555acd5ad2 "inserted.second && \"expected region to contain uniquely named symbol operations\"", 
    file=0x55555acd59a0 "/home/runner/work/triton/triton/llvm-project/mlir/lib/IR/SymbolTable.cpp", line=137, function=<optimized out>) at ./assert/assert.c:94
#6  0x00007ffff7a73e96 in __GI___assert_fail (assertion=0x55555acd5ad2 "inserted.second && \"expected region to contain uniquely named symbol operations\"", file=0x55555acd59a0 "/home/runner/work/triton/triton/llvm-project/mlir/lib/IR/SymbolTable.cpp", line=137, 
    function=0x55555acd59e9 "mlir::SymbolTable::SymbolTable(mlir::Operation *)") at ./assert/assert.c:103
#7  0x00005555594e016f in mlir::SymbolTable::SymbolTable(mlir::Operation*) ()
#8  0x00005555594e417c in mlir::SymbolTableCollection::getSymbolTable(mlir::Operation*) ()
#9  0x00005555594e7bac in mlir::Operation* llvm::function_ref<mlir::Operation* (mlir::Operation*, mlir::StringAttr)>::callback_fn<mlir::SymbolTableCollection::lookupSymbolIn(mlir::Operation*, mlir::SymbolRefAttr, llvm::SmallVectorImpl<mlir::Operation*>&)::$_5>(long, mlir::Operation*, mlir::StringAttr)
    ()
--Type <RET> for more, q to quit, c to continue without paging--
#10 0x00005555594e2902 in lookupSymbolInImpl(mlir::Operation*, mlir::SymbolRefAttr, llvm::SmallVectorImpl<mlir::Operation*>&, llvm::function_ref<mlir::Operation* (mlir::Operation*, mlir::StringAttr)>) ()
#11 0x00005555594e4436 in mlir::SymbolTableCollection::lookupNearestSymbolFrom(mlir::Operation*, mlir::SymbolRefAttr) ()
#12 0x0000555555f9cfa0 in mlir::CallGraph<llvm::DenseMap<mlir::Value, mlir::triton::AxisInfo, llvm::DenseMapInfo<mlir::Value, void>, llvm::detail::DenseMapPair<mlir::Value, mlir::triton::AxisInfo> > >::build()::{lambda(mlir::Operation*)#1}::operator()(mlir::Operation*) const (__closure=0x7fffffffaab0, 
    op=0x55555bf3ced0) at /home/jovyan/intel-xpu-backend-for-triton/include/triton/Analysis/Utility.h:359
<< snip >>
#21 0x0000555555f9d11a in mlir::CallGraph<llvm::DenseMap<mlir::Value, mlir::triton::AxisInfo, llvm::DenseMapInfo<mlir::Value, void>, llvm::detail::DenseMapPair<mlir::Value, mlir::triton::AxisInfo> > >::build (this=0x7fffffffb360)
    at /home/jovyan/intel-xpu-backend-for-triton/include/triton/Analysis/Utility.h:356
#22 0x0000555555f9bf75 in mlir::CallGraph<llvm::DenseMap<mlir::Value, mlir::triton::AxisInfo, llvm::DenseMapInfo<mlir::Value, void>, llvm::detail::DenseMapPair<mlir::Value, mlir::triton::AxisInfo> > >::CallGraph (this=0x7fffffffb360, moduleOp=...)
    at /home/jovyan/intel-xpu-backend-for-triton/include/triton/Analysis/Utility.h:281
#23 0x0000555555f9a478 in mlir::triton::ModuleAxisInfoAnalysis::ModuleAxisInfoAnalysis (this=0x7fffffffb360, moduleOp=...) at /home/jovyan/intel-xpu-backend-for-triton/include/triton/Analysis/AxisInfo.h:222

As frames #21–#23 show, the failing `mlir::SymbolTable` constructor is reached from the AxisInfo analysis (`ModuleAxisInfoAnalysis`) while it builds the module call graph (`CallGraph::build` in `triton/Analysis/Utility.h`), which performs a symbol lookup that asserts because the region contains non-uniquely named symbol operations.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant