blob: f9254728bab4176402d7c8e809a832880226fd59 [file]
// RUN: mlir-opt -convert-xegpu-to-xevm -canonicalize %s | FileCheck %s
// Lowering test for a 2-D block prefetch of a sub-byte (i4) tile: an
// xegpu.prefetch_nd on an 8x64xi4 descriptor is expected to become a single
// xevm.blockprefetch2d op.
gpu.module @prefetch_check {
  // CHECK-LABEL: gpu.func @prefetch_matrix_a
  gpu.func @prefetch_matrix_a(%src: memref<16x128xi4, 1>) kernel {
    // i32 constants that the lowered prefetch is expected to take as operands.
    // CHECK: %[[C64_I32:.*]] = arith.constant 64 : i32
    // CHECK: %[[C8_I32:.*]] = arith.constant 8 : i32
    // CHECK: %[[C16_I32:.*]] = arith.constant 16 : i32

    // Drop the explicit address space from the kernel argument, then describe
    // an 8x64 tile of the 16x128 i4 buffer.
    %generic_src = memref.memory_space_cast %src : memref<16x128xi4, 1> to memref<16x128xi4>
    %tile_desc = xegpu.create_nd_tdesc %generic_src : memref<16x128xi4> -> !xegpu.tensor_desc<8x64xi4>

    // The expected op describes the tile with 16-bit elements
    // (elem_size_in_bits = 16, tile_width = 16) rather than 64 i4 values;
    // 64 x 4 bits equals 16 x 16 bits. The pointer operand is still expected
    // to be in address space 1 even though the descriptor was built from the
    // casted memref.
    // NOTE(review): the operand order after the pointer is presumably base
    // width, base height, base pitch, x offset, y offset -- confirm against
    // the XeVM op definition.
    // CHECK: xevm.blockprefetch2d %{{.*}}, %[[C64_I32]], %[[C16_I32]], %[[C64_I32]], %[[C16_I32]], %[[C8_I32]]
    // CHECK-SAME: <{cache_control = #xevm.load_cache_control<L1c_L2uc_L3uc>, elem_size_in_bits = 16 : i32,
    // CHECK-SAME: tile_height = 8 : i32, tile_width = 16 : i32, v_blocks = 1 : i32}> : (!llvm.ptr<1>
    xegpu.prefetch_nd %tile_desc[8, 64] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<8x64xi4>
    gpu.return
  }
}