[NVPTX] Make tensor shape part of WMMA intrinsic's name.
This is needed for the upcoming implementation of the
new 8x32x16 and 32x8x16 variants of WMMA instructions
introduced in CUDA 9.1.
Differential Revision: https://reviews.llvm.org/D44719
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@328158 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 996e5e7..d3ea1f2 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -10515,23 +10515,23 @@
unsigned NumResults;
switch (BuiltinID) {
case NVPTX::BI__hmma_m16n16k16_ld_a:
- IID = isColMajor ? Intrinsic::nvvm_wmma_load_a_f16_col_stride
- : Intrinsic::nvvm_wmma_load_a_f16_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride;
NumResults = 8;
break;
case NVPTX::BI__hmma_m16n16k16_ld_b:
- IID = isColMajor ? Intrinsic::nvvm_wmma_load_b_f16_col_stride
- : Intrinsic::nvvm_wmma_load_b_f16_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride;
NumResults = 8;
break;
case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
- IID = isColMajor ? Intrinsic::nvvm_wmma_load_c_f16_col_stride
- : Intrinsic::nvvm_wmma_load_c_f16_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride;
NumResults = 4;
break;
case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
- IID = isColMajor ? Intrinsic::nvvm_wmma_load_c_f32_col_stride
- : Intrinsic::nvvm_wmma_load_c_f32_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride;
NumResults = 8;
break;
default:
@@ -10566,13 +10566,13 @@
// for some reason nvcc builtins use _c_.
switch (BuiltinID) {
case NVPTX::BI__hmma_m16n16k16_st_c_f16:
- IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f16_col_stride
- : Intrinsic::nvvm_wmma_store_d_f16_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride;
NumResults = 4;
break;
case NVPTX::BI__hmma_m16n16k16_st_c_f32:
- IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f32_col_stride
- : Intrinsic::nvvm_wmma_store_d_f32_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride;
break;
default:
llvm_unreachable("Unexpected builtin ID.");
@@ -10591,8 +10591,8 @@
return Result;
}
- // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf)
- // --> Intrinsic::nvvm_wmma_mma_sync<layout A,B><DType><CType><Satf>
+ // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
+ // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
@@ -10613,15 +10613,15 @@
bool Satf = SatfArg.getSExtValue();
// clang-format off
-#define MMA_VARIANTS(type) {{ \
- Intrinsic::nvvm_wmma_mma_sync_row_row_##type, \
- Intrinsic::nvvm_wmma_mma_sync_row_row_##type##_satfinite, \
- Intrinsic::nvvm_wmma_mma_sync_row_col_##type, \
- Intrinsic::nvvm_wmma_mma_sync_row_col_##type##_satfinite, \
- Intrinsic::nvvm_wmma_mma_sync_col_row_##type, \
- Intrinsic::nvvm_wmma_mma_sync_col_row_##type##_satfinite, \
- Intrinsic::nvvm_wmma_mma_sync_col_col_##type, \
- Intrinsic::nvvm_wmma_mma_sync_col_col_##type##_satfinite \
+#define MMA_VARIANTS(type) {{ \
+ Intrinsic::nvvm_wmma_m16n16k16_mma_row_row_##type, \
+ Intrinsic::nvvm_wmma_m16n16k16_mma_row_row_##type##_satfinite, \
+ Intrinsic::nvvm_wmma_m16n16k16_mma_row_col_##type, \
+ Intrinsic::nvvm_wmma_m16n16k16_mma_row_col_##type##_satfinite, \
+ Intrinsic::nvvm_wmma_m16n16k16_mma_col_row_##type, \
+ Intrinsic::nvvm_wmma_m16n16k16_mma_col_row_##type##_satfinite, \
+ Intrinsic::nvvm_wmma_m16n16k16_mma_col_col_##type, \
+ Intrinsic::nvvm_wmma_m16n16k16_mma_col_col_##type##_satfinite \
}}
// clang-format on
diff --git a/test/CodeGen/builtins-nvptx-sm_70.cu b/test/CodeGen/builtins-nvptx-sm_70.cu
index 09e5b6b..1e9133b 100644
--- a/test/CodeGen/builtins-nvptx-sm_70.cu
+++ b/test/CodeGen/builtins-nvptx-sm_70.cu
@@ -22,145 +22,145 @@
__device__ void nvvm_wmma(int *src, int *dst,
float *fsrc, float *fdst,
int ldm) {
- // CHECK: call {{.*}} @llvm.nvvm.wmma.load.a.sync.row.m16n16k16.stride.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16
// expected-error@+1 {{'__hmma_m16n16k16_ld_a' needs target feature ptx60}}
__hmma_m16n16k16_ld_a(dst, src, ldm, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.load.a.sync.col.m16n16k16.stride.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16
// expected-error@+1 {{'__hmma_m16n16k16_ld_a' needs target feature ptx60}}
__hmma_m16n16k16_ld_a(dst, src+1, ldm, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.load.b.sync.row.m16n16k16.stride.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16
// expected-error@+1 {{'__hmma_m16n16k16_ld_b' needs target feature ptx60}}
__hmma_m16n16k16_ld_b(dst, src, ldm, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.load.b.sync.col.m16n16k16.stride.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16
// expected-error@+1 {{'__hmma_m16n16k16_ld_b' needs target feature ptx60}}
__hmma_m16n16k16_ld_b(dst, src+2, ldm, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.load.c.sync.row.m16n16k16.stride.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16
// expected-error@+1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature ptx60}}
__hmma_m16n16k16_ld_c_f16(dst, src, ldm, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.load.c.sync.col.m16n16k16.stride.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16
// expected-error@+1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature ptx60}}
__hmma_m16n16k16_ld_c_f16(dst, src, ldm, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.load.c.sync.row.m16n16k16.stride.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32
// expected-error@+1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature ptx60}}
__hmma_m16n16k16_ld_c_f32(fdst, fsrc, ldm, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.load.c.sync.col.m16n16k16.stride.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32
// expected-error@+1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature ptx60}}
__hmma_m16n16k16_ld_c_f32(fdst, fsrc, ldm, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.store.d.sync.row.m16n16k16.stride.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f16
// expected-error@+1 {{'__hmma_m16n16k16_st_c_f16' needs target feature ptx60}}
__hmma_m16n16k16_st_c_f16(dst, src, ldm, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.store.d.sync.col.m16n16k16.stride.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.f16
// expected-error@+1 {{'__hmma_m16n16k16_st_c_f16' needs target feature ptx60}}
__hmma_m16n16k16_st_c_f16(dst, src, ldm, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.store.d.sync.row.m16n16k16.stride.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f32
// expected-error@+1 {{'__hmma_m16n16k16_st_c_f32' needs target feature ptx60}}
__hmma_m16n16k16_st_c_f32(fdst, fsrc, ldm, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.store.d.sync.col.m16n16k16.stride.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.f32
// expected-error@+1 {{'__hmma_m16n16k16_st_c_f32' needs target feature ptx60}}
__hmma_m16n16k16_st_c_f32(fdst, fsrc, ldm, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f16.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f16.f16.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f16.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f16
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f16.f16.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f16.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f16.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f16
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f16.f16.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f16.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f16.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f16
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f16.f16.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f16.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f16.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f32
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f16.f32.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f32.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f16.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f32
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f16.f32.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f32.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f16.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f32
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f16.f32.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f32.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f16.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f32
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f16.f32.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f32.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f32.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f16
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 0, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f32.f16.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f16.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 0, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f32.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f16
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 1, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f32.f16.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f16.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 1, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f32.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f16
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 2, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f32.f16.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f16.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 2, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f32.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f16
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 3, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f32.f16.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f16.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 3, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f32.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f32
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f32.f32.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f32.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f32.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f32
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 1, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f32.f32.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f32.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 1, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f32.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f32
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 2, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f32.f32.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f32.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 2, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f32.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f32
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 3, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f32.f32.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f32.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 3, 1);
}