blob: 7c884a70b75300754befa50b3bdb29311866006f [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: not llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86 -o /dev/null 2>&1 | FileCheck %s
target triple = "nvptx64-nvidia-cuda"
define void @tcgen05_mma_scale_d_invalid_flag_values(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d) {
; CHECK: immarg value 16 out of range [0, 16)
call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i64 16, i32 0, i32 1, i32 0)
; CHECK: immarg value 3 out of range [0, 2)
call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i64 0, i32 3, i32 1, i32 0)
; CHECK: immarg value 0 out of range [1, 3)
call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i64 0, i32 0, i32 0, i32 0)
; CHECK: immarg value 5 out of range [0, 4)
call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i64 0, i32 0, i32 1, i32 5)
ret void
}
define void @tcgen05_mma_scale_d_disable_output_lane_invalid_flag_values(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4) {
; CHECK: immarg value 16 out of range [0, 16)
call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i64 16, <4 x i32> %disable_output_lanev4, i32 0, i32 0)
; CHECK: immarg value 3 out of range [0, 2)
call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i64 0, <4 x i32> %disable_output_lanev4, i32 3, i32 0)
; CHECK: immarg value 5 out of range [0, 4)
call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i64 0, <4 x i32> %disable_output_lanev4, i32 0, i32 5)
ret void
}