blob: dff829ecf53210be59b928c31c8060a134076e88 [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: not llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86 -o /dev/null 2>&1 | FileCheck %s
; RUN: not llc < %s -mtriple=nvptx64 -mcpu=sm_101a -mattr=+ptx86 -o /dev/null 2>&1 | FileCheck %s
; RUN: not llc < %s -mtriple=nvptx64 -mcpu=sm_110a -mattr=+ptx90 -o /dev/null 2>&1 | FileCheck %s
target triple = "nvptx64-nvidia-cuda"
define void @tcgen05_mma_invalid_flag_values(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d) {
; CHECK: immarg value 5 out of range [0, 4)
call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 5, i32 1, i32 3)
; CHECK: immarg value 0 out of range [1, 3)
call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 3, i32 0, i32 3)
; CHECK: immarg value 3 out of range [1, 3)
call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 3, i32 3, i32 3)
; CHECK: immarg value 5 out of range [0, 4)
call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 3, i32 2, i32 5)
; CHECK: immarg value 2 out of range [0, 2)
call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 3, i32 2, i32 2)
ret void
}
define void @tcgen05_mma_disable_output_lane_invalid_flag_values(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4) {
; CHECK: immarg value 5 out of range [0, 4)
call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 5, i32 0)
; CHECK: immarg value 5 out of range [0, 4)
call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 0, i32 5)
ret void
}
define void @tcgen05_mma_ws_invalid_flag_values(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d) {
; CHECK: immarg value 5 out of range [0, 4)
call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 5, i32 0, i32 0)
; CHECK: immarg value 5 out of range [0, 4)
call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 0, i32 5, i32 0)
; CHECK: immarg value 5 out of range [0, 4)
call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 0, i32 0, i32 5)
ret void
}