blob: 4e463a14e753742f643a11801590fa783cf860c7 [file] [log] [blame] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | FileCheck --check-prefixes=CHECK %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | FileCheck --check-prefixes=CHECK %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | FileCheck --check-prefixes=CHECK %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | FileCheck --check-prefixes=CHECK %s
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | %ptxas-verify -arch=sm_100a %}
; RUN: %if ptxas-sm_103a && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | %ptxas-verify -arch=sm_103a %}
; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %}
; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %}
define void @test_tcgen05_cp_64x128_v1_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_64x128_v1_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_cg1_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.64x128b.warpx2::02_13 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.cg1(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_64x128_v1_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_64x128_v1_cg2(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_cg2_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_cg2_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::02_13 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_64x128_v2_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_64x128_v2_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_cg1_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.64x128b.warpx2::01_23 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_01_23.cg1(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_64x128_v2_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_64x128_v2_cg2(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_cg2_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_cg2_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::01_23 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_01_23.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_32x128_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_32x128_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_cg1_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.32x128b.warpx4 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.cg1(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_32x128_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_32x128_cg2(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_cg2_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_cg2_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::2.32x128b.warpx4 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_128x128b_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_128x128b_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_cg1_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.128x128b [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.128x128b.cg1(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_128x128b_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_128x128b_cg2(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_cg2_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_cg2_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::2.128x128b [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.128x128b.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_128x256b_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_128x256b_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_cg1_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.128x256b [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.128x256b.cg1(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_128x256b_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_128x256b_cg2(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_cg2_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_cg2_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::2.128x256b [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.128x256b.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_4x256b_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_4x256b_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_cg1_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.4x256b [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.4x256b.cg1(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_4x256b_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_4x256b_cg2(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_cg2_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_cg2_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::2.4x256b [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.4x256b.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
; With src_fmt as b6x16_p32
define void @test_tcgen05_cp_128x256b_b6x16_p32_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_128x256b_b6x16_p32_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_b6x16_p32_cg1_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_b6x16_p32_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.128x256b.b8x16.b6x16_p32 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.128x256b.b6x16_p32.cg1(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_128x256b_b6x16_p32_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_128x256b_b6x16_p32_cg2(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_b6x16_p32_cg2_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_b6x16_p32_cg2_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::2.128x256b.b8x16.b6x16_p32 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.128x256b.b6x16_p32.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_4x256b_b6x16_p32_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_4x256b_b6x16_p32_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_b6x16_p32_cg1_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_b6x16_p32_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.4x256b.b8x16.b6x16_p32 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.4x256b.b6x16_p32.cg1(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_4x256b_b6x16_p32_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_4x256b_b6x16_p32_cg2(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_b6x16_p32_cg2_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_b6x16_p32_cg2_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::2.4x256b.b8x16.b6x16_p32 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.4x256b.b6x16_p32.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_128x128b_b6x16_p32_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_128x128b_b6x16_p32_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_b6x16_p32_cg1_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_b6x16_p32_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.128x128b.b8x16.b6x16_p32 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.128x128b.b6x16_p32.cg1(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_128x128b_b6x16_p32_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_128x128b_b6x16_p32_cg2(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_b6x16_p32_cg2_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_b6x16_p32_cg2_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::2.128x128b.b8x16.b6x16_p32 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.128x128b.b6x16_p32.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_64x128_v1_b6x16_p32_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b6x16_p32_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_b6x16_p32_cg1_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_b6x16_p32_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.64x128b.warpx2::02_13.b8x16.b6x16_p32 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.b6x16_p32.cg1(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_64x128_v1_b6x16_p32_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b6x16_p32_cg2(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_b6x16_p32_cg2_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_b6x16_p32_cg2_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::02_13.b8x16.b6x16_p32 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.b6x16_p32.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_64x128_v2_b6x16_p32_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b6x16_p32_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_b6x16_p32_cg1_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_b6x16_p32_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.64x128b.warpx2::01_23.b8x16.b6x16_p32 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_01_23.b6x16_p32.cg1(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_64x128_v2_b6x16_p32_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b6x16_p32_cg2(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_b6x16_p32_cg2_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_b6x16_p32_cg2_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::01_23.b8x16.b6x16_p32 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_01_23.b6x16_p32.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_32x128_b6x16_p32_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_32x128_b6x16_p32_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_b6x16_p32_cg1_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_b6x16_p32_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.32x128b.warpx4.b8x16.b6x16_p32 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.b6x16_p32.cg1(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_32x128_b6x16_p32_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_32x128_b6x16_p32_cg2(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_b6x16_p32_cg2_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_b6x16_p32_cg2_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::2.32x128b.warpx4.b8x16.b6x16_p32 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.b6x16_p32.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
; With src_fmt as b4x16_p64
define void @test_tcgen05_cp_128x256b_b4x16_p64_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_128x256b_b4x16_p64_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_b4x16_p64_cg1_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_b4x16_p64_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.128x256b.b8x16.b4x16_p64 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.128x256b.b4x16_p64.cg1(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_128x256b_b4x16_p64_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_128x256b_b4x16_p64_cg2(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_b4x16_p64_cg2_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_b4x16_p64_cg2_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::2.128x256b.b8x16.b4x16_p64 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.128x256b.b4x16_p64.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_4x256b_b4x16_p64_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_4x256b_b4x16_p64_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_b4x16_p64_cg1_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_b4x16_p64_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.4x256b.b8x16.b4x16_p64 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.4x256b.b4x16_p64.cg1(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_4x256b_b4x16_p64_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_4x256b_b4x16_p64_cg2(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_b4x16_p64_cg2_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_b4x16_p64_cg2_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::2.4x256b.b8x16.b4x16_p64 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.4x256b.b4x16_p64.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_128x128b_b4x16_p64_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_128x128b_b4x16_p64_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_b4x16_p64_cg1_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_b4x16_p64_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.128x128b.b8x16.b4x16_p64 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.128x128b.b4x16_p64.cg1(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_128x128b_b4x16_p64_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_128x128b_b4x16_p64_cg2(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_b4x16_p64_cg2_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_b4x16_p64_cg2_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::2.128x128b.b8x16.b4x16_p64 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.128x128b.b4x16_p64.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_64x128_v1_b4x16_p64_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b4x16_p64_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_b4x16_p64_cg1_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_b4x16_p64_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.64x128b.warpx2::02_13.b8x16.b4x16_p64 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.b4x16_p64.cg1(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_64x128_v1_b4x16_p64_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b4x16_p64_cg2(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_b4x16_p64_cg2_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_b4x16_p64_cg2_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::02_13.b8x16.b4x16_p64 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.b4x16_p64.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_64x128_v2_b4x16_p64_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b4x16_p64_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_b4x16_p64_cg1_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_b4x16_p64_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.64x128b.warpx2::01_23.b8x16.b4x16_p64 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_01_23.b4x16_p64.cg1(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_64x128_v2_b4x16_p64_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b4x16_p64_cg2(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_b4x16_p64_cg2_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_b4x16_p64_cg2_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::01_23.b8x16.b4x16_p64 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_01_23.b4x16_p64.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_32x128_b4x16_p64_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_32x128_b4x16_p64_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_b4x16_p64_cg1_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_b4x16_p64_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.32x128b.warpx4.b8x16.b4x16_p64 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.b4x16_p64.cg1(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
define void @test_tcgen05_cp_32x128_b4x16_p64_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_32x128_b4x16_p64_cg2(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_b4x16_p64_cg2_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_b4x16_p64_cg2_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::2.32x128b.warpx4.b8x16.b4x16_p64 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.b4x16_p64.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}