; blob: 80ae8aac39115c3fbb59a23d5101ae8948a17a01 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s | FileCheck %s
; The llc invocation above passes no target flag, so this triple is what
; selects the 64-bit NVPTX (CUDA) backend for every function in this file.
target triple = "nvptx64-nvidia-cuda"
; Constant-offset load from a byval parameter.
; Reads an i32 at element index 1 (byte offset 4) from %a and returns it.
; The expected PTX is a single ld.param.u32 at param_0+4 followed by the
; return-value store; the instruction sequence never takes the parameter's
; address.
; NOTE(review): byte offset 4 lies past the end of a 4-byte byval(i32)
; object; presumably deliberate to get a nonzero constant offset -- confirm.
define i32 @test_ld_param_const(ptr byval(i32) %a) {
; CHECK-LABEL: test_ld_param_const(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [test_ld_param_const_param_0+4];
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
; i32-typed GEP with index 1, i.e. 4 bytes past %a.
%p2 = getelementptr i32, ptr %a, i32 1
%ld = load i32, ptr %p2
ret i32 %ld
}
; Variable-offset load from a byval parameter.
; Reads an i32 from %a at a byte offset given by the runtime argument %b.
; Unlike the constant-offset case, the expected PTX takes the parameter's
; address with mov.b64, loads %b sign-extended into a 64-bit register
; (ld.param.s32), adds the two, and performs the access as ld.local.u32.
define i32 @test_ld_param_non_const(ptr byval([10 x i32]) %a, i32 %b) {
; CHECK-LABEL: test_ld_param_non_const(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: mov.b64 %rd1, test_ld_param_non_const_param_0;
; CHECK-NEXT: ld.param.s32 %rd2, [test_ld_param_non_const_param_1];
; CHECK-NEXT: add.s64 %rd3, %rd1, %rd2;
; CHECK-NEXT: ld.local.u32 %r1, [%rd3];
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
; i8-typed GEP, so %b is a byte offset rather than an element index.
%p2 = getelementptr i8, ptr %a, i32 %b
%ld = load i32, ptr %p2
ret i32 %ld
}
; External callees used by the tests below; neither has a body in this file.
; @escape takes a plain pointer argument.
declare void @escape(ptr)
; @byval_user takes its pointer argument as byval(i32).
declare void @byval_user(ptr byval(i32))
; Byval parameter whose pointer is handed to an external function.
; %a is passed unchanged to @escape. The expected PTX takes the parameter's
; address with mov.b64, converts it with cvta.local.u64, and passes the
; resulting 64-bit value as param0 of the call; no ld.param of the pointee
; appears in the sequence.
define void @test_ld_param_escaping(ptr byval(i32) %a) {
; CHECK-LABEL: test_ld_param_escaping(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: mov.b64 %rd1, test_ld_param_escaping_param_0;
; CHECK-NEXT: cvta.local.u64 %rd2, %rd1;
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .b64 param0;
; CHECK-NEXT: st.param.b64 [param0], %rd2;
; CHECK-NEXT: call.uni
; CHECK-NEXT: escape,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: ret;
call void @escape(ptr %a)
ret void
}
; Byval parameter forwarded to a callee that also takes it byval.
; %a is passed to @byval_user, whose declaration marks the argument
; byval(i32). The expected PTX reads the 32-bit value with ld.param.u32 and
; stores it into a 4-byte, 4-aligned outgoing param0 before the call; the
; incoming parameter's address is not taken.
define void @test_ld_param_byval(ptr byval(i32) %a) {
; CHECK-LABEL: test_ld_param_byval(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [test_ld_param_byval_param_0];
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni
; CHECK-NEXT: byval_user,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: } // callseq 1
; CHECK-NEXT: ret;
; The call site itself carries no byval attribute; only the declaration of
; @byval_user does.
call void @byval_user(ptr %a)
ret void
}
; Store into a byval parameter.
; Performs a volatile i32 store of %b at byte offset 2 inside %a and returns
; %c. The expected PTX takes the parameter's address with mov.b64, loads %b
; and %c with ld.param.u32, and writes %b with st.local.u32 at [%rd1+2].
define i32 @test_modify_param(ptr byval([10 x i32]) %a, i32 %b, i32 %c ) {
; CHECK-LABEL: test_modify_param(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: mov.b64 %rd1, test_modify_param_param_0;
; CHECK-NEXT: ld.param.u32 %r1, [test_modify_param_param_1];
; CHECK-NEXT: ld.param.u32 %r2, [test_modify_param_param_2];
; CHECK-NEXT: st.local.u32 [%rd1+2], %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
; i8-typed GEP: the store lands 2 bytes into the byval array.
%p2 = getelementptr i8, ptr %a, i32 2
; The store is volatile and nothing in this function reads it back.
store volatile i32 %b, ptr %p2
ret i32 %c
}
; Constant-offset loads from a byval parameter spread over several blocks.
; Branches on %p: the %if block loads an i32 at byte offset 4 of %a, the
; %else block loads one at byte offset 8, and %end returns whichever value
; was loaded. The expected PTX uses ld.param.u32 directly in each arm
; (param_0+4 and param_0+8), both writing %r4, which is then stored as the
; return value.
define i32 @test_multi_block(ptr byval([10 x i32]) %a, i1 %p) {
; CHECK-LABEL: test_multi_block(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<3>;
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u8 %rs1, [test_multi_block_param_1];
; CHECK-NEXT: and.b16 %rs2, %rs1, 1;
; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0;
; CHECK-NEXT: not.pred %p2, %p1;
; CHECK-NEXT: @%p2 bra $L__BB5_2;
; CHECK-NEXT: // %bb.1: // %if
; CHECK-NEXT: ld.param.u32 %r4, [test_multi_block_param_0+4];
; CHECK-NEXT: bra.uni $L__BB5_3;
; CHECK-NEXT: $L__BB5_2: // %else
; CHECK-NEXT: ld.param.u32 %r4, [test_multi_block_param_0+8];
; CHECK-NEXT: $L__BB5_3: // %end
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
br i1 %p, label %if, label %else
; Taken when %p is true: read the i32 at byte offset 4.
if:
%p2 = getelementptr i8, ptr %a, i32 4
%v2 = load i32, ptr %p2
br label %end
; Taken when %p is false: read the i32 at byte offset 8.
else:
%p3 = getelementptr i8, ptr %a, i32 8
%v3 = load i32, ptr %p3
br label %end
; Merge point: select the value loaded on the path that was taken.
end:
%v = phi i32 [ %v2, %if ], [ %v3, %else ]
ret i32 %v
}