blob: e5498ccf85f20d6fb2a01b01c3fd9a07083b17ce [file]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -O0 -mtriple=nvptx-- -mattr=+ptx60 -mcpu=sm_30 | FileCheck %s --check-prefix=CHECK32
; RUN: llc < %s -O0 -mtriple=nvptx64-- -mattr=+ptx60 -mcpu=sm_30 | FileCheck %s --check-prefix=CHECK64
; RUN: %if ptxas-isa-6.0 && ptxas-ptr32 %{ llc < %s -O0 -mtriple=nvptx -mattr=+ptx60 -mcpu=sm_30 | %ptxas-verify %}
; RUN: %if ptxas-isa-6.0 %{ llc < %s -O0 -mtriple=nvptx64 -mattr=+ptx60 -mcpu=sm_30 | %ptxas-verify %}
; CHECK: .address_size [[BITS:32|64]]
; Struct type used as the element type of the two va_list allocas in @foo.
%struct.__va_list_tag = type { ptr, ptr, i32, i32 }
; addrspace(1) global initialized with the address of @foo. @test_foo loads
; the pointer stored here and calls through it, so that the variadic call is
; an indirect one.
@foo_ptr = internal addrspace(1) global ptr @foo, align 8
; Callee side of the varargs test. Takes one fixed i32 (%a) plus variadic
; arguments and exercises va_start, va_copy, va_arg (i32, i64, double, ptr)
; and va_end.
;
; Result: bar(%a, v0, v1, v2, v3) + c0, where v0..v3 are the values read in
; order from %al, and c0 is the first i32 read from the copy %al2. The copy is
; taken before any va_arg on %al, so it starts at the same position %al did.
;
; In the expected output below, the va_list is a single pointer kept in the
; local depot at [%SP]; the copy lives at [%SP+16] for the 32-bit target and at
; [%SP+24] for the 64-bit target. Each va_arg rounds that pointer up to the
; alignment of the requested type (add align-1, then mask), stores the advanced
; pointer back, and loads the value from the rounded address.
;
; The assertion lines inside the function are autogenerated (see the NOTE at
; the top of the file); regenerate them with the script rather than editing
; them by hand.
define i32 @foo(i32 %a, ...) {
; CHECK32-LABEL: foo(
; CHECK32: {
; CHECK32-NEXT: .local .align 8 .b8 __local_depot0[32];
; CHECK32-NEXT: .reg .b32 %SP;
; CHECK32-NEXT: .reg .b32 %SPL;
; CHECK32-NEXT: .reg .b32 %r<29>;
; CHECK32-NEXT: .reg .b64 %rd<3>;
; CHECK32-EMPTY:
; CHECK32-NEXT: // %bb.0: // %entry
; CHECK32-NEXT: mov.b32 %SPL, __local_depot0;
; CHECK32-NEXT: cvta.local.u32 %SP, %SPL;
; CHECK32-NEXT: ld.param.b32 %r2, [foo_param_1];
; CHECK32-NEXT: ld.param.b32 %r1, [foo_param_0];
; CHECK32-NEXT: st.b32 [%SP], %r2;
; CHECK32-NEXT: ld.b32 %r3, [%SP];
; CHECK32-NEXT: st.b32 [%SP+16], %r3;
; CHECK32-NEXT: ld.b32 %r4, [%SP];
; CHECK32-NEXT: add.s32 %r5, %r4, 3;
; CHECK32-NEXT: and.b32 %r6, %r5, -4;
; CHECK32-NEXT: add.s32 %r7, %r6, 4;
; CHECK32-NEXT: st.b32 [%SP], %r7;
; CHECK32-NEXT: ld.b32 %r8, [%r6];
; CHECK32-NEXT: ld.b32 %r9, [%SP];
; CHECK32-NEXT: add.s32 %r10, %r9, 7;
; CHECK32-NEXT: and.b32 %r11, %r10, -8;
; CHECK32-NEXT: add.s32 %r12, %r11, 8;
; CHECK32-NEXT: st.b32 [%SP], %r12;
; CHECK32-NEXT: ld.b64 %rd1, [%r11];
; CHECK32-NEXT: ld.b32 %r13, [%SP];
; CHECK32-NEXT: add.s32 %r14, %r13, 7;
; CHECK32-NEXT: and.b32 %r15, %r14, -8;
; CHECK32-NEXT: add.s32 %r16, %r15, 8;
; CHECK32-NEXT: st.b32 [%SP], %r16;
; CHECK32-NEXT: ld.b64 %rd2, [%r15];
; CHECK32-NEXT: ld.b32 %r17, [%SP];
; CHECK32-NEXT: add.s32 %r18, %r17, 3;
; CHECK32-NEXT: and.b32 %r19, %r18, -4;
; CHECK32-NEXT: add.s32 %r20, %r19, 4;
; CHECK32-NEXT: st.b32 [%SP], %r20;
; CHECK32-NEXT: ld.b32 %r21, [%r19];
; CHECK32-NEXT: { // callseq 0, 0
; CHECK32-NEXT: .param .b32 param0;
; CHECK32-NEXT: .param .b32 param1;
; CHECK32-NEXT: .param .b64 param2;
; CHECK32-NEXT: .param .b64 param3;
; CHECK32-NEXT: .param .b32 param4;
; CHECK32-NEXT: .param .b32 retval0;
; CHECK32-NEXT: st.param.b32 [param4], %r21;
; CHECK32-NEXT: st.param.b64 [param3], %rd2;
; CHECK32-NEXT: st.param.b64 [param2], %rd1;
; CHECK32-NEXT: st.param.b32 [param1], %r8;
; CHECK32-NEXT: st.param.b32 [param0], %r1;
; CHECK32-NEXT: call.uni (retval0), bar, (param0, param1, param2, param3, param4);
; CHECK32-NEXT: ld.param.b32 %r22, [retval0];
; CHECK32-NEXT: } // callseq 0
; CHECK32-NEXT: ld.b32 %r23, [%SP+16];
; CHECK32-NEXT: add.s32 %r24, %r23, 3;
; CHECK32-NEXT: and.b32 %r25, %r24, -4;
; CHECK32-NEXT: add.s32 %r26, %r25, 4;
; CHECK32-NEXT: st.b32 [%SP+16], %r26;
; CHECK32-NEXT: ld.b32 %r27, [%r25];
; CHECK32-NEXT: add.s32 %r28, %r22, %r27;
; CHECK32-NEXT: st.param.b32 [func_retval0], %r28;
; CHECK32-NEXT: ret;
;
; CHECK64-LABEL: foo(
; CHECK64: {
; CHECK64-NEXT: .local .align 8 .b8 __local_depot0[48];
; CHECK64-NEXT: .reg .b64 %SP;
; CHECK64-NEXT: .reg .b64 %SPL;
; CHECK64-NEXT: .reg .b32 %r<6>;
; CHECK64-NEXT: .reg .b64 %rd<26>;
; CHECK64-EMPTY:
; CHECK64-NEXT: // %bb.0: // %entry
; CHECK64-NEXT: mov.b64 %SPL, __local_depot0;
; CHECK64-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK64-NEXT: ld.param.b64 %rd1, [foo_param_1];
; CHECK64-NEXT: ld.param.b32 %r1, [foo_param_0];
; CHECK64-NEXT: st.b64 [%SP], %rd1;
; CHECK64-NEXT: ld.b64 %rd2, [%SP];
; CHECK64-NEXT: st.b64 [%SP+24], %rd2;
; CHECK64-NEXT: ld.b64 %rd3, [%SP];
; CHECK64-NEXT: add.s64 %rd4, %rd3, 3;
; CHECK64-NEXT: and.b64 %rd5, %rd4, -4;
; CHECK64-NEXT: add.s64 %rd6, %rd5, 4;
; CHECK64-NEXT: st.b64 [%SP], %rd6;
; CHECK64-NEXT: ld.b32 %r2, [%rd5];
; CHECK64-NEXT: ld.b64 %rd7, [%SP];
; CHECK64-NEXT: add.s64 %rd8, %rd7, 7;
; CHECK64-NEXT: and.b64 %rd9, %rd8, -8;
; CHECK64-NEXT: add.s64 %rd10, %rd9, 8;
; CHECK64-NEXT: st.b64 [%SP], %rd10;
; CHECK64-NEXT: ld.b64 %rd11, [%rd9];
; CHECK64-NEXT: ld.b64 %rd12, [%SP];
; CHECK64-NEXT: add.s64 %rd13, %rd12, 7;
; CHECK64-NEXT: and.b64 %rd14, %rd13, -8;
; CHECK64-NEXT: add.s64 %rd15, %rd14, 8;
; CHECK64-NEXT: st.b64 [%SP], %rd15;
; CHECK64-NEXT: ld.b64 %rd16, [%rd14];
; CHECK64-NEXT: ld.b64 %rd17, [%SP];
; CHECK64-NEXT: add.s64 %rd18, %rd17, 7;
; CHECK64-NEXT: and.b64 %rd19, %rd18, -8;
; CHECK64-NEXT: add.s64 %rd20, %rd19, 8;
; CHECK64-NEXT: st.b64 [%SP], %rd20;
; CHECK64-NEXT: ld.b64 %rd21, [%rd19];
; CHECK64-NEXT: { // callseq 0, 0
; CHECK64-NEXT: .param .b32 param0;
; CHECK64-NEXT: .param .b32 param1;
; CHECK64-NEXT: .param .b64 param2;
; CHECK64-NEXT: .param .b64 param3;
; CHECK64-NEXT: .param .b64 param4;
; CHECK64-NEXT: .param .b32 retval0;
; CHECK64-NEXT: st.param.b64 [param4], %rd21;
; CHECK64-NEXT: st.param.b64 [param3], %rd16;
; CHECK64-NEXT: st.param.b64 [param2], %rd11;
; CHECK64-NEXT: st.param.b32 [param1], %r2;
; CHECK64-NEXT: st.param.b32 [param0], %r1;
; CHECK64-NEXT: call.uni (retval0), bar, (param0, param1, param2, param3, param4);
; CHECK64-NEXT: ld.param.b32 %r3, [retval0];
; CHECK64-NEXT: } // callseq 0
; CHECK64-NEXT: ld.b64 %rd22, [%SP+24];
; CHECK64-NEXT: add.s64 %rd23, %rd22, 3;
; CHECK64-NEXT: and.b64 %rd24, %rd23, -4;
; CHECK64-NEXT: add.s64 %rd25, %rd24, 4;
; CHECK64-NEXT: st.b64 [%SP+24], %rd25;
; CHECK64-NEXT: ld.b32 %r4, [%rd24];
; CHECK64-NEXT: add.s32 %r5, %r3, %r4;
; CHECK64-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK64-NEXT: ret;
entry:
; Two va_list objects: %al is the primary list, %al2 receives a copy of it.
%al = alloca [1 x %struct.__va_list_tag], align 8
%al2 = alloca [1 x %struct.__va_list_tag], align 8
; Test va_start: initialize %al for this function's variadic arguments.
call void @llvm.va_start(ptr %al)
; Test va_copy(): snapshot %al into %al2 before anything is consumed from %al.
call void @llvm.va_copy(ptr %al2, ptr %al)
; Test va_arg(ap, int32_t): expected output rounds the pointer up to 4 and
; advances it by 4.
%0 = va_arg ptr %al, i32
; Test va_arg(ap, int64_t): expected output rounds up to 8 and advances by 8.
%1 = va_arg ptr %al, i64
; Test va_arg(ap, double): expected output rounds up to 8 and advances by 8.
%2 = va_arg ptr %al, double
; Test va_arg(ap, ptr): expected output uses a 4-byte slot on the 32-bit
; target and an 8-byte slot on the 64-bit target.
%3 = va_arg ptr %al, ptr
; Forward the fixed argument and the four extracted values to @bar.
%call = call i32 @bar(i32 %a, i32 %0, i64 %1, double %2, ptr %3)
call void @llvm.va_end(ptr %al)
; Read one i32 from the copy; %al2 was copied before any va_arg on %al, so
; this reads from the start of the variadic arguments again.
%4 = va_arg ptr %al2, i32
call void @llvm.va_end(ptr %al2)
; Combine @bar's result with the value read through the copied list.
%5 = add i32 %call, %4
ret i32 %5
}
; Caller side of the varargs test. Loads a function pointer from @foo_ptr and
; calls it indirectly through the variadic type i32 (i32, ...), passing the
; constant 4 as the fixed argument and %i, %l, %d, %p as variadic arguments.
; Returns the callee's result unchanged.
;
; The assertion lines inside the function are autogenerated (see the NOTE at
; the top of the file); regenerate them with the script rather than editing
; them by hand.
define i32 @test_foo(i32 %i, i64 %l, double %d, ptr %p) {
; Test indirect variadic function call.
; The expected output first loads the four incoming parameters into registers
; and loads the callee address from foo_ptr.
; CHECK32-LABEL: test_foo(
; CHECK32: {
; CHECK32-NEXT: .local .align 8 .b8 __local_depot1[32];
; CHECK32-NEXT: .reg .b32 %SP;
; CHECK32-NEXT: .reg .b32 %SPL;
; CHECK32-NEXT: .reg .b32 %r<8>;
; CHECK32-NEXT: .reg .b64 %rd<3>;
; CHECK32-EMPTY:
; CHECK32-NEXT: // %bb.0: // %entry
; CHECK32-NEXT: mov.b32 %SPL, __local_depot1;
; CHECK32-NEXT: cvta.local.u32 %SP, %SPL;
; CHECK32-NEXT: ld.param.b32 %r2, [test_foo_param_3];
; CHECK32-NEXT: ld.param.b64 %rd2, [test_foo_param_2];
; CHECK32-NEXT: ld.param.b64 %rd1, [test_foo_param_1];
; CHECK32-NEXT: ld.param.b32 %r1, [test_foo_param_0];
; CHECK32-NEXT: mov.b32 %r3, foo_ptr;
; CHECK32-NEXT: cvta.global.u32 %r4, %r3;
; CHECK32-NEXT: ld.b32 %r5, [%r4];
; CHECK32-NEXT: st.b32 [%SP], %r1;
; CHECK32-NEXT: st.b64 [%SP+8], %rd1;
; CHECK32-NEXT: st.b64 [%SP+16], %rd2;
; CHECK32-NEXT: st.b32 [%SP+24], %r2;
; CHECK32-NEXT: { // callseq 1, 0
; CHECK32-NEXT: .param .b32 param0;
; CHECK32-NEXT: .param .b32 param1;
; CHECK32-NEXT: .param .b32 retval0;
; CHECK32-NEXT: add.u32 %r6, %SP, 0;
; CHECK32-NEXT: st.param.b32 [param1], %r6;
; CHECK32-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .b32 _);
; CHECK32-NEXT: st.param.b32 [param0], 4;
; CHECK32-NEXT: call (retval0), %r5, (param0, param1), prototype_1;
; CHECK32-NEXT: ld.param.b32 %r7, [retval0];
; CHECK32-NEXT: } // callseq 1
; CHECK32-NEXT: st.param.b32 [func_retval0], %r7;
; CHECK32-NEXT: ret;
;
; CHECK64-LABEL: test_foo(
; CHECK64: {
; CHECK64-NEXT: .local .align 8 .b8 __local_depot1[32];
; CHECK64-NEXT: .reg .b64 %SP;
; CHECK64-NEXT: .reg .b64 %SPL;
; CHECK64-NEXT: .reg .b32 %r<3>;
; CHECK64-NEXT: .reg .b64 %rd<8>;
; CHECK64-EMPTY:
; CHECK64-NEXT: // %bb.0: // %entry
; CHECK64-NEXT: mov.b64 %SPL, __local_depot1;
; CHECK64-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK64-NEXT: ld.param.b64 %rd3, [test_foo_param_3];
; CHECK64-NEXT: ld.param.b64 %rd2, [test_foo_param_2];
; CHECK64-NEXT: ld.param.b64 %rd1, [test_foo_param_1];
; CHECK64-NEXT: ld.param.b32 %r1, [test_foo_param_0];
; CHECK64-NEXT: mov.b64 %rd4, foo_ptr;
; CHECK64-NEXT: cvta.global.u64 %rd5, %rd4;
; CHECK64-NEXT: ld.b64 %rd6, [%rd5];
; CHECK64-NEXT: st.b32 [%SP], %r1;
; CHECK64-NEXT: st.b64 [%SP+8], %rd1;
; CHECK64-NEXT: st.b64 [%SP+16], %rd2;
; CHECK64-NEXT: st.b64 [%SP+24], %rd3;
; CHECK64-NEXT: { // callseq 1, 0
; CHECK64-NEXT: .param .b32 param0;
; CHECK64-NEXT: .param .b64 param1;
; CHECK64-NEXT: .param .b32 retval0;
; CHECK64-NEXT: add.u64 %rd7, %SP, 0;
; CHECK64-NEXT: st.param.b64 [param1], %rd7;
; CHECK64-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .b64 _);
; CHECK64-NEXT: st.param.b32 [param0], 4;
; CHECK64-NEXT: call (retval0), %rd6, (param0, param1), prototype_1;
; CHECK64-NEXT: ld.param.b32 %r2, [retval0];
; CHECK64-NEXT: } // callseq 1
; CHECK64-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK64-NEXT: ret;
; In the expected output above, the four variadic arguments are stored to a
; local array at [%SP], [%SP+8], [%SP+16] and [%SP+24], and the address of that
; array is passed as the second call parameter after the fixed argument.
entry:
; View the addrspace(1) global through a generic pointer and load the callee
; address from it.
%ptr = load ptr, ptr addrspacecast (ptr addrspace(1) @foo_ptr to ptr), align 8
; Indirect variadic call: fixed argument 4, then %i, %l, %d, %p as varargs.
%call = call i32 (i32, ...) %ptr(i32 4, i32 %i, i64 %l, double %d, ptr %p)
ret i32 %call
}
; Variable-argument intrinsics exercised by @foo.
declare void @llvm.va_start(ptr)
declare void @llvm.va_end(ptr)
declare void @llvm.va_copy(ptr, ptr)
; External (non-variadic) function that @foo calls with its fixed argument
; followed by the four values it extracted with va_arg.
declare i32 @bar(i32, i32, i64, double, ptr)