; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -x86-asm-syntax=intel | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -x86-asm-syntax=intel | FileCheck %s --check-prefix=X64
; Test correct handling of musttail calls with byval struct arguments.
%struct.1xi32 = type { [1 x i32] }
%struct.3xi32 = type { [3 x i32] }
%struct.5xi32 = type { [5 x i32] }
declare dso_local i32 @Func1(ptr byval(%struct.1xi32) %0)
declare dso_local i32 @Func3(ptr byval(%struct.3xi32) %0)
declare dso_local i32 @Func5(ptr byval(%struct.5xi32) %0)
declare dso_local i32 @FuncManyArgs(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i8 %6, ptr byval(%struct.5xi32) %7)
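; Forwarding the incoming byval argument unchanged: it already sits at the
; stack position the callee expects, so on both targets the musttail call
; lowers to a plain jmp.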
define dso_local i32 @test1(ptr byval(%struct.1xi32) %0) {
; X32-LABEL: test1:
; X32: # %bb.0:
; X32-NEXT: jmp Func1 # TAILCALL
;
; X64-LABEL: test1:
; X64: # %bb.0:
; X64-NEXT: jmp Func1 # TAILCALL
%r = musttail call i32 @Func1(ptr byval(%struct.1xi32) %0)
ret i32 %r
}
define dso_local i32 @test3(ptr byval(%struct.3xi32) %0) {
; X32-LABEL: test3:
; X32: # %bb.0:
; X32-NEXT: jmp Func3 # TAILCALL
;
; X64-LABEL: test3:
; X64: # %bb.0:
; X64-NEXT: jmp Func3 # TAILCALL
%r = musttail call i32 @Func3(ptr byval(%struct.3xi32) %0)
ret i32 %r
}
; sizeof(%struct.5xi32) = 5 * 4 = 20 > 16, so on x86-64 this is passed on the stack.
define dso_local i32 @test5(ptr byval(%struct.5xi32) %0) {
; X32-LABEL: test5:
; X32: # %bb.0:
; X32-NEXT: jmp Func5 # TAILCALL
;
; X64-LABEL: test5:
; X64: # %bb.0:
; X64-NEXT: jmp Func5 # TAILCALL
%r = musttail call i32 @Func5(ptr byval(%struct.5xi32) %0)
ret i32 %r
}
; Test passing multiple arguments of different sizes on the stack. On x86-64
; Linux the first six integer arguments are passed in registers.
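; Under the SysV calling convention those are edi, esi, edx, ecx, r8d and r9d
; for the six i32 arguments; the i8 and the byval struct end up on the stack
; on both targets.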
define dso_local i32 @testManyArgs(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i8 %6, ptr byval(%struct.5xi32) %7) {
; X32-LABEL: testManyArgs:
; X32: # %bb.0:
; X32-NEXT: jmp FuncManyArgs # TAILCALL
;
; X64-LABEL: testManyArgs:
; X64: # %bb.0:
; X64-NEXT: jmp FuncManyArgs # TAILCALL
%r = musttail call i32 @FuncManyArgs(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i8 %6, ptr byval(%struct.5xi32) %7)
ret i32 %r
}
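; The musttail call targets the function itself; the incoming arguments are
; forwarded in place, so this again lowers to a plain jmp.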
define dso_local i32 @testRecursion(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i8 %6, ptr byval(%struct.5xi32) %7) {
; X32-LABEL: testRecursion:
; X32: # %bb.0:
; X32-NEXT: jmp testRecursion # TAILCALL
;
; X64-LABEL: testRecursion:
; X64: # %bb.0:
; X64-NEXT: jmp testRecursion # TAILCALL
%r = musttail call i32 @testRecursion(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i8 %6, ptr byval(%struct.5xi32) %7)
ret i32 %r
}
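; Helper callee for the test below: despite its name, it simply adds its two
; byval arguments. noinline keeps it from being inlined into its caller.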
define dso_local i32 @swap(ptr byval(%struct.1xi32) %0, ptr byval(%struct.1xi32) %1) noinline {
; X32-LABEL: swap:
; X32: # %bb.0: # %entry
; X32-NEXT: mov eax, dword ptr [esp + 4]
; X32-NEXT: add eax, dword ptr [esp + 8]
; X32-NEXT: ret
;
; X64-LABEL: swap:
; X64: # %bb.0: # %entry
; X64-NEXT: mov eax, dword ptr [rsp + 8]
; X64-NEXT: add eax, dword ptr [rsp + 16]
; X64-NEXT: ret
entry:
%a.ptr = getelementptr inbounds %struct.1xi32, ptr %0, i32 0, i32 0, i32 0
%a = load i32, ptr %a.ptr, align 4
%b.ptr = getelementptr inbounds %struct.1xi32, ptr %1, i32 0, i32 0, i32 0
%b = load i32, ptr %b.ptr, align 4
%sum = add i32 %a, %b
ret i32 %sum
}
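; The byval arguments are forwarded to @swap in swapped order, so their stack
; copies have to be exchanged before the tail call.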
define dso_local i32 @swapByValArguments(ptr byval(%struct.1xi32) %0, ptr byval(%struct.1xi32) %1) {
; X32-LABEL: swapByValArguments:
; X32: # %bb.0:
; X32-NEXT: sub esp, 8
; X32-NEXT: .cfi_def_cfa_offset 12
; X32-NEXT: mov eax, dword ptr [esp + 12]
; X32-NEXT: mov dword ptr [esp], eax
; X32-NEXT: mov eax, dword ptr [esp + 16]
; X32-NEXT: mov dword ptr [esp + 4], eax
; X32-NEXT: add esp, 8
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: jmp swap # TAILCALL
;
; X64-LABEL: swapByValArguments:
; X64: # %bb.0:
; X64-NEXT: mov eax, dword ptr [rsp + 8]
; X64-NEXT: mov dword ptr [rsp - 16], eax
; X64-NEXT: mov eax, dword ptr [rsp + 16]
; X64-NEXT: mov dword ptr [rsp - 8], eax
; X64-NEXT: jmp swap # TAILCALL
%r = musttail call i32 @swap(ptr byval(%struct.1xi32) %1, ptr byval(%struct.1xi32) %0)
ret i32 %r
}
; Clang only uses byval for arguments of 65 bytes or larger, but e.g. rustc
; does use byval for smaller types. Here we use a 20-byte struct to keep the
; tests more readable.
%twenty_bytes = type { [5 x i32] }
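; For reference, a rough C-level equivalent of %twenty_bytes (illustrative
; only, assuming a 32-bit int) would be:
;   struct twenty_bytes { int a[5]; };  /* 5 * 4 = 20 bytes */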
declare void @large_callee(ptr byval(%twenty_bytes) align 4)
; Functions with byval parameters can be tail-called, because the value is
; laid out on the stack in the same way for the caller and the callee. Unlike
; some targets, x86 never passes byval arguments (even partially) in registers.
define void @large_caller(ptr byval(%twenty_bytes) align 4 %a) {
; X32-LABEL: large_caller:
; X32: # %bb.0: # %entry
; X32-NEXT: jmp large_callee@PLT # TAILCALL
;
; X64-LABEL: large_caller:
; X64: # %bb.0: # %entry
; X64-NEXT: jmp large_callee@PLT # TAILCALL
entry:
musttail call void @large_callee(ptr byval(%twenty_bytes) align 4 %a)
ret void
}
; The IR for this one looks dodgy, because it passes an alloca to a musttail
; callee; however, the alloca is passed as a byval argument, so it is copied
; into the stack space allocated by @large_caller_new_value's caller, which
; makes it valid.
define void @large_caller_new_value(ptr byval(%twenty_bytes) align 4 %a) {
; X32-LABEL: large_caller_new_value:
; X32: # %bb.0: # %entry
; X32-NEXT: sub esp, 20
; X32-NEXT: .cfi_def_cfa_offset 24
; X32-NEXT: mov dword ptr [esp], 0
; X32-NEXT: mov dword ptr [esp + 4], 1
; X32-NEXT: mov dword ptr [esp + 8], 2
; X32-NEXT: mov dword ptr [esp + 12], 3
; X32-NEXT: mov dword ptr [esp + 16], 4
; X32-NEXT: mov dword ptr [esp + 24], 0
; X32-NEXT: mov dword ptr [esp + 28], 1
; X32-NEXT: mov dword ptr [esp + 32], 2
; X32-NEXT: mov dword ptr [esp + 36], 3
; X32-NEXT: mov dword ptr [esp + 40], 4
; X32-NEXT: add esp, 20
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: jmp large_callee@PLT # TAILCALL
;
; X64-LABEL: large_caller_new_value:
; X64: # %bb.0: # %entry
; X64-NEXT: movabs rax, 4294967296
; X64-NEXT: mov qword ptr [rsp - 20], rax
; X64-NEXT: movabs rcx, 12884901890
; X64-NEXT: mov qword ptr [rsp - 12], rcx
; X64-NEXT: mov dword ptr [rsp - 4], 4
; X64-NEXT: mov qword ptr [rsp + 8], rax
; X64-NEXT: mov qword ptr [rsp + 16], rcx
; X64-NEXT: mov dword ptr [rsp + 24], 4
; X64-NEXT: jmp large_callee@PLT # TAILCALL
entry:
%y = alloca %twenty_bytes, align 4
store i32 0, ptr %y, align 4
%0 = getelementptr inbounds i8, ptr %y, i32 4
store i32 1, ptr %0, align 4
%1 = getelementptr inbounds i8, ptr %y, i32 8
store i32 2, ptr %1, align 4
%2 = getelementptr inbounds i8, ptr %y, i32 12
store i32 3, ptr %2, align 4
%3 = getelementptr inbounds i8, ptr %y, i32 16
store i32 4, ptr %3, align 4
musttail call void @large_callee(ptr byval(%twenty_bytes) align 4 %y)
ret void
}
declare void @two_byvals_callee(ptr byval(%twenty_bytes) align 4, ptr byval(%twenty_bytes) align 4)
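; As above, but with two 20-byte byval arguments forwarded in swapped order;
; both copies are shuffled into place before the tail call.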
define void @swap_byvals(ptr byval(%twenty_bytes) align 4 %a, ptr byval(%twenty_bytes) align 4 %b) {
; X32-LABEL: swap_byvals:
; X32: # %bb.0: # %entry
; X32-NEXT: sub esp, 40
; X32-NEXT: .cfi_def_cfa_offset 44
; X32-NEXT: mov eax, dword ptr [esp + 60]
; X32-NEXT: mov dword ptr [esp + 16], eax
; X32-NEXT: mov eax, dword ptr [esp + 56]
; X32-NEXT: mov dword ptr [esp + 12], eax
; X32-NEXT: mov eax, dword ptr [esp + 52]
; X32-NEXT: mov dword ptr [esp + 8], eax
; X32-NEXT: mov eax, dword ptr [esp + 44]
; X32-NEXT: mov ecx, dword ptr [esp + 48]
; X32-NEXT: mov dword ptr [esp + 4], ecx
; X32-NEXT: mov dword ptr [esp], eax
; X32-NEXT: mov eax, dword ptr [esp + 80]
; X32-NEXT: mov dword ptr [esp + 36], eax
; X32-NEXT: mov eax, dword ptr [esp + 76]
; X32-NEXT: mov dword ptr [esp + 32], eax
; X32-NEXT: mov eax, dword ptr [esp + 72]
; X32-NEXT: mov dword ptr [esp + 28], eax
; X32-NEXT: mov eax, dword ptr [esp + 64]
; X32-NEXT: mov ecx, dword ptr [esp + 68]
; X32-NEXT: mov dword ptr [esp + 24], ecx
; X32-NEXT: mov dword ptr [esp + 20], eax
; X32-NEXT: add esp, 40
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: jmp two_byvals_callee@PLT # TAILCALL
;
; X64-LABEL: swap_byvals:
; X64: # %bb.0: # %entry
; X64-NEXT: mov eax, dword ptr [rsp + 24]
; X64-NEXT: mov dword ptr [rsp - 8], eax
; X64-NEXT: movaps xmm0, xmmword ptr [rsp + 8]
; X64-NEXT: movaps xmmword ptr [rsp - 24], xmm0
; X64-NEXT: mov eax, dword ptr [rsp + 48]
; X64-NEXT: mov dword ptr [rsp - 32], eax
; X64-NEXT: mov rax, qword ptr [rsp + 32]
; X64-NEXT: mov rcx, qword ptr [rsp + 40]
; X64-NEXT: mov qword ptr [rsp - 40], rcx
; X64-NEXT: mov qword ptr [rsp - 48], rax
; X64-NEXT: jmp two_byvals_callee@PLT # TAILCALL
entry:
musttail call void @two_byvals_callee(ptr byval(%twenty_bytes) align 4 %b, ptr byval(%twenty_bytes) align 4 %a)
ret void
}
; A forwarded byval arg, but at a different argument position. The call below
; uses 'tail call' rather than 'musttail', so the tail call is optional: on
; x86-64 the leading i32 is passed in a register, leaving the byval arg at the
; correct stack offset, so the call is tail-call optimized; on i386 every
; argument is on the stack, so the byval would have to be moved and a normal
; call is emitted instead.
declare void @shift_byval_callee(ptr byval(%twenty_bytes) align 4)
define void @shift_byval(i32 %a, ptr byval(%twenty_bytes) align 4 %b) {
; X32-LABEL: shift_byval:
; X32: # %bb.0: # %entry
; X32-NEXT: push edi
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: push esi
; X32-NEXT: .cfi_def_cfa_offset 12
; X32-NEXT: .cfi_offset esi, -12
; X32-NEXT: .cfi_offset edi, -8
; X32-NEXT: mov eax, dword ptr [esp + 32]
; X32-NEXT: mov ecx, dword ptr [esp + 28]
; X32-NEXT: mov edx, dword ptr [esp + 24]
; X32-NEXT: mov esi, dword ptr [esp + 16]
; X32-NEXT: mov edi, dword ptr [esp + 20]
; X32-NEXT: push eax
; X32-NEXT: .cfi_adjust_cfa_offset 4
; X32-NEXT: push ecx
; X32-NEXT: .cfi_adjust_cfa_offset 4
; X32-NEXT: push edx
; X32-NEXT: .cfi_adjust_cfa_offset 4
; X32-NEXT: push edi
; X32-NEXT: .cfi_adjust_cfa_offset 4
; X32-NEXT: push esi
; X32-NEXT: .cfi_adjust_cfa_offset 4
; X32-NEXT: call shift_byval_callee@PLT
; X32-NEXT: add esp, 20
; X32-NEXT: .cfi_adjust_cfa_offset -20
; X32-NEXT: pop esi
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: pop edi
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: ret
;
; X64-LABEL: shift_byval:
; X64: # %bb.0: # %entry
; X64-NEXT: jmp shift_byval_callee@PLT # TAILCALL
entry:
tail call void @shift_byval_callee(ptr byval(%twenty_bytes) align 4 %b)
ret void
}
; A global object is passed as a byval argument, so it must be copied, but no
; stack temporary is needed.
@large_global = external global %twenty_bytes
define void @large_caller_from_global(ptr byval(%twenty_bytes) align 4 %a) {
; X32-LABEL: large_caller_from_global:
; X32: # %bb.0: # %entry
; X32-NEXT: mov eax, dword ptr [large_global+16]
; X32-NEXT: mov dword ptr [esp + 20], eax
; X32-NEXT: mov eax, dword ptr [large_global+12]
; X32-NEXT: mov dword ptr [esp + 16], eax
; X32-NEXT: mov eax, dword ptr [large_global+8]
; X32-NEXT: mov dword ptr [esp + 12], eax
; X32-NEXT: mov eax, dword ptr [large_global+4]
; X32-NEXT: mov dword ptr [esp + 8], eax
; X32-NEXT: mov eax, dword ptr [large_global]
; X32-NEXT: mov dword ptr [esp + 4], eax
; X32-NEXT: jmp large_callee@PLT # TAILCALL
;
; X64-LABEL: large_caller_from_global:
; X64: # %bb.0: # %entry
; X64-NEXT: mov rax, qword ptr [rip + large_global@GOTPCREL]
; X64-NEXT: mov ecx, dword ptr [rax + 16]
; X64-NEXT: mov dword ptr [rsp + 24], ecx
; X64-NEXT: mov rcx, qword ptr [rax]
; X64-NEXT: mov rax, qword ptr [rax + 8]
; X64-NEXT: mov qword ptr [rsp + 16], rax
; X64-NEXT: mov qword ptr [rsp + 8], rcx
; X64-NEXT: jmp large_callee@PLT # TAILCALL
entry:
musttail call void @large_callee(ptr byval(%twenty_bytes) align 4 @large_global)
ret void
}