| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=i686-windows < %s | FileCheck %s |
| |
| declare void @addrof_i1(i1*) |
| declare void @addrof_i32(i32*) |
| declare void @addrof_i64(i64*) |
| declare void @addrof_i128(i128*) |
| declare void @addrof_i32_x3(i32*, i32*, i32*) |
| |
define void @simple(i32 %x) {
; CHECK-LABEL: simple:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: calll _addrof_i32
; CHECK-NEXT: addl $4, %esp
; CHECK-NEXT: retl
; Baseline elision case: the store of %x into its own alloca lets codegen pass
; the address of %x's incoming argument slot directly (the leal off %esp above)
; instead of materializing a separate stack copy.
entry:
  %x.addr = alloca i32
  store i32 %x, i32* %x.addr
  call void @addrof_i32(i32* %x.addr)
  ret void
}
| |
| ; We need to load %x before calling addrof_i32 now because it could mutate %x in |
| ; place. |
| |
define i32 @use_arg(i32 %x) {
; CHECK-LABEL: use_arg:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: calll _addrof_i32
; CHECK-NEXT: addl $4, %esp
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
; Like @simple, but %x is also returned. Because the callee gets a pointer to
; %x's home slot, the CHECK lines require %x to be loaded into callee-saved
; %esi BEFORE the call, since the callee may overwrite the slot in place.
entry:
  %x.addr = alloca i32
  store i32 %x, i32* %x.addr
  call void @addrof_i32(i32* %x.addr)
  ret i32 %x
}
| |
| ; We won't copy elide for types needing legalization such as i64 or i1. |
| |
define i64 @split_i64(i64 %x) {
; CHECK-LABEL: split_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT: leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: calll _addrof_i64
; CHECK-NEXT: addl $4, %esp
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: movl %edi, %edx
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK-NEXT: retl
; i64 is legalized into two 32-bit halves on i686; the CHECK lines show both
; halves being preserved in %esi/%edi across the call (for the eax:edx return)
; while the address of the argument memory is still handed to the callee.
entry:
  %x.addr = alloca i64, align 4
  store i64 %x, i64* %x.addr, align 4
  call void @addrof_i64(i64* %x.addr)
  ret i64 %x
}
| |
define i1 @i1_arg(i1 %x) {
; CHECK-LABEL: i1_arg:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: movb {{[0-9]+}}(%esp), %bl
; CHECK-NEXT: movl %ebx, %eax
; CHECK-NEXT: andb $1, %al
; CHECK-NEXT: movb %al, {{[0-9]+}}(%esp)
; CHECK-NEXT: leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: calll _addrof_i1
; CHECK-NEXT: addl $4, %esp
; CHECK-NEXT: movl %ebx, %eax
; CHECK-NEXT: addl $4, %esp
; CHECK-NEXT: popl %ebx
; CHECK-NEXT: retl
; i1 needs legalization: the stored value is masked to one bit (andb $1) and
; written to a fresh local slot, so no elision happens — the callee gets the
; address of the masked copy, not of the incoming argument byte.
  %x.addr = alloca i1
  store i1 %x, i1* %x.addr
  call void @addrof_i1(i1* %x.addr)
  ret i1 %x
}
| |
| ; We can't copy elide when an i64 is split between registers and memory in a |
| ; fastcc function. |
| |
define fastcc i64 @fastcc_split_i64(i64* %p, i64 %x) {
; CHECK-LABEL: fastcc_split_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: subl $8, %esp
; CHECK-NEXT: movl %edx, %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT: movl %edi, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %edx, (%esp)
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: calll _addrof_i64
; CHECK-NEXT: addl $4, %esp
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: movl %edi, %edx
; CHECK-NEXT: addl $8, %esp
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK-NEXT: retl
; Under fastcc the i64 %x arrives split between a register (%edx, per the
; CHECK lines) and the stack, so there is no single contiguous home slot to
; reuse: both halves are reassembled into a fresh 8-byte local (the movls
; into (%esp)) before its address is passed to the callee.
entry:
  %x.addr = alloca i64, align 4
  store i64 %x, i64* %x.addr, align 4
  call void @addrof_i64(i64* %x.addr)
  ret i64 %x
}
| |
| ; We can't copy elide when it would reduce the user requested alignment. |
| |
define void @high_alignment(i32 %x) {
; CHECK-LABEL: high_alignment:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: andl $-128, %esp
; CHECK-NEXT: subl $128, %esp
; CHECK-NEXT: movl 8(%ebp), %eax
; CHECK-NEXT: movl %eax, (%esp)
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: calll _addrof_i32
; CHECK-NEXT: addl $4, %esp
; CHECK-NEXT: movl %ebp, %esp
; CHECK-NEXT: retl
; The alloca requests 128-byte alignment, which the 4-byte-aligned incoming
; argument slot cannot satisfy; the CHECK lines show the stack being realigned
; (andl $-128) and %x copied into the over-aligned local instead of elided.
entry:
  %x.p = alloca i32, align 128
  store i32 %x, i32* %x.p
  call void @addrof_i32(i32* %x.p)
  ret void
}
| |
| ; We can't copy elide when it would reduce the ABI required alignment. |
| ; FIXME: We should lower the ABI alignment of i64 on Windows, since MSVC |
| ; doesn't guarantee it. |
| |
define void @abi_alignment(i64 %x) {
; CHECK-LABEL: abi_alignment:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: andl $-8, %esp
; CHECK-NEXT: subl $8, %esp
; CHECK-NEXT: movl 8(%ebp), %eax
; CHECK-NEXT: movl 12(%ebp), %ecx
; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %eax, (%esp)
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: calll _addrof_i64
; CHECK-NEXT: addl $4, %esp
; CHECK-NEXT: movl %ebp, %esp
; CHECK-NEXT: retl
; No explicit align on the alloca, so the i64's ABI alignment (8, per the
; andl $-8 realignment above) applies; the 4-byte-aligned argument area cannot
; provide it, so both halves are copied into an aligned local before the call.
entry:
  %x.p = alloca i64
  store i64 %x, i64* %x.p
  call void @addrof_i64(i64* %x.p)
  ret void
}
| |
| ; The code we generate for this is unimportant. This is mostly a crash test. |
| |
define void @split_i128(i128* %sret, i128 %x) {
; CHECK-LABEL: split_i128:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: andl $-8, %esp
; CHECK-NEXT: subl $32, %esp
; CHECK-NEXT: movl 12(%ebp), %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl 16(%ebp), %ebx
; CHECK-NEXT: movl 20(%ebp), %esi
; CHECK-NEXT: movl 24(%ebp), %edi
; CHECK-NEXT: movl %edi, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %esi, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %ebx, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT: leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: calll _addrof_i128
; CHECK-NEXT: addl $4, %esp
; CHECK-NEXT: movl 8(%ebp), %eax
; CHECK-NEXT: movl %edi, 12(%eax)
; CHECK-NEXT: movl %esi, 8(%eax)
; CHECK-NEXT: movl %ebx, 4(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, (%eax)
; CHECK-NEXT: leal -12(%ebp), %esp
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK-NEXT: popl %ebx
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: retl
; Crash/sanity test (see the comment above the function): the i128 is split
; into four dwords that are copied into a local for the call and also written
; through the %sret output pointer afterwards; exact codegen is not the point.
entry:
  %x.addr = alloca i128
  store i128 %x, i128* %x.addr
  call void @addrof_i128(i128* %x.addr)
  store i128 %x, i128* %sret
  ret void
}
| |
| ; Check that we load all of x, y, and z before the call. |
| |
define i32 @three_args(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: three_args:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: addl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: addl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: leal {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: leal {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: pushl %ecx
; CHECK-NEXT: pushl %edx
; CHECK-NEXT: calll _addrof_i32_x3
; CHECK-NEXT: addl $12, %esp
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
; Elision applied to three arguments at once: the CHECK lines require the sum
; x+y+z to be computed (into %esi) BEFORE the call, since the callee receives
; pointers to all three argument slots and may modify them in place.
entry:
  %z.addr = alloca i32, align 4
  %y.addr = alloca i32, align 4
  %x.addr = alloca i32, align 4
  store i32 %z, i32* %z.addr, align 4
  store i32 %y, i32* %y.addr, align 4
  store i32 %x, i32* %x.addr, align 4
  call void @addrof_i32_x3(i32* %x.addr, i32* %y.addr, i32* %z.addr)
  %s1 = add i32 %x, %y
  %sum = add i32 %s1, %z
  ret i32 %sum
}
| |
define void @two_args_same_alloca(i32 %x, i32 %y) {
; CHECK-LABEL: two_args_same_alloca:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT: leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: calll _addrof_i32
; CHECK-NEXT: addl $4, %esp
; CHECK-NEXT: retl
; Both %x and %y are stored into the SAME alloca; the second store (%y) wins.
; The CHECK lines show one argument value being copied into an %esp slot
; before its address is passed, so the elided slot still holds %y's value.
entry:
  %x.addr = alloca i32
  store i32 %x, i32* %x.addr
  store i32 %y, i32* %x.addr
  call void @addrof_i32(i32* %x.addr)
  ret void
}
| |
define void @avoid_byval(i32* byval(i32) %x) {
; CHECK-LABEL: avoid_byval:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl %eax, (%esp)
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: calll _addrof_i32
; CHECK-NEXT: addl $4, %esp
; CHECK-NEXT: popl %eax
; CHECK-NEXT: retl
; %x is a byval pointer: its address is already the caller-provided stack
; memory. The alloca here holds the POINTER (i32**), so elision must not kick
; in; the callee is handed the byval slot's address directly (the leal above).
entry:
  %x.p.p = alloca i32*
  store i32* %x, i32** %x.p.p
  call void @addrof_i32(i32* %x)
  ret void
}
| |
define void @avoid_inalloca(i32* inalloca(i32) %x) {
; CHECK-LABEL: avoid_inalloca:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl %eax, (%esp)
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: calll _addrof_i32
; CHECK-NEXT: addl $4, %esp
; CHECK-NEXT: popl %eax
; CHECK-NEXT: retl
; Same shape as @avoid_byval but with the inalloca attribute: the argument
; already lives in caller-owned stack memory, so the elision optimization
; must leave it alone and just take its address.
entry:
  %x.p.p = alloca i32*
  store i32* %x, i32** %x.p.p
  call void @addrof_i32(i32* %x)
  ret void
}
| |
define void @avoid_preallocated(i32* preallocated(i32) %x) {
; CHECK-LABEL: avoid_preallocated:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl %eax, (%esp)
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: calll _addrof_i32
; CHECK-NEXT: addl $4, %esp
; CHECK-NEXT: popl %eax
; CHECK-NEXT: retl
; Same shape as @avoid_byval/@avoid_inalloca but with the preallocated
; attribute: again the argument memory is caller-owned, so no elision.
entry:
  %x.p.p = alloca i32*
  store i32* %x, i32** %x.p.p
  call void @addrof_i32(i32* %x)
  ret void
}
| |
| ; Don't elide the copy when the alloca is escaped with a store. |
define void @escape_with_store(i32 %x) {
; CHECK-LABEL: escape_with_store:
; CHECK: # %bb.0:
; CHECK-NEXT: subl $8, %esp
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl %esp, %ecx
; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %eax, (%esp)
; CHECK-NEXT: pushl %ecx
; CHECK-NEXT: calll _addrof_i32
; CHECK-NEXT: addl $12, %esp
; CHECK-NEXT: retl
; %x1's address escapes: it is stored into %x2, loaded back, and written
; through before %x is stored. The CHECK lines show a real local being
; allocated (subl $8) and %x copied into it — no elision.
  %x1 = alloca i32
  %x2 = alloca i32*
  store i32* %x1, i32** %x2
  %x3 = load i32*, i32** %x2
  store i32 0, i32* %x3
  store i32 %x, i32* %x1
  call void @addrof_i32(i32* %x1)
  ret void
}
| |
| ; This test case exposed issues with the use of TokenFactor. |
| |
define void @sret_and_elide(i32* sret(i32) %sret, i32 %v) {
; CHECK-LABEL: sret_and_elide:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT: leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: calll _addrof_i32
; CHECK-NEXT: addl $4, %esp
; CHECK-NEXT: movl %edi, (%esi)
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: retl
; Elision combined with an sret out-parameter (a TokenFactor stress case, per
; the comment above): %v's slot address is passed to the callee, while %v
; itself is kept in %edi across the call and stored through %sret afterwards.
; Note the sret pointer is also returned in %eax per the i686 convention.
  %v.p = alloca i32
  store i32 %v, i32* %v.p
  call void @addrof_i32(i32* %v.p)
  store i32 %v, i32* %sret
  ret void
}
| |
define void @avoid_partially_initialized_alloca(i32 %x) {
; CHECK-LABEL: avoid_partially_initialized_alloca:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: andl $-8, %esp
; CHECK-NEXT: subl $8, %esp
; CHECK-NEXT: movl 8(%ebp), %eax
; CHECK-NEXT: movl %eax, (%esp)
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: calll _addrof_i32
; CHECK-NEXT: addl $4, %esp
; CHECK-NEXT: movl %ebp, %esp
; CHECK-NEXT: retl
; The alloca is i64 but only the low i32 is initialized through a bitcast, so
; the store does not cover the whole object and elision must not apply; the
; CHECK lines show %x copied into a dedicated (8-byte-aligned) local.
  %a = alloca i64
  %p = bitcast i64* %a to i32*
  store i32 %x, i32* %p
  call void @addrof_i32(i32* %p)
  ret void
}
| |
| ; Ensure no copy elision happens as the two i3 values fed into icmp may have |
| ; garbage in the upper bits, a truncation is needed. |
| |
define i1 @use_i3(i3 %a1, i3 %a2) {
; CHECK-LABEL: use_i3:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al
; CHECK-NEXT: andb $7, %al
; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl
; CHECK-NEXT: andb $7, %cl
; CHECK-NEXT: movb %cl, {{[0-9]+}}(%esp)
; CHECK-NEXT: cmpb %cl, %al
; CHECK-NEXT: sete %al
; CHECK-NEXT: popl %ecx
; CHECK-NEXT: retl
; The high bits of an i3 argument byte are undefined, so (per the comment
; above) both values must be truncated (andb $7) before the compare; the
; store/load round-trip through %tmp must not be elided into a raw byte read.
  %tmp = alloca i3
  store i3 %a2, i3* %tmp
  %val = load i3, i3* %tmp
  %res = icmp eq i3 %a1, %val
  ret i1 %res
}
| |