| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefix=X64 |
| ; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SSE2 |
| ; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=slm -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SLM |
| ; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=goldmont -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SLM |
| ; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=knl -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SLM |
| ; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=atom -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-ATOM |
| |
| ; On x86, an atomic rmw operation that does not modify the value in memory |
| ; (such as atomic add 0) can be replaced by an mfence followed by a mov. |
| ; This is explained (with the motivation for such an optimization) in |
| ; http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf |
| |
| ; add8: atomicrmw add of constant 0 (monotonic) on an i8; the old value is |
| ; returned. The x86-64 and i686 +sse2 configurations expect an mfence followed |
| ; by a zero-extending byte load. The i686 -sse2 configurations expect a |
| ; lock xaddb of a zeroed register instead, and Atom additionally expects two |
| ; nops before the return. |
| define i8 @add8(ptr %p) { |
| ; X64-LABEL: add8: |
| ; X64: # %bb.0: |
| ; X64-NEXT: mfence |
| ; X64-NEXT: movzbl (%rdi), %eax |
| ; X64-NEXT: retq |
| ; |
| ; X86-SSE2-LABEL: add8: |
| ; X86-SSE2: # %bb.0: |
| ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-SSE2-NEXT: mfence |
| ; X86-SSE2-NEXT: movzbl (%eax), %eax |
| ; X86-SSE2-NEXT: retl |
| ; |
| ; X86-SLM-LABEL: add8: |
| ; X86-SLM: # %bb.0: |
| ; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-SLM-NEXT: xorl %eax, %eax |
| ; X86-SLM-NEXT: lock xaddb %al, (%ecx) |
| ; X86-SLM-NEXT: # kill: def $al killed $al killed $eax |
| ; X86-SLM-NEXT: retl |
| ; |
| ; X86-ATOM-LABEL: add8: |
| ; X86-ATOM: # %bb.0: |
| ; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-ATOM-NEXT: xorl %eax, %eax |
| ; X86-ATOM-NEXT: lock xaddb %al, (%ecx) |
| ; X86-ATOM-NEXT: # kill: def $al killed $al killed $eax |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: retl |
| %1 = atomicrmw add ptr %p, i8 0 monotonic |
| ret i8 %1 |
| } |
| |
| ; or16: atomicrmw or of constant 0 (acquire) on an i16; the old value is |
| ; returned. The x86-64 and i686 +sse2 configurations expect an mfence followed |
| ; by a zero-extending 16-bit load. The i686 -sse2 configurations (including |
| ; Atom) expect a lock cmpxchgw retry loop that writes back the value it read. |
| define i16 @or16(ptr %p) { |
| ; X64-LABEL: or16: |
| ; X64: # %bb.0: |
| ; X64-NEXT: mfence |
| ; X64-NEXT: movzwl (%rdi), %eax |
| ; X64-NEXT: retq |
| ; |
| ; X86-SSE2-LABEL: or16: |
| ; X86-SSE2: # %bb.0: |
| ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-SSE2-NEXT: mfence |
| ; X86-SSE2-NEXT: movzwl (%eax), %eax |
| ; X86-SSE2-NEXT: retl |
| ; |
| ; X86-SLM-LABEL: or16: |
| ; X86-SLM: # %bb.0: |
| ; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-SLM-NEXT: movzwl (%ecx), %eax |
| ; X86-SLM-NEXT: .p2align 4, 0x90 |
| ; X86-SLM-NEXT: .LBB1_1: # %atomicrmw.start |
| ; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; X86-SLM-NEXT: lock cmpxchgw %ax, (%ecx) |
| ; X86-SLM-NEXT: jne .LBB1_1 |
| ; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end |
| ; X86-SLM-NEXT: retl |
| ; |
| ; X86-ATOM-LABEL: or16: |
| ; X86-ATOM: # %bb.0: |
| ; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-ATOM-NEXT: movzwl (%ecx), %eax |
| ; X86-ATOM-NEXT: .p2align 4, 0x90 |
| ; X86-ATOM-NEXT: .LBB1_1: # %atomicrmw.start |
| ; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; X86-ATOM-NEXT: lock cmpxchgw %ax, (%ecx) |
| ; X86-ATOM-NEXT: jne .LBB1_1 |
| ; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end |
| ; X86-ATOM-NEXT: retl |
| %1 = atomicrmw or ptr %p, i16 0 acquire |
| ret i16 %1 |
| } |
| |
| ; xor32: atomicrmw xor of constant 0 (release) on an i32; the old value is |
| ; returned. The x86-64 and i686 +sse2 configurations expect an mfence followed |
| ; by a plain 32-bit load. The i686 -sse2 configurations (including Atom) expect |
| ; a lock cmpxchgl retry loop that writes back the value it read. |
| define i32 @xor32(ptr %p) { |
| ; X64-LABEL: xor32: |
| ; X64: # %bb.0: |
| ; X64-NEXT: mfence |
| ; X64-NEXT: movl (%rdi), %eax |
| ; X64-NEXT: retq |
| ; |
| ; X86-SSE2-LABEL: xor32: |
| ; X86-SSE2: # %bb.0: |
| ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-SSE2-NEXT: mfence |
| ; X86-SSE2-NEXT: movl (%eax), %eax |
| ; X86-SSE2-NEXT: retl |
| ; |
| ; X86-SLM-LABEL: xor32: |
| ; X86-SLM: # %bb.0: |
| ; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-SLM-NEXT: movl (%ecx), %eax |
| ; X86-SLM-NEXT: .p2align 4, 0x90 |
| ; X86-SLM-NEXT: .LBB2_1: # %atomicrmw.start |
| ; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; X86-SLM-NEXT: lock cmpxchgl %eax, (%ecx) |
| ; X86-SLM-NEXT: jne .LBB2_1 |
| ; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end |
| ; X86-SLM-NEXT: retl |
| ; |
| ; X86-ATOM-LABEL: xor32: |
| ; X86-ATOM: # %bb.0: |
| ; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-ATOM-NEXT: movl (%ecx), %eax |
| ; X86-ATOM-NEXT: .p2align 4, 0x90 |
| ; X86-ATOM-NEXT: .LBB2_1: # %atomicrmw.start |
| ; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; X86-ATOM-NEXT: lock cmpxchgl %eax, (%ecx) |
| ; X86-ATOM-NEXT: jne .LBB2_1 |
| ; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end |
| ; X86-ATOM-NEXT: retl |
| %1 = atomicrmw xor ptr %p, i32 0 release |
| ret i32 %1 |
| } |
| |
| ; sub64: atomicrmw sub of constant 0 (seq_cst) on an i64; the old value is |
| ; returned. x86-64 expects an mfence followed by a 64-bit load. All i686 |
| ; configurations share one expectation: a lock cmpxchg8b retry loop that |
| ; copies edx:eax into ecx:ebx so the stored value equals the value read. |
| define i64 @sub64(ptr %p) { |
| ; X64-LABEL: sub64: |
| ; X64: # %bb.0: |
| ; X64-NEXT: mfence |
| ; X64-NEXT: movq (%rdi), %rax |
| ; X64-NEXT: retq |
| ; |
| ; X86-LABEL: sub64: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: .cfi_def_cfa_offset 8 |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: .cfi_def_cfa_offset 12 |
| ; X86-NEXT: .cfi_offset %esi, -12 |
| ; X86-NEXT: .cfi_offset %ebx, -8 |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi |
| ; X86-NEXT: movl (%esi), %eax |
| ; X86-NEXT: movl 4(%esi), %edx |
| ; X86-NEXT: .p2align 4, 0x90 |
| ; X86-NEXT: .LBB3_1: # %atomicrmw.start |
| ; X86-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; X86-NEXT: movl %edx, %ecx |
| ; X86-NEXT: movl %eax, %ebx |
| ; X86-NEXT: lock cmpxchg8b (%esi) |
| ; X86-NEXT: jne .LBB3_1 |
| ; X86-NEXT: # %bb.2: # %atomicrmw.end |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: .cfi_def_cfa_offset 8 |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: .cfi_def_cfa_offset 4 |
| ; X86-NEXT: retl |
| %1 = atomicrmw sub ptr %p, i64 0 seq_cst |
| ret i64 %1 |
| } |
| |
| ; or128: atomicrmw or of constant 0 (monotonic) on an i128; the old value is |
| ; returned. No configuration is expected to use a fence plus load here: each |
| ; one calls __sync_fetch_and_or_16 with an all-zero operand (zeroed esi/edx on |
| ; x86-64, four pushed zero words on i686). The i686 expectations then copy the |
| ; 16-byte result from a stack temporary to the address loaded from 8(%ebp) |
| ; and return with retl $4. |
| define i128 @or128(ptr %p) { |
| ; X64-LABEL: or128: |
| ; X64: # %bb.0: |
| ; X64-NEXT: pushq %rax |
| ; X64-NEXT: .cfi_def_cfa_offset 16 |
| ; X64-NEXT: xorl %esi, %esi |
| ; X64-NEXT: xorl %edx, %edx |
| ; X64-NEXT: callq __sync_fetch_and_or_16@PLT |
| ; X64-NEXT: popq %rcx |
| ; X64-NEXT: .cfi_def_cfa_offset 8 |
| ; X64-NEXT: retq |
| ; |
| ; X86-SSE2-LABEL: or128: |
| ; X86-SSE2: # %bb.0: |
| ; X86-SSE2-NEXT: pushl %ebp |
| ; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 |
| ; X86-SSE2-NEXT: .cfi_offset %ebp, -8 |
| ; X86-SSE2-NEXT: movl %esp, %ebp |
| ; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp |
| ; X86-SSE2-NEXT: pushl %edi |
| ; X86-SSE2-NEXT: pushl %esi |
| ; X86-SSE2-NEXT: andl $-8, %esp |
| ; X86-SSE2-NEXT: subl $16, %esp |
| ; X86-SSE2-NEXT: .cfi_offset %esi, -16 |
| ; X86-SSE2-NEXT: .cfi_offset %edi, -12 |
| ; X86-SSE2-NEXT: movl 8(%ebp), %esi |
| ; X86-SSE2-NEXT: movl %esp, %eax |
| ; X86-SSE2-NEXT: pushl $0 |
| ; X86-SSE2-NEXT: pushl $0 |
| ; X86-SSE2-NEXT: pushl $0 |
| ; X86-SSE2-NEXT: pushl $0 |
| ; X86-SSE2-NEXT: pushl 12(%ebp) |
| ; X86-SSE2-NEXT: pushl %eax |
| ; X86-SSE2-NEXT: calll __sync_fetch_and_or_16 |
| ; X86-SSE2-NEXT: addl $20, %esp |
| ; X86-SSE2-NEXT: movl (%esp), %eax |
| ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi |
| ; X86-SSE2-NEXT: movl %edi, 8(%esi) |
| ; X86-SSE2-NEXT: movl %edx, 12(%esi) |
| ; X86-SSE2-NEXT: movl %eax, (%esi) |
| ; X86-SSE2-NEXT: movl %ecx, 4(%esi) |
| ; X86-SSE2-NEXT: movl %esi, %eax |
| ; X86-SSE2-NEXT: leal -8(%ebp), %esp |
| ; X86-SSE2-NEXT: popl %esi |
| ; X86-SSE2-NEXT: popl %edi |
| ; X86-SSE2-NEXT: popl %ebp |
| ; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4 |
| ; X86-SSE2-NEXT: retl $4 |
| ; |
| ; X86-SLM-LABEL: or128: |
| ; X86-SLM: # %bb.0: |
| ; X86-SLM-NEXT: pushl %ebp |
| ; X86-SLM-NEXT: .cfi_def_cfa_offset 8 |
| ; X86-SLM-NEXT: .cfi_offset %ebp, -8 |
| ; X86-SLM-NEXT: movl %esp, %ebp |
| ; X86-SLM-NEXT: .cfi_def_cfa_register %ebp |
| ; X86-SLM-NEXT: pushl %edi |
| ; X86-SLM-NEXT: pushl %esi |
| ; X86-SLM-NEXT: andl $-8, %esp |
| ; X86-SLM-NEXT: subl $16, %esp |
| ; X86-SLM-NEXT: .cfi_offset %esi, -16 |
| ; X86-SLM-NEXT: .cfi_offset %edi, -12 |
| ; X86-SLM-NEXT: movl 8(%ebp), %esi |
| ; X86-SLM-NEXT: movl 12(%ebp), %eax |
| ; X86-SLM-NEXT: movl %esp, %ecx |
| ; X86-SLM-NEXT: pushl $0 |
| ; X86-SLM-NEXT: pushl $0 |
| ; X86-SLM-NEXT: pushl $0 |
| ; X86-SLM-NEXT: pushl $0 |
| ; X86-SLM-NEXT: pushl %eax |
| ; X86-SLM-NEXT: pushl %ecx |
| ; X86-SLM-NEXT: calll __sync_fetch_and_or_16 |
| ; X86-SLM-NEXT: addl $20, %esp |
| ; X86-SLM-NEXT: movl (%esp), %eax |
| ; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %edi |
| ; X86-SLM-NEXT: movl %edi, 8(%esi) |
| ; X86-SLM-NEXT: movl %edx, 12(%esi) |
| ; X86-SLM-NEXT: movl %eax, (%esi) |
| ; X86-SLM-NEXT: movl %ecx, 4(%esi) |
| ; X86-SLM-NEXT: movl %esi, %eax |
| ; X86-SLM-NEXT: leal -8(%ebp), %esp |
| ; X86-SLM-NEXT: popl %esi |
| ; X86-SLM-NEXT: popl %edi |
| ; X86-SLM-NEXT: popl %ebp |
| ; X86-SLM-NEXT: .cfi_def_cfa %esp, 4 |
| ; X86-SLM-NEXT: retl $4 |
| ; |
| ; X86-ATOM-LABEL: or128: |
| ; X86-ATOM: # %bb.0: |
| ; X86-ATOM-NEXT: pushl %ebp |
| ; X86-ATOM-NEXT: .cfi_def_cfa_offset 8 |
| ; X86-ATOM-NEXT: .cfi_offset %ebp, -8 |
| ; X86-ATOM-NEXT: leal (%esp), %ebp |
| ; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp |
| ; X86-ATOM-NEXT: pushl %edi |
| ; X86-ATOM-NEXT: pushl %esi |
| ; X86-ATOM-NEXT: andl $-8, %esp |
| ; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp |
| ; X86-ATOM-NEXT: .cfi_offset %esi, -16 |
| ; X86-ATOM-NEXT: .cfi_offset %edi, -12 |
| ; X86-ATOM-NEXT: movl 8(%ebp), %esi |
| ; X86-ATOM-NEXT: movl 12(%ebp), %eax |
| ; X86-ATOM-NEXT: movl %esp, %ecx |
| ; X86-ATOM-NEXT: pushl $0 |
| ; X86-ATOM-NEXT: pushl $0 |
| ; X86-ATOM-NEXT: pushl $0 |
| ; X86-ATOM-NEXT: pushl $0 |
| ; X86-ATOM-NEXT: pushl %eax |
| ; X86-ATOM-NEXT: pushl %ecx |
| ; X86-ATOM-NEXT: calll __sync_fetch_and_or_16 |
| ; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp |
| ; X86-ATOM-NEXT: movl (%esp), %ecx |
| ; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edi |
| ; X86-ATOM-NEXT: movl %eax, 8(%esi) |
| ; X86-ATOM-NEXT: movl %edi, 12(%esi) |
| ; X86-ATOM-NEXT: movl %ecx, (%esi) |
| ; X86-ATOM-NEXT: movl %esi, %eax |
| ; X86-ATOM-NEXT: movl %edx, 4(%esi) |
| ; X86-ATOM-NEXT: leal -8(%ebp), %esp |
| ; X86-ATOM-NEXT: popl %esi |
| ; X86-ATOM-NEXT: popl %edi |
| ; X86-ATOM-NEXT: popl %ebp |
| ; X86-ATOM-NEXT: .cfi_def_cfa %esp, 4 |
| ; X86-ATOM-NEXT: retl $4 |
| %1 = atomicrmw or ptr %p, i128 0 monotonic |
| ret i128 %1 |
| } |
| |
| ; For 'and', the idempotent value is (-1) |
| ; and32: atomicrmw and of constant -1 (acq_rel) on an i32; the old value is |
| ; returned. The x86-64 and i686 +sse2 configurations expect an mfence followed |
| ; by a plain 32-bit load. The i686 -sse2 configurations (including Atom) expect |
| ; a lock cmpxchgl retry loop that writes back the value it read. |
| define i32 @and32 (ptr %p) { |
| ; X64-LABEL: and32: |
| ; X64: # %bb.0: |
| ; X64-NEXT: mfence |
| ; X64-NEXT: movl (%rdi), %eax |
| ; X64-NEXT: retq |
| ; |
| ; X86-SSE2-LABEL: and32: |
| ; X86-SSE2: # %bb.0: |
| ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-SSE2-NEXT: mfence |
| ; X86-SSE2-NEXT: movl (%eax), %eax |
| ; X86-SSE2-NEXT: retl |
| ; |
| ; X86-SLM-LABEL: and32: |
| ; X86-SLM: # %bb.0: |
| ; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-SLM-NEXT: movl (%ecx), %eax |
| ; X86-SLM-NEXT: .p2align 4, 0x90 |
| ; X86-SLM-NEXT: .LBB5_1: # %atomicrmw.start |
| ; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; X86-SLM-NEXT: lock cmpxchgl %eax, (%ecx) |
| ; X86-SLM-NEXT: jne .LBB5_1 |
| ; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end |
| ; X86-SLM-NEXT: retl |
| ; |
| ; X86-ATOM-LABEL: and32: |
| ; X86-ATOM: # %bb.0: |
| ; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-ATOM-NEXT: movl (%ecx), %eax |
| ; X86-ATOM-NEXT: .p2align 4, 0x90 |
| ; X86-ATOM-NEXT: .LBB5_1: # %atomicrmw.start |
| ; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; X86-ATOM-NEXT: lock cmpxchgl %eax, (%ecx) |
| ; X86-ATOM-NEXT: jne .LBB5_1 |
| ; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end |
| ; X86-ATOM-NEXT: retl |
| %1 = atomicrmw and ptr %p, i32 -1 acq_rel |
| ret i32 %1 |
| } |
| |
| ; or32_nouse_monotonic: atomicrmw or of constant 0 (monotonic) on an i32 whose |
| ; result is discarded. Every configuration expects only the #MEMBARRIER marker |
| ; before the return; Atom additionally expects six nops after the marker. |
| define void @or32_nouse_monotonic(ptr %p) { |
| ; X64-LABEL: or32_nouse_monotonic: |
| ; X64: # %bb.0: |
| ; X64-NEXT: #MEMBARRIER |
| ; X64-NEXT: retq |
| ; |
| ; X86-GENERIC-LABEL: or32_nouse_monotonic: |
| ; X86-GENERIC: # %bb.0: |
| ; X86-GENERIC-NEXT: #MEMBARRIER |
| ; X86-GENERIC-NEXT: retl |
| ; |
| ; X86-ATOM-LABEL: or32_nouse_monotonic: |
| ; X86-ATOM: # %bb.0: |
| ; X86-ATOM-NEXT: #MEMBARRIER |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: retl |
| atomicrmw or ptr %p, i32 0 monotonic |
| ret void |
| } |
| |
| |
| ; or32_nouse_acquire: atomicrmw or of constant 0 (acquire) on an i32 whose |
| ; result is discarded. Every configuration expects only the #MEMBARRIER marker |
| ; before the return; Atom additionally expects six nops after the marker. |
| define void @or32_nouse_acquire(ptr %p) { |
| ; X64-LABEL: or32_nouse_acquire: |
| ; X64: # %bb.0: |
| ; X64-NEXT: #MEMBARRIER |
| ; X64-NEXT: retq |
| ; |
| ; X86-GENERIC-LABEL: or32_nouse_acquire: |
| ; X86-GENERIC: # %bb.0: |
| ; X86-GENERIC-NEXT: #MEMBARRIER |
| ; X86-GENERIC-NEXT: retl |
| ; |
| ; X86-ATOM-LABEL: or32_nouse_acquire: |
| ; X86-ATOM: # %bb.0: |
| ; X86-ATOM-NEXT: #MEMBARRIER |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: retl |
| atomicrmw or ptr %p, i32 0 acquire |
| ret void |
| } |
| |
| ; or32_nouse_release: atomicrmw or of constant 0 (release) on an i32 whose |
| ; result is discarded. Every configuration expects only the #MEMBARRIER marker |
| ; before the return; Atom additionally expects six nops after the marker. |
| define void @or32_nouse_release(ptr %p) { |
| ; X64-LABEL: or32_nouse_release: |
| ; X64: # %bb.0: |
| ; X64-NEXT: #MEMBARRIER |
| ; X64-NEXT: retq |
| ; |
| ; X86-GENERIC-LABEL: or32_nouse_release: |
| ; X86-GENERIC: # %bb.0: |
| ; X86-GENERIC-NEXT: #MEMBARRIER |
| ; X86-GENERIC-NEXT: retl |
| ; |
| ; X86-ATOM-LABEL: or32_nouse_release: |
| ; X86-ATOM: # %bb.0: |
| ; X86-ATOM-NEXT: #MEMBARRIER |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: retl |
| atomicrmw or ptr %p, i32 0 release |
| ret void |
| } |
| |
| ; or32_nouse_acq_rel: atomicrmw or of constant 0 (acq_rel) on an i32 whose |
| ; result is discarded. Every configuration expects only the #MEMBARRIER marker |
| ; before the return; Atom additionally expects six nops after the marker. |
| define void @or32_nouse_acq_rel(ptr %p) { |
| ; X64-LABEL: or32_nouse_acq_rel: |
| ; X64: # %bb.0: |
| ; X64-NEXT: #MEMBARRIER |
| ; X64-NEXT: retq |
| ; |
| ; X86-GENERIC-LABEL: or32_nouse_acq_rel: |
| ; X86-GENERIC: # %bb.0: |
| ; X86-GENERIC-NEXT: #MEMBARRIER |
| ; X86-GENERIC-NEXT: retl |
| ; |
| ; X86-ATOM-LABEL: or32_nouse_acq_rel: |
| ; X86-ATOM: # %bb.0: |
| ; X86-ATOM-NEXT: #MEMBARRIER |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: retl |
| atomicrmw or ptr %p, i32 0 acq_rel |
| ret void |
| } |
| |
| ; or32_nouse_seq_cst: atomicrmw or of constant 0 (seq_cst) on an i32 whose |
| ; result is discarded. The expected output does not touch %p: it is a |
| ; lock orl $0 on a stack location (a negative offset from rsp on x86-64, |
| ; (%esp) on i686). Atom additionally expects six nops before the return. |
| define void @or32_nouse_seq_cst(ptr %p) { |
| ; X64-LABEL: or32_nouse_seq_cst: |
| ; X64: # %bb.0: |
| ; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp) |
| ; X64-NEXT: retq |
| ; |
| ; X86-GENERIC-LABEL: or32_nouse_seq_cst: |
| ; X86-GENERIC: # %bb.0: |
| ; X86-GENERIC-NEXT: lock orl $0, (%esp) |
| ; X86-GENERIC-NEXT: retl |
| ; |
| ; X86-ATOM-LABEL: or32_nouse_seq_cst: |
| ; X86-ATOM: # %bb.0: |
| ; X86-ATOM-NEXT: lock orl $0, (%esp) |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: retl |
| atomicrmw or ptr %p, i32 0 seq_cst |
| ret void |
| } |
| |
| ; TODO: The value isn't used on 32 bit, so the cmpxchg8b is unneeded |
| ; or64_nouse_seq_cst: atomicrmw or of constant 0 (seq_cst) on an i64 whose |
| ; result is discarded. x86-64 expects a lock orl $0 on a stack location. |
| ; All i686 configurations currently expect the full lock cmpxchg8b retry |
| ; loop on %p, even though the loaded value is never used. |
| define void @or64_nouse_seq_cst(ptr %p) { |
| ; X64-LABEL: or64_nouse_seq_cst: |
| ; X64: # %bb.0: |
| ; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp) |
| ; X64-NEXT: retq |
| ; |
| ; X86-LABEL: or64_nouse_seq_cst: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: .cfi_def_cfa_offset 8 |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: .cfi_def_cfa_offset 12 |
| ; X86-NEXT: .cfi_offset %esi, -12 |
| ; X86-NEXT: .cfi_offset %ebx, -8 |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi |
| ; X86-NEXT: movl (%esi), %eax |
| ; X86-NEXT: movl 4(%esi), %edx |
| ; X86-NEXT: .p2align 4, 0x90 |
| ; X86-NEXT: .LBB11_1: # %atomicrmw.start |
| ; X86-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; X86-NEXT: movl %edx, %ecx |
| ; X86-NEXT: movl %eax, %ebx |
| ; X86-NEXT: lock cmpxchg8b (%esi) |
| ; X86-NEXT: jne .LBB11_1 |
| ; X86-NEXT: # %bb.2: # %atomicrmw.end |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: .cfi_def_cfa_offset 8 |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: .cfi_def_cfa_offset 4 |
| ; X86-NEXT: retl |
| atomicrmw or ptr %p, i64 0 seq_cst |
| ret void |
| } |
| |
| ; TODO: Don't need to lower as a __sync_fetch_and_or_16 call |
| ; or128_nouse_seq_cst: atomicrmw or of constant 0 (seq_cst) on an i128 whose |
| ; result is discarded. Every configuration currently expects a call to |
| ; __sync_fetch_and_or_16 with an all-zero operand (zeroed esi/edx on x86-64, |
| ; four pushed zero words on i686); the returned value is not read afterwards. |
| define void @or128_nouse_seq_cst(ptr %p) { |
| ; X64-LABEL: or128_nouse_seq_cst: |
| ; X64: # %bb.0: |
| ; X64-NEXT: pushq %rax |
| ; X64-NEXT: .cfi_def_cfa_offset 16 |
| ; X64-NEXT: xorl %esi, %esi |
| ; X64-NEXT: xorl %edx, %edx |
| ; X64-NEXT: callq __sync_fetch_and_or_16@PLT |
| ; X64-NEXT: popq %rax |
| ; X64-NEXT: .cfi_def_cfa_offset 8 |
| ; X64-NEXT: retq |
| ; |
| ; X86-SSE2-LABEL: or128_nouse_seq_cst: |
| ; X86-SSE2: # %bb.0: |
| ; X86-SSE2-NEXT: pushl %ebp |
| ; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 |
| ; X86-SSE2-NEXT: .cfi_offset %ebp, -8 |
| ; X86-SSE2-NEXT: movl %esp, %ebp |
| ; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp |
| ; X86-SSE2-NEXT: andl $-8, %esp |
| ; X86-SSE2-NEXT: subl $16, %esp |
| ; X86-SSE2-NEXT: movl %esp, %eax |
| ; X86-SSE2-NEXT: pushl $0 |
| ; X86-SSE2-NEXT: pushl $0 |
| ; X86-SSE2-NEXT: pushl $0 |
| ; X86-SSE2-NEXT: pushl $0 |
| ; X86-SSE2-NEXT: pushl 8(%ebp) |
| ; X86-SSE2-NEXT: pushl %eax |
| ; X86-SSE2-NEXT: calll __sync_fetch_and_or_16 |
| ; X86-SSE2-NEXT: addl $20, %esp |
| ; X86-SSE2-NEXT: movl %ebp, %esp |
| ; X86-SSE2-NEXT: popl %ebp |
| ; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4 |
| ; X86-SSE2-NEXT: retl |
| ; |
| ; X86-SLM-LABEL: or128_nouse_seq_cst: |
| ; X86-SLM: # %bb.0: |
| ; X86-SLM-NEXT: pushl %ebp |
| ; X86-SLM-NEXT: .cfi_def_cfa_offset 8 |
| ; X86-SLM-NEXT: .cfi_offset %ebp, -8 |
| ; X86-SLM-NEXT: movl %esp, %ebp |
| ; X86-SLM-NEXT: .cfi_def_cfa_register %ebp |
| ; X86-SLM-NEXT: andl $-8, %esp |
| ; X86-SLM-NEXT: subl $16, %esp |
| ; X86-SLM-NEXT: movl 8(%ebp), %eax |
| ; X86-SLM-NEXT: movl %esp, %ecx |
| ; X86-SLM-NEXT: pushl $0 |
| ; X86-SLM-NEXT: pushl $0 |
| ; X86-SLM-NEXT: pushl $0 |
| ; X86-SLM-NEXT: pushl $0 |
| ; X86-SLM-NEXT: pushl %eax |
| ; X86-SLM-NEXT: pushl %ecx |
| ; X86-SLM-NEXT: calll __sync_fetch_and_or_16 |
| ; X86-SLM-NEXT: addl $20, %esp |
| ; X86-SLM-NEXT: movl %ebp, %esp |
| ; X86-SLM-NEXT: popl %ebp |
| ; X86-SLM-NEXT: .cfi_def_cfa %esp, 4 |
| ; X86-SLM-NEXT: retl |
| ; |
| ; X86-ATOM-LABEL: or128_nouse_seq_cst: |
| ; X86-ATOM: # %bb.0: |
| ; X86-ATOM-NEXT: pushl %ebp |
| ; X86-ATOM-NEXT: .cfi_def_cfa_offset 8 |
| ; X86-ATOM-NEXT: .cfi_offset %ebp, -8 |
| ; X86-ATOM-NEXT: leal (%esp), %ebp |
| ; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp |
| ; X86-ATOM-NEXT: andl $-8, %esp |
| ; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp |
| ; X86-ATOM-NEXT: movl 8(%ebp), %eax |
| ; X86-ATOM-NEXT: movl %esp, %ecx |
| ; X86-ATOM-NEXT: pushl $0 |
| ; X86-ATOM-NEXT: pushl $0 |
| ; X86-ATOM-NEXT: pushl $0 |
| ; X86-ATOM-NEXT: pushl $0 |
| ; X86-ATOM-NEXT: pushl %eax |
| ; X86-ATOM-NEXT: pushl %ecx |
| ; X86-ATOM-NEXT: calll __sync_fetch_and_or_16 |
| ; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp |
| ; X86-ATOM-NEXT: movl %ebp, %esp |
| ; X86-ATOM-NEXT: popl %ebp |
| ; X86-ATOM-NEXT: .cfi_def_cfa %esp, 4 |
| ; X86-ATOM-NEXT: retl |
| atomicrmw or ptr %p, i128 0 seq_cst |
| ret void |
| } |
| |
| |
| ; or16_nouse_seq_cst: atomicrmw or of constant 0 (seq_cst) on an i16 whose |
| ; result is discarded. Despite the 16-bit operand, the expected output is a |
| ; 32-bit lock orl $0 on a stack location (a negative offset from rsp on |
| ; x86-64, (%esp) on i686). Atom additionally expects six nops before the return. |
| define void @or16_nouse_seq_cst(ptr %p) { |
| ; X64-LABEL: or16_nouse_seq_cst: |
| ; X64: # %bb.0: |
| ; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp) |
| ; X64-NEXT: retq |
| ; |
| ; X86-GENERIC-LABEL: or16_nouse_seq_cst: |
| ; X86-GENERIC: # %bb.0: |
| ; X86-GENERIC-NEXT: lock orl $0, (%esp) |
| ; X86-GENERIC-NEXT: retl |
| ; |
| ; X86-ATOM-LABEL: or16_nouse_seq_cst: |
| ; X86-ATOM: # %bb.0: |
| ; X86-ATOM-NEXT: lock orl $0, (%esp) |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: retl |
| atomicrmw or ptr %p, i16 0 seq_cst |
| ret void |
| } |
| |
| ; or8_nouse_seq_cst: atomicrmw or of constant 0 (seq_cst) on an i8 whose |
| ; result is discarded. Despite the 8-bit operand, the expected output is a |
| ; 32-bit lock orl $0 on a stack location (a negative offset from rsp on |
| ; x86-64, (%esp) on i686). Atom additionally expects six nops before the return. |
| define void @or8_nouse_seq_cst(ptr %p) { |
| ; X64-LABEL: or8_nouse_seq_cst: |
| ; X64: # %bb.0: |
| ; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp) |
| ; X64-NEXT: retq |
| ; |
| ; X86-GENERIC-LABEL: or8_nouse_seq_cst: |
| ; X86-GENERIC: # %bb.0: |
| ; X86-GENERIC-NEXT: lock orl $0, (%esp) |
| ; X86-GENERIC-NEXT: retl |
| ; |
| ; X86-ATOM-LABEL: or8_nouse_seq_cst: |
| ; X86-ATOM: # %bb.0: |
| ; X86-ATOM-NEXT: lock orl $0, (%esp) |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: nop |
| ; X86-ATOM-NEXT: retl |
| atomicrmw or ptr %p, i8 0 seq_cst |
| ret void |
| } |