; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2,cx16 | FileCheck %s --check-prefixes=X64-SSE
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=avx,cx16 | FileCheck %s --check-prefixes=X64-AVX
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=avx512f,cx16 | FileCheck %s --check-prefixes=X64-AVX

; Codegen of fp128 without cx16 is tested in atomic-nocx16.ll

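; A 16-byte atomic store has no guaranteed-atomic plain store on bare SSE2, so
; it is expanded to a lock cmpxchg16b loop (this is why cx16 is required).
; With AVX an aligned vmovaps is emitted instead, relying on the vendors'
; statements that aligned 16-byte vector accesses are atomic on AVX-capable
; processors.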
define void @store_fp128(ptr %fptr, fp128 %v) {
; X64-SSE-LABEL: store_fp128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    pushq %rbx
; X64-SSE-NEXT:    .cfi_def_cfa_offset 16
; X64-SSE-NEXT:    .cfi_offset %rbx, -16
; X64-SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rbx
; X64-SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-SSE-NEXT:    movq (%rdi), %rax
; X64-SSE-NEXT:    movq 8(%rdi), %rdx
; X64-SSE-NEXT:    .p2align 4
; X64-SSE-NEXT:  .LBB0_1: # %atomicrmw.start
; X64-SSE-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-SSE-NEXT:    lock cmpxchg16b (%rdi)
; X64-SSE-NEXT:    jne .LBB0_1
; X64-SSE-NEXT:  # %bb.2: # %atomicrmw.end
; X64-SSE-NEXT:    popq %rbx
; X64-SSE-NEXT:    .cfi_def_cfa_offset 8
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_fp128:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic fp128 %v, ptr %fptr unordered, align 16
  ret void
}

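; The atomic load needs no loop on SSE2: a single lock cmpxchg16b with %rax,
; %rbx, %rcx, and %rdx zeroed leaves the current 16-byte value in %rdx:%rax
; whether or not the compare succeeds. With AVX a plain aligned vmovaps is
; enough.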
define fp128 @load_fp128(ptr %fptr) {
; X64-SSE-LABEL: load_fp128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    pushq %rbx
; X64-SSE-NEXT:    .cfi_def_cfa_offset 16
; X64-SSE-NEXT:    .cfi_offset %rbx, -16
; X64-SSE-NEXT:    xorl %eax, %eax
; X64-SSE-NEXT:    xorl %edx, %edx
; X64-SSE-NEXT:    xorl %ecx, %ecx
; X64-SSE-NEXT:    xorl %ebx, %ebx
; X64-SSE-NEXT:    lock cmpxchg16b (%rdi)
; X64-SSE-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-SSE-NEXT:    popq %rbx
; X64-SSE-NEXT:    .cfi_def_cfa_offset 8
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_fp128:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps (%rdi), %xmm0
; X64-AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; X64-AVX-NEXT:    retq
  %v = load atomic fp128, ptr %fptr unordered, align 16
  ret fp128 %v
}

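; There is no single-instruction 16-byte exchange, so atomicrmw xchg is
; expanded to a lock cmpxchg16b loop on both the SSE and AVX paths.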
define fp128 @exchange_fp128(ptr %fptr, fp128 %x) {
; X64-SSE-LABEL: exchange_fp128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    pushq %rbx
; X64-SSE-NEXT:    .cfi_def_cfa_offset 16
; X64-SSE-NEXT:    .cfi_offset %rbx, -16
; X64-SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rbx
; X64-SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-SSE-NEXT:    movq (%rdi), %rax
; X64-SSE-NEXT:    movq 8(%rdi), %rdx
; X64-SSE-NEXT:    .p2align 4
; X64-SSE-NEXT:  .LBB2_1: # %atomicrmw.start
; X64-SSE-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-SSE-NEXT:    lock cmpxchg16b (%rdi)
; X64-SSE-NEXT:    jne .LBB2_1
; X64-SSE-NEXT:  # %bb.2: # %atomicrmw.end
; X64-SSE-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-SSE-NEXT:    popq %rbx
; X64-SSE-NEXT:    .cfi_def_cfa_offset 8
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: exchange_fp128:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    pushq %rbx
; X64-AVX-NEXT:    .cfi_def_cfa_offset 16
; X64-AVX-NEXT:    .cfi_offset %rbx, -16
; X64-AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rbx
; X64-AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-AVX-NEXT:    movq (%rdi), %rax
; X64-AVX-NEXT:    movq 8(%rdi), %rdx
; X64-AVX-NEXT:    .p2align 4
; X64-AVX-NEXT:  .LBB2_1: # %atomicrmw.start
; X64-AVX-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-AVX-NEXT:    lock cmpxchg16b (%rdi)
; X64-AVX-NEXT:    jne .LBB2_1
; X64-AVX-NEXT:  # %bb.2: # %atomicrmw.end
; X64-AVX-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; X64-AVX-NEXT:    popq %rbx
; X64-AVX-NEXT:    .cfi_def_cfa_offset 8
; X64-AVX-NEXT:    retq
  %v = atomicrmw xchg ptr %fptr, fp128 %x monotonic, align 16
  ret fp128 %v
}