| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,CHECK-O3,CHECK-SSE-O3 |
| ; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-O3,CHECK-SSE-O3 |
| ; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-O3,CHECK-AVX-O3,CHECK-AVX2-O3 |
| ; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-O3,CHECK-AVX-O3,CHECK-AVX512-O3 |
| ; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,CHECK-O0,CHECK-SSE-O0 |
| ; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-O0,CHECK-SSE-O0 |
| ; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-O0,CHECK-AVX-O0,CHECK-AVX2-O0 |
| ; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-O0,CHECK-AVX-O0,CHECK-AVX512-O0 |
| |
| ; Seq_cst atomic store of an i32. All eight RUN lines share the plain CHECK |
| ; prefix here, and the store is expected as a single xchgl to memory. |
| define void @test1(ptr %ptr, i32 %val1) { |
| ; CHECK-LABEL: test1: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: xchgl %esi, (%rdi) |
| ; CHECK-NEXT: retq |
| store atomic i32 %val1, ptr %ptr seq_cst, align 4 |
| ret void |
| } |
| |
| ; Release atomic store of an i32. Unlike the seq_cst store in test1, every |
| ; RUN configuration expects an ordinary movl to memory. |
| define void @test2(ptr %ptr, i32 %val1) { |
| ; CHECK-LABEL: test2: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: movl %esi, (%rdi) |
| ; CHECK-NEXT: retq |
| store atomic i32 %val1, ptr %ptr release, align 4 |
| ret void |
| } |
| |
| ; Seq_cst atomic load of an i32. Every RUN configuration expects an ordinary |
| ; movl load into %eax. |
| define i32 @test3(ptr %ptr) { |
| ; CHECK-LABEL: test3: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: movl (%rdi), %eax |
| ; CHECK-NEXT: retq |
| %val = load atomic i32, ptr %ptr seq_cst, align 4 |
| ret i32 %val |
| } |
| |
| ; Acquire atomic load of a <1 x i32> with align 4. Every RUN configuration |
| ; expects the same single movl load that the scalar i32 load in test3 gets. |
| define <1 x i32> @atomic_vec1_i32(ptr %x) { |
| ; CHECK-LABEL: atomic_vec1_i32: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: movl (%rdi), %eax |
| ; CHECK-NEXT: retq |
| %ret = load atomic <1 x i32>, ptr %x acquire, align 4 |
| ret <1 x i32> %ret |
| } |
| |
| ; Acquire atomic load of a <1 x i8> with align 1. The runs without -O0 |
| ; (CHECK-O3 prefix) expect a zero-extending movzbl load, while the -O0 runs |
| ; (CHECK-O0 prefix) expect a byte-sized movb into %al. |
| define <1 x i8> @atomic_vec1_i8(ptr %x) { |
| ; CHECK-O3-LABEL: atomic_vec1_i8: |
| ; CHECK-O3: # %bb.0: |
| ; CHECK-O3-NEXT: movzbl (%rdi), %eax |
| ; CHECK-O3-NEXT: retq |
| ; |
| ; CHECK-O0-LABEL: atomic_vec1_i8: |
| ; CHECK-O0: # %bb.0: |
| ; CHECK-O0-NEXT: movb (%rdi), %al |
| ; CHECK-O0-NEXT: retq |
| %ret = load atomic <1 x i8>, ptr %x acquire, align 1 |
| ret <1 x i8> %ret |
| } |
| |
| ; Acquire atomic load of a <1 x i16> with align 2. The runs without -O0 |
| ; expect a zero-extending movzwl load, while the -O0 runs expect a 16-bit |
| ; movw into %ax. |
| define <1 x i16> @atomic_vec1_i16(ptr %x) { |
| ; CHECK-O3-LABEL: atomic_vec1_i16: |
| ; CHECK-O3: # %bb.0: |
| ; CHECK-O3-NEXT: movzwl (%rdi), %eax |
| ; CHECK-O3-NEXT: retq |
| ; |
| ; CHECK-O0-LABEL: atomic_vec1_i16: |
| ; CHECK-O0: # %bb.0: |
| ; CHECK-O0-NEXT: movw (%rdi), %ax |
| ; CHECK-O0-NEXT: retq |
| %ret = load atomic <1 x i16>, ptr %x acquire, align 2 |
| ret <1 x i16> %ret |
| } |
| |
| ; Acquire atomic load of a <1 x i8> followed by a zext to <1 x i32>. Both |
| ; check groups expect the load and then a separate movzbl %al, %eax. |
| ; Review note - in the non -O0 output the load itself is already a movzbl, |
| ; so the second movzbl looks redundant; the checks record the output as it |
| ; was generated. |
| define <1 x i32> @atomic_vec1_i8_zext(ptr %x) { |
| ; CHECK-O3-LABEL: atomic_vec1_i8_zext: |
| ; CHECK-O3: # %bb.0: |
| ; CHECK-O3-NEXT: movzbl (%rdi), %eax |
| ; CHECK-O3-NEXT: movzbl %al, %eax |
| ; CHECK-O3-NEXT: retq |
| ; |
| ; CHECK-O0-LABEL: atomic_vec1_i8_zext: |
| ; CHECK-O0: # %bb.0: |
| ; CHECK-O0-NEXT: movb (%rdi), %al |
| ; CHECK-O0-NEXT: movzbl %al, %eax |
| ; CHECK-O0-NEXT: retq |
| %ret = load atomic <1 x i8>, ptr %x acquire, align 1 |
| %zret = zext <1 x i8> %ret to <1 x i32> |
| ret <1 x i32> %zret |
| } |
| |
| ; Acquire atomic load of a <1 x i16> followed by a sext to <1 x i64>. Both |
| ; check groups expect the 16-bit load (movzwl without -O0, movw with -O0) |
| ; and then a separate movswq %ax, %rax for the sign extension. |
| define <1 x i64> @atomic_vec1_i16_sext(ptr %x) { |
| ; CHECK-O3-LABEL: atomic_vec1_i16_sext: |
| ; CHECK-O3: # %bb.0: |
| ; CHECK-O3-NEXT: movzwl (%rdi), %eax |
| ; CHECK-O3-NEXT: movswq %ax, %rax |
| ; CHECK-O3-NEXT: retq |
| ; |
| ; CHECK-O0-LABEL: atomic_vec1_i16_sext: |
| ; CHECK-O0: # %bb.0: |
| ; CHECK-O0-NEXT: movw (%rdi), %ax |
| ; CHECK-O0-NEXT: movswq %ax, %rax |
| ; CHECK-O0-NEXT: retq |
| %ret = load atomic <1 x i16>, ptr %x acquire, align 2 |
| %sret = sext <1 x i16> %ret to <1 x i64> |
| ret <1 x i64> %sret |
| } |
| |
| ; Acquire atomic load of a <1 x ptr addrspace(270)> with align 4. Every RUN |
| ; configuration expects a 32-bit movl load into %eax. Pointers in address |
| ; space 270 are presumably 32 bits wide on this target, which would match |
| ; the movl - confirm against the x86 backend documentation. |
| define <1 x ptr addrspace(270)> @atomic_vec1_ptr270(ptr %x) { |
| ; CHECK-LABEL: atomic_vec1_ptr270: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: movl (%rdi), %eax |
| ; CHECK-NEXT: retq |
| %ret = load atomic <1 x ptr addrspace(270)>, ptr %x acquire, align 4 |
| ret <1 x ptr addrspace(270)> %ret |
| } |
| |
| ; Acquire atomic load of a <1 x bfloat> with align 2. All check groups |
| ; expect a 16-bit integer load followed by an insert into word 0 of %xmm0 |
| ; (pinsrw for the SSE runs, vpinsrw for the AVX runs). The -O0 groups load |
| ; into %cx and copy to %ax before the insert. |
| define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) { |
| ; CHECK-SSE-O3-LABEL: atomic_vec1_bfloat: |
| ; CHECK-SSE-O3: # %bb.0: |
| ; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax |
| ; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0 |
| ; CHECK-SSE-O3-NEXT: retq |
| ; |
| ; CHECK-AVX-O3-LABEL: atomic_vec1_bfloat: |
| ; CHECK-AVX-O3: # %bb.0: |
| ; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax |
| ; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 |
| ; CHECK-AVX-O3-NEXT: retq |
| ; |
| ; CHECK-SSE-O0-LABEL: atomic_vec1_bfloat: |
| ; CHECK-SSE-O0: # %bb.0: |
| ; CHECK-SSE-O0-NEXT: movw (%rdi), %cx |
| ; CHECK-SSE-O0-NEXT: # implicit-def: $eax |
| ; CHECK-SSE-O0-NEXT: movw %cx, %ax |
| ; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0 |
| ; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0 |
| ; CHECK-SSE-O0-NEXT: retq |
| ; |
| ; CHECK-AVX-O0-LABEL: atomic_vec1_bfloat: |
| ; CHECK-AVX-O0: # %bb.0: |
| ; CHECK-AVX-O0-NEXT: movw (%rdi), %cx |
| ; CHECK-AVX-O0-NEXT: # implicit-def: $eax |
| ; CHECK-AVX-O0-NEXT: movw %cx, %ax |
| ; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0 |
| ; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 |
| ; CHECK-AVX-O0-NEXT: retq |
| %ret = load atomic <1 x bfloat>, ptr %x acquire, align 2 |
| ret <1 x bfloat> %ret |
| } |
| |
| ; Acquire atomic load of a <1 x ptr> with align 8. Every RUN configuration |
| ; expects a single 64-bit movq load into %rax. |
| define <1 x ptr> @atomic_vec1_ptr_align(ptr %x) nounwind { |
| ; CHECK-LABEL: atomic_vec1_ptr_align: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: movq (%rdi), %rax |
| ; CHECK-NEXT: retq |
| %ret = load atomic <1 x ptr>, ptr %x acquire, align 8 |
| ret <1 x ptr> %ret |
| } |
| |
| ; Acquire atomic load of a <1 x i64> with align 8. Every RUN configuration |
| ; expects a single 64-bit movq load into %rax. |
| define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { |
| ; CHECK-LABEL: atomic_vec1_i64_align: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: movq (%rdi), %rax |
| ; CHECK-NEXT: retq |
| %ret = load atomic <1 x i64>, ptr %x acquire, align 8 |
| ret <1 x i64> %ret |
| } |
| |
| ; Acquire atomic load of a <1 x ptr> with only align 4 (atomic_vec1_ptr_align |
| ; uses align 8 and gets a plain movq). Here the checks expect a call to |
| ; __atomic_load with $8 in %edi, the source pointer in %rsi, the address of |
| ; a stack slot in %rdx and $2 in %ecx, then a reload of the result from |
| ; (%rsp). The two check groups differ only in the order of the argument |
| ; setup. The $2 is presumably the acquire memory-order constant - confirm |
| ; against the __atomic_load ABI. |
| define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind { |
| ; CHECK-O3-LABEL: atomic_vec1_ptr: |
| ; CHECK-O3: # %bb.0: |
| ; CHECK-O3-NEXT: pushq %rax |
| ; CHECK-O3-NEXT: movq %rdi, %rsi |
| ; CHECK-O3-NEXT: movq %rsp, %rdx |
| ; CHECK-O3-NEXT: movl $8, %edi |
| ; CHECK-O3-NEXT: movl $2, %ecx |
| ; CHECK-O3-NEXT: callq __atomic_load@PLT |
| ; CHECK-O3-NEXT: movq (%rsp), %rax |
| ; CHECK-O3-NEXT: popq %rcx |
| ; CHECK-O3-NEXT: retq |
| ; |
| ; CHECK-O0-LABEL: atomic_vec1_ptr: |
| ; CHECK-O0: # %bb.0: |
| ; CHECK-O0-NEXT: pushq %rax |
| ; CHECK-O0-NEXT: movq %rdi, %rsi |
| ; CHECK-O0-NEXT: movl $8, %edi |
| ; CHECK-O0-NEXT: movq %rsp, %rdx |
| ; CHECK-O0-NEXT: movl $2, %ecx |
| ; CHECK-O0-NEXT: callq __atomic_load@PLT |
| ; CHECK-O0-NEXT: movq (%rsp), %rax |
| ; CHECK-O0-NEXT: popq %rcx |
| ; CHECK-O0-NEXT: retq |
| %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 |
| ret <1 x ptr> %ret |
| } |
| |
| ; Acquire atomic load of a <1 x half> with align 2. The expected code has |
| ; the same shape as for atomic_vec1_bfloat, a 16-bit integer load followed |
| ; by pinsrw (SSE runs) or vpinsrw (AVX runs) into word 0 of %xmm0. The -O0 |
| ; groups load into %cx and copy to %ax before the insert. |
| define <1 x half> @atomic_vec1_half(ptr %x) { |
| ; CHECK-SSE-O3-LABEL: atomic_vec1_half: |
| ; CHECK-SSE-O3: # %bb.0: |
| ; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax |
| ; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0 |
| ; CHECK-SSE-O3-NEXT: retq |
| ; |
| ; CHECK-AVX-O3-LABEL: atomic_vec1_half: |
| ; CHECK-AVX-O3: # %bb.0: |
| ; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax |
| ; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 |
| ; CHECK-AVX-O3-NEXT: retq |
| ; |
| ; CHECK-SSE-O0-LABEL: atomic_vec1_half: |
| ; CHECK-SSE-O0: # %bb.0: |
| ; CHECK-SSE-O0-NEXT: movw (%rdi), %cx |
| ; CHECK-SSE-O0-NEXT: # implicit-def: $eax |
| ; CHECK-SSE-O0-NEXT: movw %cx, %ax |
| ; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0 |
| ; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0 |
| ; CHECK-SSE-O0-NEXT: retq |
| ; |
| ; CHECK-AVX-O0-LABEL: atomic_vec1_half: |
| ; CHECK-AVX-O0: # %bb.0: |
| ; CHECK-AVX-O0-NEXT: movw (%rdi), %cx |
| ; CHECK-AVX-O0-NEXT: # implicit-def: $eax |
| ; CHECK-AVX-O0-NEXT: movw %cx, %ax |
| ; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0 |
| ; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 |
| ; CHECK-AVX-O0-NEXT: retq |
| %ret = load atomic <1 x half>, ptr %x acquire, align 2 |
| ret <1 x half> %ret |
| } |
| |
| ; Acquire atomic load of a <1 x float> with align 4. All check groups expect |
| ; a single scalar load straight into %xmm0, movss for the SSE runs and |
| ; vmovss for the AVX runs, with the same shape at -O0. |
| define <1 x float> @atomic_vec1_float(ptr %x) { |
| ; CHECK-SSE-O3-LABEL: atomic_vec1_float: |
| ; CHECK-SSE-O3: # %bb.0: |
| ; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; CHECK-SSE-O3-NEXT: retq |
| ; |
| ; CHECK-AVX-O3-LABEL: atomic_vec1_float: |
| ; CHECK-AVX-O3: # %bb.0: |
| ; CHECK-AVX-O3-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; CHECK-AVX-O3-NEXT: retq |
| ; |
| ; CHECK-SSE-O0-LABEL: atomic_vec1_float: |
| ; CHECK-SSE-O0: # %bb.0: |
| ; CHECK-SSE-O0-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; CHECK-SSE-O0-NEXT: retq |
| ; |
| ; CHECK-AVX-O0-LABEL: atomic_vec1_float: |
| ; CHECK-AVX-O0: # %bb.0: |
| ; CHECK-AVX-O0-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; CHECK-AVX-O0-NEXT: retq |
| %ret = load atomic <1 x float>, ptr %x acquire, align 4 |
| ret <1 x float> %ret |
| } |
| |
| ; Acquire atomic load of a <1 x double> with align 8. All check groups |
| ; expect a single scalar load straight into %xmm0, movsd for the SSE runs |
| ; and vmovsd for the AVX runs, with the same shape at -O0. |
| define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { |
| ; CHECK-SSE-O3-LABEL: atomic_vec1_double_align: |
| ; CHECK-SSE-O3: # %bb.0: |
| ; CHECK-SSE-O3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero |
| ; CHECK-SSE-O3-NEXT: retq |
| ; |
| ; CHECK-AVX-O3-LABEL: atomic_vec1_double_align: |
| ; CHECK-AVX-O3: # %bb.0: |
| ; CHECK-AVX-O3-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero |
| ; CHECK-AVX-O3-NEXT: retq |
| ; |
| ; CHECK-SSE-O0-LABEL: atomic_vec1_double_align: |
| ; CHECK-SSE-O0: # %bb.0: |
| ; CHECK-SSE-O0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero |
| ; CHECK-SSE-O0-NEXT: retq |
| ; |
| ; CHECK-AVX-O0-LABEL: atomic_vec1_double_align: |
| ; CHECK-AVX-O0: # %bb.0: |
| ; CHECK-AVX-O0-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero |
| ; CHECK-AVX-O0-NEXT: retq |
| %ret = load atomic <1 x double>, ptr %x acquire, align 8 |
| ret <1 x double> %ret |
| } |
| |
| ; Acquire atomic load of a <1 x i64> with only align 4 |
| ; (atomic_vec1_i64_align uses align 8 and gets a plain movq). The checks |
| ; expect a call to __atomic_load with $8 in %edi, the source pointer in |
| ; %rsi, the address of a stack slot in %rdx and $2 in %ecx, then a reload of |
| ; the result from (%rsp) into %rax. The two check groups differ only in the |
| ; order of the argument setup. |
| define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind { |
| ; CHECK-O3-LABEL: atomic_vec1_i64: |
| ; CHECK-O3: # %bb.0: |
| ; CHECK-O3-NEXT: pushq %rax |
| ; CHECK-O3-NEXT: movq %rdi, %rsi |
| ; CHECK-O3-NEXT: movq %rsp, %rdx |
| ; CHECK-O3-NEXT: movl $8, %edi |
| ; CHECK-O3-NEXT: movl $2, %ecx |
| ; CHECK-O3-NEXT: callq __atomic_load@PLT |
| ; CHECK-O3-NEXT: movq (%rsp), %rax |
| ; CHECK-O3-NEXT: popq %rcx |
| ; CHECK-O3-NEXT: retq |
| ; |
| ; CHECK-O0-LABEL: atomic_vec1_i64: |
| ; CHECK-O0: # %bb.0: |
| ; CHECK-O0-NEXT: pushq %rax |
| ; CHECK-O0-NEXT: movq %rdi, %rsi |
| ; CHECK-O0-NEXT: movl $8, %edi |
| ; CHECK-O0-NEXT: movq %rsp, %rdx |
| ; CHECK-O0-NEXT: movl $2, %ecx |
| ; CHECK-O0-NEXT: callq __atomic_load@PLT |
| ; CHECK-O0-NEXT: movq (%rsp), %rax |
| ; CHECK-O0-NEXT: popq %rcx |
| ; CHECK-O0-NEXT: retq |
| %ret = load atomic <1 x i64>, ptr %x acquire, align 4 |
| ret <1 x i64> %ret |
| } |
| |
| ; Acquire atomic load of a <1 x double> with only align 4 |
| ; (atomic_vec1_double_align uses align 8 and gets a plain scalar load). The |
| ; checks expect a call to __atomic_load with $8 in %edi, the source pointer |
| ; in %rsi, the address of a stack slot in %rdx and $2 in %ecx, then a |
| ; movsd (SSE runs) or vmovsd (AVX runs) that brings the result into %xmm0. |
| define <1 x double> @atomic_vec1_double(ptr %x) nounwind { |
| ; CHECK-SSE-O3-LABEL: atomic_vec1_double: |
| ; CHECK-SSE-O3: # %bb.0: |
| ; CHECK-SSE-O3-NEXT: pushq %rax |
| ; CHECK-SSE-O3-NEXT: movq %rdi, %rsi |
| ; CHECK-SSE-O3-NEXT: movq %rsp, %rdx |
| ; CHECK-SSE-O3-NEXT: movl $8, %edi |
| ; CHECK-SSE-O3-NEXT: movl $2, %ecx |
| ; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT |
| ; CHECK-SSE-O3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero |
| ; CHECK-SSE-O3-NEXT: popq %rax |
| ; CHECK-SSE-O3-NEXT: retq |
| ; |
| ; CHECK-AVX-O3-LABEL: atomic_vec1_double: |
| ; CHECK-AVX-O3: # %bb.0: |
| ; CHECK-AVX-O3-NEXT: pushq %rax |
| ; CHECK-AVX-O3-NEXT: movq %rdi, %rsi |
| ; CHECK-AVX-O3-NEXT: movq %rsp, %rdx |
| ; CHECK-AVX-O3-NEXT: movl $8, %edi |
| ; CHECK-AVX-O3-NEXT: movl $2, %ecx |
| ; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT |
| ; CHECK-AVX-O3-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero |
| ; CHECK-AVX-O3-NEXT: popq %rax |
| ; CHECK-AVX-O3-NEXT: retq |
| ; |
| ; CHECK-SSE-O0-LABEL: atomic_vec1_double: |
| ; CHECK-SSE-O0: # %bb.0: |
| ; CHECK-SSE-O0-NEXT: pushq %rax |
| ; CHECK-SSE-O0-NEXT: movq %rdi, %rsi |
| ; CHECK-SSE-O0-NEXT: movl $8, %edi |
| ; CHECK-SSE-O0-NEXT: movq %rsp, %rdx |
| ; CHECK-SSE-O0-NEXT: movl $2, %ecx |
| ; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT |
| ; CHECK-SSE-O0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero |
| ; CHECK-SSE-O0-NEXT: popq %rax |
| ; CHECK-SSE-O0-NEXT: retq |
| ; |
| ; CHECK-AVX-O0-LABEL: atomic_vec1_double: |
| ; CHECK-AVX-O0: # %bb.0: |
| ; CHECK-AVX-O0-NEXT: pushq %rax |
| ; CHECK-AVX-O0-NEXT: movq %rdi, %rsi |
| ; CHECK-AVX-O0-NEXT: movl $8, %edi |
| ; CHECK-AVX-O0-NEXT: movq %rsp, %rdx |
| ; CHECK-AVX-O0-NEXT: movl $2, %ecx |
| ; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT |
| ; CHECK-AVX-O0-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero |
| ; CHECK-AVX-O0-NEXT: popq %rax |
| ; CHECK-AVX-O0-NEXT: retq |
| %ret = load atomic <1 x double>, ptr %x acquire, align 4 |
| ret <1 x double> %ret |
| } |
| |
| ; Acquire atomic load of a <2 x i32> (8 bytes) with align 4. The checks |
| ; expect a call to __atomic_load with $8 in %edi, the source pointer in |
| ; %rsi, the address of a stack slot in %rdx and $2 in %ecx. The result is |
| ; then brought into %xmm0 with movsd or vmovsd in the runs without -O0, and |
| ; with movq or vmovq in the -O0 runs. |
| define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { |
| ; CHECK-SSE-O3-LABEL: atomic_vec2_i32: |
| ; CHECK-SSE-O3: # %bb.0: |
| ; CHECK-SSE-O3-NEXT: pushq %rax |
| ; CHECK-SSE-O3-NEXT: movq %rdi, %rsi |
| ; CHECK-SSE-O3-NEXT: movq %rsp, %rdx |
| ; CHECK-SSE-O3-NEXT: movl $8, %edi |
| ; CHECK-SSE-O3-NEXT: movl $2, %ecx |
| ; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT |
| ; CHECK-SSE-O3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero |
| ; CHECK-SSE-O3-NEXT: popq %rax |
| ; CHECK-SSE-O3-NEXT: retq |
| ; |
| ; CHECK-AVX-O3-LABEL: atomic_vec2_i32: |
| ; CHECK-AVX-O3: # %bb.0: |
| ; CHECK-AVX-O3-NEXT: pushq %rax |
| ; CHECK-AVX-O3-NEXT: movq %rdi, %rsi |
| ; CHECK-AVX-O3-NEXT: movq %rsp, %rdx |
| ; CHECK-AVX-O3-NEXT: movl $8, %edi |
| ; CHECK-AVX-O3-NEXT: movl $2, %ecx |
| ; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT |
| ; CHECK-AVX-O3-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero |
| ; CHECK-AVX-O3-NEXT: popq %rax |
| ; CHECK-AVX-O3-NEXT: retq |
| ; |
| ; CHECK-SSE-O0-LABEL: atomic_vec2_i32: |
| ; CHECK-SSE-O0: # %bb.0: |
| ; CHECK-SSE-O0-NEXT: pushq %rax |
| ; CHECK-SSE-O0-NEXT: movq %rdi, %rsi |
| ; CHECK-SSE-O0-NEXT: movl $8, %edi |
| ; CHECK-SSE-O0-NEXT: movq %rsp, %rdx |
| ; CHECK-SSE-O0-NEXT: movl $2, %ecx |
| ; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT |
| ; CHECK-SSE-O0-NEXT: movq {{.*#+}} xmm0 = mem[0],zero |
| ; CHECK-SSE-O0-NEXT: popq %rax |
| ; CHECK-SSE-O0-NEXT: retq |
| ; |
| ; CHECK-AVX-O0-LABEL: atomic_vec2_i32: |
| ; CHECK-AVX-O0: # %bb.0: |
| ; CHECK-AVX-O0-NEXT: pushq %rax |
| ; CHECK-AVX-O0-NEXT: movq %rdi, %rsi |
| ; CHECK-AVX-O0-NEXT: movl $8, %edi |
| ; CHECK-AVX-O0-NEXT: movq %rsp, %rdx |
| ; CHECK-AVX-O0-NEXT: movl $2, %ecx |
| ; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT |
| ; CHECK-AVX-O0-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero |
| ; CHECK-AVX-O0-NEXT: popq %rax |
| ; CHECK-AVX-O0-NEXT: retq |
| %ret = load atomic <2 x i32>, ptr %x acquire, align 4 |
| ret <2 x i32> %ret |
| } |
| |
| ; Acquire atomic load of a <4 x float> (16 bytes) with align 4. The checks |
| ; expect 24 bytes of stack to be reserved and a call to __atomic_load with |
| ; $16 in %edi, the source pointer in %rsi, %rsp in %rdx and $2 in %ecx. The |
| ; result is then reloaded from (%rsp) into %xmm0 with movaps (SSE runs) or |
| ; vmovaps (AVX runs). |
| define <4 x float> @atomic_vec4_float(ptr %x) nounwind { |
| ; CHECK-SSE-O3-LABEL: atomic_vec4_float: |
| ; CHECK-SSE-O3: # %bb.0: |
| ; CHECK-SSE-O3-NEXT: subq $24, %rsp |
| ; CHECK-SSE-O3-NEXT: movq %rdi, %rsi |
| ; CHECK-SSE-O3-NEXT: movq %rsp, %rdx |
| ; CHECK-SSE-O3-NEXT: movl $16, %edi |
| ; CHECK-SSE-O3-NEXT: movl $2, %ecx |
| ; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT |
| ; CHECK-SSE-O3-NEXT: movaps (%rsp), %xmm0 |
| ; CHECK-SSE-O3-NEXT: addq $24, %rsp |
| ; CHECK-SSE-O3-NEXT: retq |
| ; |
| ; CHECK-AVX-O3-LABEL: atomic_vec4_float: |
| ; CHECK-AVX-O3: # %bb.0: |
| ; CHECK-AVX-O3-NEXT: subq $24, %rsp |
| ; CHECK-AVX-O3-NEXT: movq %rdi, %rsi |
| ; CHECK-AVX-O3-NEXT: movq %rsp, %rdx |
| ; CHECK-AVX-O3-NEXT: movl $16, %edi |
| ; CHECK-AVX-O3-NEXT: movl $2, %ecx |
| ; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT |
| ; CHECK-AVX-O3-NEXT: vmovaps (%rsp), %xmm0 |
| ; CHECK-AVX-O3-NEXT: addq $24, %rsp |
| ; CHECK-AVX-O3-NEXT: retq |
| ; |
| ; CHECK-SSE-O0-LABEL: atomic_vec4_float: |
| ; CHECK-SSE-O0: # %bb.0: |
| ; CHECK-SSE-O0-NEXT: subq $24, %rsp |
| ; CHECK-SSE-O0-NEXT: movq %rdi, %rsi |
| ; CHECK-SSE-O0-NEXT: movl $16, %edi |
| ; CHECK-SSE-O0-NEXT: movq %rsp, %rdx |
| ; CHECK-SSE-O0-NEXT: movl $2, %ecx |
| ; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT |
| ; CHECK-SSE-O0-NEXT: movaps (%rsp), %xmm0 |
| ; CHECK-SSE-O0-NEXT: addq $24, %rsp |
| ; CHECK-SSE-O0-NEXT: retq |
| ; |
| ; CHECK-AVX-O0-LABEL: atomic_vec4_float: |
| ; CHECK-AVX-O0: # %bb.0: |
| ; CHECK-AVX-O0-NEXT: subq $24, %rsp |
| ; CHECK-AVX-O0-NEXT: movq %rdi, %rsi |
| ; CHECK-AVX-O0-NEXT: movl $16, %edi |
| ; CHECK-AVX-O0-NEXT: movq %rsp, %rdx |
| ; CHECK-AVX-O0-NEXT: movl $2, %ecx |
| ; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT |
| ; CHECK-AVX-O0-NEXT: vmovaps (%rsp), %xmm0 |
| ; CHECK-AVX-O0-NEXT: addq $24, %rsp |
| ; CHECK-AVX-O0-NEXT: retq |
| %ret = load atomic <4 x float>, ptr %x acquire, align 4 |
| ret <4 x float> %ret |
| } |
| |
| ; Acquire atomic load of an <8 x double> (64 bytes) with align 4. The checks |
| ; expect 72 bytes of stack to be reserved and a call to __atomic_load with |
| ; $64 in %edi, the source pointer in %rsi, %rsp in %rdx and $2 in %ecx. The |
| ; reload of the result depends on the vector width of the run, so the AVX2 |
| ; and AVX512 runs use their own prefixes here. The SSE runs reload four xmm |
| ; registers (movaps, or movapd at -O0), the AVX2 runs reload two ymm |
| ; registers (vmovups, or vmovupd at -O0) and the AVX512 runs reload one zmm |
| ; register (vmovups, or vmovupd at -O0). |
| define <8 x double> @atomic_vec8_double(ptr %x) nounwind { |
| ; CHECK-SSE-O3-LABEL: atomic_vec8_double: |
| ; CHECK-SSE-O3: # %bb.0: |
| ; CHECK-SSE-O3-NEXT: subq $72, %rsp |
| ; CHECK-SSE-O3-NEXT: movq %rdi, %rsi |
| ; CHECK-SSE-O3-NEXT: movq %rsp, %rdx |
| ; CHECK-SSE-O3-NEXT: movl $64, %edi |
| ; CHECK-SSE-O3-NEXT: movl $2, %ecx |
| ; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT |
| ; CHECK-SSE-O3-NEXT: movaps (%rsp), %xmm0 |
| ; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 |
| ; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 |
| ; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3 |
| ; CHECK-SSE-O3-NEXT: addq $72, %rsp |
| ; CHECK-SSE-O3-NEXT: retq |
| ; |
| ; CHECK-AVX2-O3-LABEL: atomic_vec8_double: |
| ; CHECK-AVX2-O3: # %bb.0: |
| ; CHECK-AVX2-O3-NEXT: subq $72, %rsp |
| ; CHECK-AVX2-O3-NEXT: movq %rdi, %rsi |
| ; CHECK-AVX2-O3-NEXT: movq %rsp, %rdx |
| ; CHECK-AVX2-O3-NEXT: movl $64, %edi |
| ; CHECK-AVX2-O3-NEXT: movl $2, %ecx |
| ; CHECK-AVX2-O3-NEXT: callq __atomic_load@PLT |
| ; CHECK-AVX2-O3-NEXT: vmovups (%rsp), %ymm0 |
| ; CHECK-AVX2-O3-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1 |
| ; CHECK-AVX2-O3-NEXT: addq $72, %rsp |
| ; CHECK-AVX2-O3-NEXT: retq |
| ; |
| ; CHECK-AVX512-O3-LABEL: atomic_vec8_double: |
| ; CHECK-AVX512-O3: # %bb.0: |
| ; CHECK-AVX512-O3-NEXT: subq $72, %rsp |
| ; CHECK-AVX512-O3-NEXT: movq %rdi, %rsi |
| ; CHECK-AVX512-O3-NEXT: movq %rsp, %rdx |
| ; CHECK-AVX512-O3-NEXT: movl $64, %edi |
| ; CHECK-AVX512-O3-NEXT: movl $2, %ecx |
| ; CHECK-AVX512-O3-NEXT: callq __atomic_load@PLT |
| ; CHECK-AVX512-O3-NEXT: vmovups (%rsp), %zmm0 |
| ; CHECK-AVX512-O3-NEXT: addq $72, %rsp |
| ; CHECK-AVX512-O3-NEXT: retq |
| ; |
| ; CHECK-SSE-O0-LABEL: atomic_vec8_double: |
| ; CHECK-SSE-O0: # %bb.0: |
| ; CHECK-SSE-O0-NEXT: subq $72, %rsp |
| ; CHECK-SSE-O0-NEXT: movq %rdi, %rsi |
| ; CHECK-SSE-O0-NEXT: movl $64, %edi |
| ; CHECK-SSE-O0-NEXT: movq %rsp, %rdx |
| ; CHECK-SSE-O0-NEXT: movl $2, %ecx |
| ; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT |
| ; CHECK-SSE-O0-NEXT: movapd (%rsp), %xmm0 |
| ; CHECK-SSE-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm1 |
| ; CHECK-SSE-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm2 |
| ; CHECK-SSE-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm3 |
| ; CHECK-SSE-O0-NEXT: addq $72, %rsp |
| ; CHECK-SSE-O0-NEXT: retq |
| ; |
| ; CHECK-AVX2-O0-LABEL: atomic_vec8_double: |
| ; CHECK-AVX2-O0: # %bb.0: |
| ; CHECK-AVX2-O0-NEXT: subq $72, %rsp |
| ; CHECK-AVX2-O0-NEXT: movq %rdi, %rsi |
| ; CHECK-AVX2-O0-NEXT: movl $64, %edi |
| ; CHECK-AVX2-O0-NEXT: movq %rsp, %rdx |
| ; CHECK-AVX2-O0-NEXT: movl $2, %ecx |
| ; CHECK-AVX2-O0-NEXT: callq __atomic_load@PLT |
| ; CHECK-AVX2-O0-NEXT: vmovupd (%rsp), %ymm0 |
| ; CHECK-AVX2-O0-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm1 |
| ; CHECK-AVX2-O0-NEXT: addq $72, %rsp |
| ; CHECK-AVX2-O0-NEXT: retq |
| ; |
| ; CHECK-AVX512-O0-LABEL: atomic_vec8_double: |
| ; CHECK-AVX512-O0: # %bb.0: |
| ; CHECK-AVX512-O0-NEXT: subq $72, %rsp |
| ; CHECK-AVX512-O0-NEXT: movq %rdi, %rsi |
| ; CHECK-AVX512-O0-NEXT: movl $64, %edi |
| ; CHECK-AVX512-O0-NEXT: movq %rsp, %rdx |
| ; CHECK-AVX512-O0-NEXT: movl $2, %ecx |
| ; CHECK-AVX512-O0-NEXT: callq __atomic_load@PLT |
| ; CHECK-AVX512-O0-NEXT: vmovupd (%rsp), %zmm0 |
| ; CHECK-AVX512-O0-NEXT: addq $72, %rsp |
| ; CHECK-AVX512-O0-NEXT: retq |
| %ret = load atomic <8 x double>, ptr %x acquire, align 4 |
| ret <8 x double> %ret |
| } |
| |
| ; Acquire atomic load of a <16 x bfloat> (32 bytes) with align 4. The checks |
| ; expect 40 bytes of stack to be reserved and a call to __atomic_load with |
| ; $32 in %edi, the source pointer in %rsi, %rsp in %rdx and $2 in %ecx. The |
| ; SSE runs then reload two xmm registers with movaps, and the AVX runs |
| ; reload one ymm register with vmovups. |
| define <16 x bfloat> @atomic_vec16_bfloat(ptr %x) nounwind { |
| ; CHECK-SSE-O3-LABEL: atomic_vec16_bfloat: |
| ; CHECK-SSE-O3: # %bb.0: |
| ; CHECK-SSE-O3-NEXT: subq $40, %rsp |
| ; CHECK-SSE-O3-NEXT: movq %rdi, %rsi |
| ; CHECK-SSE-O3-NEXT: movq %rsp, %rdx |
| ; CHECK-SSE-O3-NEXT: movl $32, %edi |
| ; CHECK-SSE-O3-NEXT: movl $2, %ecx |
| ; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT |
| ; CHECK-SSE-O3-NEXT: movaps (%rsp), %xmm0 |
| ; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 |
| ; CHECK-SSE-O3-NEXT: addq $40, %rsp |
| ; CHECK-SSE-O3-NEXT: retq |
| ; |
| ; CHECK-AVX-O3-LABEL: atomic_vec16_bfloat: |
| ; CHECK-AVX-O3: # %bb.0: |
| ; CHECK-AVX-O3-NEXT: subq $40, %rsp |
| ; CHECK-AVX-O3-NEXT: movq %rdi, %rsi |
| ; CHECK-AVX-O3-NEXT: movq %rsp, %rdx |
| ; CHECK-AVX-O3-NEXT: movl $32, %edi |
| ; CHECK-AVX-O3-NEXT: movl $2, %ecx |
| ; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT |
| ; CHECK-AVX-O3-NEXT: vmovups (%rsp), %ymm0 |
| ; CHECK-AVX-O3-NEXT: addq $40, %rsp |
| ; CHECK-AVX-O3-NEXT: retq |
| ; |
| ; CHECK-SSE-O0-LABEL: atomic_vec16_bfloat: |
| ; CHECK-SSE-O0: # %bb.0: |
| ; CHECK-SSE-O0-NEXT: subq $40, %rsp |
| ; CHECK-SSE-O0-NEXT: movq %rdi, %rsi |
| ; CHECK-SSE-O0-NEXT: movl $32, %edi |
| ; CHECK-SSE-O0-NEXT: movq %rsp, %rdx |
| ; CHECK-SSE-O0-NEXT: movl $2, %ecx |
| ; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT |
| ; CHECK-SSE-O0-NEXT: movaps (%rsp), %xmm0 |
| ; CHECK-SSE-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 |
| ; CHECK-SSE-O0-NEXT: addq $40, %rsp |
| ; CHECK-SSE-O0-NEXT: retq |
| ; |
| ; CHECK-AVX-O0-LABEL: atomic_vec16_bfloat: |
| ; CHECK-AVX-O0: # %bb.0: |
| ; CHECK-AVX-O0-NEXT: subq $40, %rsp |
| ; CHECK-AVX-O0-NEXT: movq %rdi, %rsi |
| ; CHECK-AVX-O0-NEXT: movl $32, %edi |
| ; CHECK-AVX-O0-NEXT: movq %rsp, %rdx |
| ; CHECK-AVX-O0-NEXT: movl $2, %ecx |
| ; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT |
| ; CHECK-AVX-O0-NEXT: vmovups (%rsp), %ymm0 |
| ; CHECK-AVX-O0-NEXT: addq $40, %rsp |
| ; CHECK-AVX-O0-NEXT: retq |
| %ret = load atomic <16 x bfloat>, ptr %x acquire, align 4 |
| ret <16 x bfloat> %ret |
| } |
| |
| ; Acquire atomic load of a <32 x half> (64 bytes) with align 4. The checks |
| ; expect 72 bytes of stack to be reserved and a call to __atomic_load with |
| ; $64 in %edi, the source pointer in %rsi, %rsp in %rdx and $2 in %ecx. The |
| ; reload of the result depends on the vector width of the run, so the AVX2 |
| ; and AVX512 runs use their own prefixes here. The SSE runs reload four xmm |
| ; registers with movaps, the AVX2 runs reload two ymm registers with vmovups |
| ; and the AVX512 runs reload one zmm register with vmovups. |
| define <32 x half> @atomic_vec32_half(ptr %x) nounwind { |
| ; CHECK-SSE-O3-LABEL: atomic_vec32_half: |
| ; CHECK-SSE-O3: # %bb.0: |
| ; CHECK-SSE-O3-NEXT: subq $72, %rsp |
| ; CHECK-SSE-O3-NEXT: movq %rdi, %rsi |
| ; CHECK-SSE-O3-NEXT: movq %rsp, %rdx |
| ; CHECK-SSE-O3-NEXT: movl $64, %edi |
| ; CHECK-SSE-O3-NEXT: movl $2, %ecx |
| ; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT |
| ; CHECK-SSE-O3-NEXT: movaps (%rsp), %xmm0 |
| ; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 |
| ; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 |
| ; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3 |
| ; CHECK-SSE-O3-NEXT: addq $72, %rsp |
| ; CHECK-SSE-O3-NEXT: retq |
| ; |
| ; CHECK-AVX2-O3-LABEL: atomic_vec32_half: |
| ; CHECK-AVX2-O3: # %bb.0: |
| ; CHECK-AVX2-O3-NEXT: subq $72, %rsp |
| ; CHECK-AVX2-O3-NEXT: movq %rdi, %rsi |
| ; CHECK-AVX2-O3-NEXT: movq %rsp, %rdx |
| ; CHECK-AVX2-O3-NEXT: movl $64, %edi |
| ; CHECK-AVX2-O3-NEXT: movl $2, %ecx |
| ; CHECK-AVX2-O3-NEXT: callq __atomic_load@PLT |
| ; CHECK-AVX2-O3-NEXT: vmovups (%rsp), %ymm0 |
| ; CHECK-AVX2-O3-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1 |
| ; CHECK-AVX2-O3-NEXT: addq $72, %rsp |
| ; CHECK-AVX2-O3-NEXT: retq |
| ; |
| ; CHECK-AVX512-O3-LABEL: atomic_vec32_half: |
| ; CHECK-AVX512-O3: # %bb.0: |
| ; CHECK-AVX512-O3-NEXT: subq $72, %rsp |
| ; CHECK-AVX512-O3-NEXT: movq %rdi, %rsi |
| ; CHECK-AVX512-O3-NEXT: movq %rsp, %rdx |
| ; CHECK-AVX512-O3-NEXT: movl $64, %edi |
| ; CHECK-AVX512-O3-NEXT: movl $2, %ecx |
| ; CHECK-AVX512-O3-NEXT: callq __atomic_load@PLT |
| ; CHECK-AVX512-O3-NEXT: vmovups (%rsp), %zmm0 |
| ; CHECK-AVX512-O3-NEXT: addq $72, %rsp |
| ; CHECK-AVX512-O3-NEXT: retq |
| ; |
| ; CHECK-SSE-O0-LABEL: atomic_vec32_half: |
| ; CHECK-SSE-O0: # %bb.0: |
| ; CHECK-SSE-O0-NEXT: subq $72, %rsp |
| ; CHECK-SSE-O0-NEXT: movq %rdi, %rsi |
| ; CHECK-SSE-O0-NEXT: movl $64, %edi |
| ; CHECK-SSE-O0-NEXT: movq %rsp, %rdx |
| ; CHECK-SSE-O0-NEXT: movl $2, %ecx |
| ; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT |
| ; CHECK-SSE-O0-NEXT: movaps (%rsp), %xmm0 |
| ; CHECK-SSE-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 |
| ; CHECK-SSE-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 |
| ; CHECK-SSE-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3 |
| ; CHECK-SSE-O0-NEXT: addq $72, %rsp |
| ; CHECK-SSE-O0-NEXT: retq |
| ; |
| ; CHECK-AVX2-O0-LABEL: atomic_vec32_half: |
| ; CHECK-AVX2-O0: # %bb.0: |
| ; CHECK-AVX2-O0-NEXT: subq $72, %rsp |
| ; CHECK-AVX2-O0-NEXT: movq %rdi, %rsi |
| ; CHECK-AVX2-O0-NEXT: movl $64, %edi |
| ; CHECK-AVX2-O0-NEXT: movq %rsp, %rdx |
| ; CHECK-AVX2-O0-NEXT: movl $2, %ecx |
| ; CHECK-AVX2-O0-NEXT: callq __atomic_load@PLT |
| ; CHECK-AVX2-O0-NEXT: vmovups (%rsp), %ymm0 |
| ; CHECK-AVX2-O0-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1 |
| ; CHECK-AVX2-O0-NEXT: addq $72, %rsp |
| ; CHECK-AVX2-O0-NEXT: retq |
| ; |
| ; CHECK-AVX512-O0-LABEL: atomic_vec32_half: |
| ; CHECK-AVX512-O0: # %bb.0: |
| ; CHECK-AVX512-O0-NEXT: subq $72, %rsp |
| ; CHECK-AVX512-O0-NEXT: movq %rdi, %rsi |
| ; CHECK-AVX512-O0-NEXT: movl $64, %edi |
| ; CHECK-AVX512-O0-NEXT: movq %rsp, %rdx |
| ; CHECK-AVX512-O0-NEXT: movl $2, %ecx |
| ; CHECK-AVX512-O0-NEXT: callq __atomic_load@PLT |
| ; CHECK-AVX512-O0-NEXT: vmovups (%rsp), %zmm0 |
| ; CHECK-AVX512-O0-NEXT: addq $72, %rsp |
| ; CHECK-AVX512-O0-NEXT: retq |
| %ret = load atomic <32 x half>, ptr %x acquire, align 4 |
| ret <32 x half> %ret |
| } |