; blob: 1173c45b4bfd8b01f1de1cb467e431d9d7177477 [file] [log] [blame] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,CHECK-O3,CHECK-SSE-O3
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-O3,CHECK-SSE-O3
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-O3,CHECK-AVX-O3,CHECK-AVX2-O3
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-O3,CHECK-AVX-O3,CHECK-AVX512-O3
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,CHECK-O0,CHECK-SSE-O0
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-O0,CHECK-SSE-O0
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-O0,CHECK-AVX-O0,CHECK-AVX2-O0
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-O0,CHECK-AVX-O0,CHECK-AVX512-O0
; Sequentially consistent atomic store of an i32 with 4-byte alignment.
; Every run configuration expects the store to be emitted as a single xchgl
; between the value register and the destination memory.
define void @test1(ptr %ptr, i32 %val1) {
; CHECK-LABEL: test1:
; CHECK: # %bb.0:
; CHECK-NEXT: xchgl %esi, (%rdi)
; CHECK-NEXT: retq
store atomic i32 %val1, ptr %ptr seq_cst, align 4
ret void
}
; Release-ordered atomic store of an i32 with 4-byte alignment.
; In contrast to the seq_cst store in @test1, every run configuration expects
; an ordinary movl to memory here.
define void @test2(ptr %ptr, i32 %val1) {
; CHECK-LABEL: test2:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, (%rdi)
; CHECK-NEXT: retq
store atomic i32 %val1, ptr %ptr release, align 4
ret void
}
; Sequentially consistent atomic load of an i32 with 4-byte alignment.
; Every run configuration expects a single movl from memory into %eax.
define i32 @test3(ptr %ptr) {
; CHECK-LABEL: test3:
; CHECK: # %bb.0:
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: retq
%val = load atomic i32, ptr %ptr seq_cst, align 4
ret i32 %val
}
; Acquire atomic load of a single-element <1 x i32> vector, 4-byte aligned.
; The expected output is the same single movl as the scalar i32 load in
; @test3, for every run configuration.
define <1 x i32> @atomic_vec1_i32(ptr %x) {
; CHECK-LABEL: atomic_vec1_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: retq
%ret = load atomic <1 x i32>, ptr %x acquire, align 4
ret <1 x i32> %ret
}
; Acquire atomic load of <1 x i8>, 1-byte aligned.
; The optimized runs expect a zero-extending byte load (movzbl) into %eax;
; the -O0 runs expect a plain byte load (movb) into %al.
define <1 x i8> @atomic_vec1_i8(ptr %x) {
; CHECK-O3-LABEL: atomic_vec1_i8:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movzbl (%rdi), %eax
; CHECK-O3-NEXT: retq
;
; CHECK-O0-LABEL: atomic_vec1_i8:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movb (%rdi), %al
; CHECK-O0-NEXT: retq
%ret = load atomic <1 x i8>, ptr %x acquire, align 1
ret <1 x i8> %ret
}
; Acquire atomic load of <1 x i16>, 2-byte aligned.
; The optimized runs expect a zero-extending word load (movzwl) into %eax;
; the -O0 runs expect a plain word load (movw) into %ax.
define <1 x i16> @atomic_vec1_i16(ptr %x) {
; CHECK-O3-LABEL: atomic_vec1_i16:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movzwl (%rdi), %eax
; CHECK-O3-NEXT: retq
;
; CHECK-O0-LABEL: atomic_vec1_i16:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movw (%rdi), %ax
; CHECK-O0-NEXT: retq
%ret = load atomic <1 x i16>, ptr %x acquire, align 2
ret <1 x i16> %ret
}
; Acquire atomic load of <1 x i8> followed by a zero-extension to <1 x i32>.
; The recorded optimized output keeps two instructions: the movzbl load and a
; second register-to-register movzbl, i.e. the extension is not folded into
; the atomic load. The -O0 output is a movb load followed by movzbl.
define <1 x i32> @atomic_vec1_i8_zext(ptr %x) {
; CHECK-O3-LABEL: atomic_vec1_i8_zext:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movzbl (%rdi), %eax
; CHECK-O3-NEXT: movzbl %al, %eax
; CHECK-O3-NEXT: retq
;
; CHECK-O0-LABEL: atomic_vec1_i8_zext:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movb (%rdi), %al
; CHECK-O0-NEXT: movzbl %al, %eax
; CHECK-O0-NEXT: retq
%ret = load atomic <1 x i8>, ptr %x acquire, align 1
%zret = zext <1 x i8> %ret to <1 x i32>
ret <1 x i32> %zret
}
; Acquire atomic load of <1 x i16> followed by a sign-extension to <1 x i64>.
; Both optimization levels expect a separate 16-bit load (movzwl when
; optimized, movw at -O0) followed by movswq to produce the 64-bit result.
define <1 x i64> @atomic_vec1_i16_sext(ptr %x) {
; CHECK-O3-LABEL: atomic_vec1_i16_sext:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movzwl (%rdi), %eax
; CHECK-O3-NEXT: movswq %ax, %rax
; CHECK-O3-NEXT: retq
;
; CHECK-O0-LABEL: atomic_vec1_i16_sext:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movw (%rdi), %ax
; CHECK-O0-NEXT: movswq %ax, %rax
; CHECK-O0-NEXT: retq
%ret = load atomic <1 x i16>, ptr %x acquire, align 2
%sret = sext <1 x i16> %ret to <1 x i64>
ret <1 x i64> %sret
}
; Acquire atomic load of a one-element vector of pointers in address space
; 270, 4-byte aligned. Every run configuration expects a 4-byte movl into
; %eax, so the pointer is loaded as a 32-bit value.
; NOTE(review): address space 270 is presumably one of the x86 32-bit pointer
; address spaces - confirm against the X86 target documentation.
define <1 x ptr addrspace(270)> @atomic_vec1_ptr270(ptr %x) {
; CHECK-LABEL: atomic_vec1_ptr270:
; CHECK: # %bb.0:
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: retq
%ret = load atomic <1 x ptr addrspace(270)>, ptr %x acquire, align 4
ret <1 x ptr addrspace(270)> %ret
}
; Acquire atomic load of <1 x bfloat>, 2-byte aligned.
; The 16-bit value is expected to be loaded through a general-purpose
; register and then inserted into element 0 of %xmm0: pinsrw on the x86-64 and
; x86-64-v2 runs, vpinsrw on the x86-64-v3 and x86-64-v4 runs. At -O0 the load
; goes through %cx and is copied into %ax before the insert.
define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
; CHECK-SSE-O3-LABEL: atomic_vec1_bfloat:
; CHECK-SSE-O3: # %bb.0:
; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX-O3-LABEL: atomic_vec1_bfloat:
; CHECK-AVX-O3: # %bb.0:
; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-AVX-O3-NEXT: retq
;
; CHECK-SSE-O0-LABEL: atomic_vec1_bfloat:
; CHECK-SSE-O0: # %bb.0:
; CHECK-SSE-O0-NEXT: movw (%rdi), %cx
; CHECK-SSE-O0-NEXT: # implicit-def: $eax
; CHECK-SSE-O0-NEXT: movw %cx, %ax
; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0
; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX-O0-LABEL: atomic_vec1_bfloat:
; CHECK-AVX-O0: # %bb.0:
; CHECK-AVX-O0-NEXT: movw (%rdi), %cx
; CHECK-AVX-O0-NEXT: # implicit-def: $eax
; CHECK-AVX-O0-NEXT: movw %cx, %ax
; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0
; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-AVX-O0-NEXT: retq
%ret = load atomic <1 x bfloat>, ptr %x acquire, align 2
ret <1 x bfloat> %ret
}
; Acquire atomic load of <1 x ptr> with 8-byte alignment.
; Every run configuration expects a single movq into %rax (compare with
; @atomic_vec1_ptr, which uses align 4 and expects a library call).
define <1 x ptr> @atomic_vec1_ptr_align(ptr %x) nounwind {
; CHECK-LABEL: atomic_vec1_ptr_align:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: retq
%ret = load atomic <1 x ptr>, ptr %x acquire, align 8
ret <1 x ptr> %ret
}
; Acquire atomic load of <1 x i64> with 8-byte alignment.
; Every run configuration expects a single movq into %rax (compare with
; @atomic_vec1_i64, which uses align 4 and expects a library call).
define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
; CHECK-LABEL: atomic_vec1_i64_align:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: retq
%ret = load atomic <1 x i64>, ptr %x acquire, align 8
ret <1 x i64> %ret
}
; Acquire atomic load of <1 x ptr> with only 4-byte alignment.
; Instead of an inline movq, every run configuration expects a call to
; __atomic_load with: $8 in %edi (byte size), the source pointer moved from
; %rdi to %rsi, the stack pointer in %rdx (result buffer), and $2 in %ecx.
; The result is then reloaded from the stack slot into %rax.
; NOTE(review): $2 is presumably the acquire memory-order constant of the
; libatomic interface - confirm against the libatomic ABI.
; The optimized and -O0 outputs differ only in the order of argument setup.
define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
; CHECK-O3-LABEL: atomic_vec1_ptr:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: pushq %rax
; CHECK-O3-NEXT: movq %rdi, %rsi
; CHECK-O3-NEXT: movq %rsp, %rdx
; CHECK-O3-NEXT: movl $8, %edi
; CHECK-O3-NEXT: movl $2, %ecx
; CHECK-O3-NEXT: callq __atomic_load@PLT
; CHECK-O3-NEXT: movq (%rsp), %rax
; CHECK-O3-NEXT: popq %rcx
; CHECK-O3-NEXT: retq
;
; CHECK-O0-LABEL: atomic_vec1_ptr:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: pushq %rax
; CHECK-O0-NEXT: movq %rdi, %rsi
; CHECK-O0-NEXT: movl $8, %edi
; CHECK-O0-NEXT: movq %rsp, %rdx
; CHECK-O0-NEXT: movl $2, %ecx
; CHECK-O0-NEXT: callq __atomic_load@PLT
; CHECK-O0-NEXT: movq (%rsp), %rax
; CHECK-O0-NEXT: popq %rcx
; CHECK-O0-NEXT: retq
%ret = load atomic <1 x ptr>, ptr %x acquire, align 4
ret <1 x ptr> %ret
}
; Acquire atomic load of <1 x half>, 2-byte aligned.
; The expected instruction sequences are the same as for @atomic_vec1_bfloat:
; a 16-bit load into a general-purpose register followed by pinsrw (x86-64 and
; x86-64-v2 runs) or vpinsrw (x86-64-v3 and x86-64-v4 runs) into element 0 of
; %xmm0, with an extra %cx to %ax copy at -O0.
define <1 x half> @atomic_vec1_half(ptr %x) {
; CHECK-SSE-O3-LABEL: atomic_vec1_half:
; CHECK-SSE-O3: # %bb.0:
; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX-O3-LABEL: atomic_vec1_half:
; CHECK-AVX-O3: # %bb.0:
; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-AVX-O3-NEXT: retq
;
; CHECK-SSE-O0-LABEL: atomic_vec1_half:
; CHECK-SSE-O0: # %bb.0:
; CHECK-SSE-O0-NEXT: movw (%rdi), %cx
; CHECK-SSE-O0-NEXT: # implicit-def: $eax
; CHECK-SSE-O0-NEXT: movw %cx, %ax
; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0
; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX-O0-LABEL: atomic_vec1_half:
; CHECK-AVX-O0: # %bb.0:
; CHECK-AVX-O0-NEXT: movw (%rdi), %cx
; CHECK-AVX-O0-NEXT: # implicit-def: $eax
; CHECK-AVX-O0-NEXT: movw %cx, %ax
; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0
; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-AVX-O0-NEXT: retq
%ret = load atomic <1 x half>, ptr %x acquire, align 2
ret <1 x half> %ret
}
; Acquire atomic load of <1 x float>, 4-byte aligned.
; The value is expected to be loaded directly into %xmm0 with a scalar
; single-precision load: movss on the x86-64 and x86-64-v2 runs, vmovss on the
; x86-64-v3 and x86-64-v4 runs, at both optimization levels.
define <1 x float> @atomic_vec1_float(ptr %x) {
; CHECK-SSE-O3-LABEL: atomic_vec1_float:
; CHECK-SSE-O3: # %bb.0:
; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX-O3-LABEL: atomic_vec1_float:
; CHECK-AVX-O3: # %bb.0:
; CHECK-AVX-O3-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-AVX-O3-NEXT: retq
;
; CHECK-SSE-O0-LABEL: atomic_vec1_float:
; CHECK-SSE-O0: # %bb.0:
; CHECK-SSE-O0-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX-O0-LABEL: atomic_vec1_float:
; CHECK-AVX-O0: # %bb.0:
; CHECK-AVX-O0-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-AVX-O0-NEXT: retq
%ret = load atomic <1 x float>, ptr %x acquire, align 4
ret <1 x float> %ret
}
; Acquire atomic load of <1 x double> with 8-byte alignment.
; The value is expected to be loaded directly into %xmm0 with a scalar
; double-precision load: movsd on the x86-64 and x86-64-v2 runs, vmovsd on the
; x86-64-v3 and x86-64-v4 runs (compare with @atomic_vec1_double, which uses
; align 4 and expects a library call).
define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind {
; CHECK-SSE-O3-LABEL: atomic_vec1_double_align:
; CHECK-SSE-O3: # %bb.0:
; CHECK-SSE-O3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX-O3-LABEL: atomic_vec1_double_align:
; CHECK-AVX-O3: # %bb.0:
; CHECK-AVX-O3-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-AVX-O3-NEXT: retq
;
; CHECK-SSE-O0-LABEL: atomic_vec1_double_align:
; CHECK-SSE-O0: # %bb.0:
; CHECK-SSE-O0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX-O0-LABEL: atomic_vec1_double_align:
; CHECK-AVX-O0: # %bb.0:
; CHECK-AVX-O0-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-AVX-O0-NEXT: retq
%ret = load atomic <1 x double>, ptr %x acquire, align 8
ret <1 x double> %ret
}
; Acquire atomic load of <1 x i64> with only 4-byte alignment.
; Every run configuration expects a call to __atomic_load with $8 in %edi,
; the source pointer in %rsi, the stack pointer in %rdx and $2 in %ecx; the
; result is then reloaded from the stack slot into %rax. The optimized and
; -O0 outputs differ only in the order of argument setup.
define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind {
; CHECK-O3-LABEL: atomic_vec1_i64:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: pushq %rax
; CHECK-O3-NEXT: movq %rdi, %rsi
; CHECK-O3-NEXT: movq %rsp, %rdx
; CHECK-O3-NEXT: movl $8, %edi
; CHECK-O3-NEXT: movl $2, %ecx
; CHECK-O3-NEXT: callq __atomic_load@PLT
; CHECK-O3-NEXT: movq (%rsp), %rax
; CHECK-O3-NEXT: popq %rcx
; CHECK-O3-NEXT: retq
;
; CHECK-O0-LABEL: atomic_vec1_i64:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: pushq %rax
; CHECK-O0-NEXT: movq %rdi, %rsi
; CHECK-O0-NEXT: movl $8, %edi
; CHECK-O0-NEXT: movq %rsp, %rdx
; CHECK-O0-NEXT: movl $2, %ecx
; CHECK-O0-NEXT: callq __atomic_load@PLT
; CHECK-O0-NEXT: movq (%rsp), %rax
; CHECK-O0-NEXT: popq %rcx
; CHECK-O0-NEXT: retq
%ret = load atomic <1 x i64>, ptr %x acquire, align 4
ret <1 x i64> %ret
}
; Acquire atomic load of <1 x double> with only 4-byte alignment.
; Every run configuration expects a call to __atomic_load ($8 in %edi, source
; pointer in %rsi, stack pointer in %rdx, $2 in %ecx). After the call the
; result is loaded from memory into %xmm0 with movsd (x86-64 and x86-64-v2
; runs) or vmovsd (x86-64-v3 and x86-64-v4 runs).
define <1 x double> @atomic_vec1_double(ptr %x) nounwind {
; CHECK-SSE-O3-LABEL: atomic_vec1_double:
; CHECK-SSE-O3: # %bb.0:
; CHECK-SSE-O3-NEXT: pushq %rax
; CHECK-SSE-O3-NEXT: movq %rdi, %rsi
; CHECK-SSE-O3-NEXT: movq %rsp, %rdx
; CHECK-SSE-O3-NEXT: movl $8, %edi
; CHECK-SSE-O3-NEXT: movl $2, %ecx
; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT
; CHECK-SSE-O3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-SSE-O3-NEXT: popq %rax
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX-O3-LABEL: atomic_vec1_double:
; CHECK-AVX-O3: # %bb.0:
; CHECK-AVX-O3-NEXT: pushq %rax
; CHECK-AVX-O3-NEXT: movq %rdi, %rsi
; CHECK-AVX-O3-NEXT: movq %rsp, %rdx
; CHECK-AVX-O3-NEXT: movl $8, %edi
; CHECK-AVX-O3-NEXT: movl $2, %ecx
; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT
; CHECK-AVX-O3-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-AVX-O3-NEXT: popq %rax
; CHECK-AVX-O3-NEXT: retq
;
; CHECK-SSE-O0-LABEL: atomic_vec1_double:
; CHECK-SSE-O0: # %bb.0:
; CHECK-SSE-O0-NEXT: pushq %rax
; CHECK-SSE-O0-NEXT: movq %rdi, %rsi
; CHECK-SSE-O0-NEXT: movl $8, %edi
; CHECK-SSE-O0-NEXT: movq %rsp, %rdx
; CHECK-SSE-O0-NEXT: movl $2, %ecx
; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT
; CHECK-SSE-O0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-SSE-O0-NEXT: popq %rax
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX-O0-LABEL: atomic_vec1_double:
; CHECK-AVX-O0: # %bb.0:
; CHECK-AVX-O0-NEXT: pushq %rax
; CHECK-AVX-O0-NEXT: movq %rdi, %rsi
; CHECK-AVX-O0-NEXT: movl $8, %edi
; CHECK-AVX-O0-NEXT: movq %rsp, %rdx
; CHECK-AVX-O0-NEXT: movl $2, %ecx
; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT
; CHECK-AVX-O0-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-AVX-O0-NEXT: popq %rax
; CHECK-AVX-O0-NEXT: retq
%ret = load atomic <1 x double>, ptr %x acquire, align 4
ret <1 x double> %ret
}
; Acquire atomic load of <2 x i32> with 4-byte alignment.
; Every run configuration expects a call to __atomic_load with $8 in %edi,
; followed by a 64-bit load of the result into %xmm0. The reload instruction
; differs by configuration: movsd / vmovsd in the optimized runs and
; movq / vmovq in the -O0 runs.
define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
; CHECK-SSE-O3-LABEL: atomic_vec2_i32:
; CHECK-SSE-O3: # %bb.0:
; CHECK-SSE-O3-NEXT: pushq %rax
; CHECK-SSE-O3-NEXT: movq %rdi, %rsi
; CHECK-SSE-O3-NEXT: movq %rsp, %rdx
; CHECK-SSE-O3-NEXT: movl $8, %edi
; CHECK-SSE-O3-NEXT: movl $2, %ecx
; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT
; CHECK-SSE-O3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-SSE-O3-NEXT: popq %rax
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX-O3-LABEL: atomic_vec2_i32:
; CHECK-AVX-O3: # %bb.0:
; CHECK-AVX-O3-NEXT: pushq %rax
; CHECK-AVX-O3-NEXT: movq %rdi, %rsi
; CHECK-AVX-O3-NEXT: movq %rsp, %rdx
; CHECK-AVX-O3-NEXT: movl $8, %edi
; CHECK-AVX-O3-NEXT: movl $2, %ecx
; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT
; CHECK-AVX-O3-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-AVX-O3-NEXT: popq %rax
; CHECK-AVX-O3-NEXT: retq
;
; CHECK-SSE-O0-LABEL: atomic_vec2_i32:
; CHECK-SSE-O0: # %bb.0:
; CHECK-SSE-O0-NEXT: pushq %rax
; CHECK-SSE-O0-NEXT: movq %rdi, %rsi
; CHECK-SSE-O0-NEXT: movl $8, %edi
; CHECK-SSE-O0-NEXT: movq %rsp, %rdx
; CHECK-SSE-O0-NEXT: movl $2, %ecx
; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT
; CHECK-SSE-O0-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-SSE-O0-NEXT: popq %rax
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX-O0-LABEL: atomic_vec2_i32:
; CHECK-AVX-O0: # %bb.0:
; CHECK-AVX-O0-NEXT: pushq %rax
; CHECK-AVX-O0-NEXT: movq %rdi, %rsi
; CHECK-AVX-O0-NEXT: movl $8, %edi
; CHECK-AVX-O0-NEXT: movq %rsp, %rdx
; CHECK-AVX-O0-NEXT: movl $2, %ecx
; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT
; CHECK-AVX-O0-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; CHECK-AVX-O0-NEXT: popq %rax
; CHECK-AVX-O0-NEXT: retq
%ret = load atomic <2 x i32>, ptr %x acquire, align 4
ret <2 x i32> %ret
}
; Acquire atomic load of <4 x float> with 4-byte alignment.
; Every run configuration expects 24 bytes of stack to be reserved and a call
; to __atomic_load with $16 in %edi; the 16-byte result is then read from
; (%rsp) into %xmm0 with movaps (x86-64 and x86-64-v2 runs) or vmovaps
; (x86-64-v3 and x86-64-v4 runs).
define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
; CHECK-SSE-O3-LABEL: atomic_vec4_float:
; CHECK-SSE-O3: # %bb.0:
; CHECK-SSE-O3-NEXT: subq $24, %rsp
; CHECK-SSE-O3-NEXT: movq %rdi, %rsi
; CHECK-SSE-O3-NEXT: movq %rsp, %rdx
; CHECK-SSE-O3-NEXT: movl $16, %edi
; CHECK-SSE-O3-NEXT: movl $2, %ecx
; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT
; CHECK-SSE-O3-NEXT: movaps (%rsp), %xmm0
; CHECK-SSE-O3-NEXT: addq $24, %rsp
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX-O3-LABEL: atomic_vec4_float:
; CHECK-AVX-O3: # %bb.0:
; CHECK-AVX-O3-NEXT: subq $24, %rsp
; CHECK-AVX-O3-NEXT: movq %rdi, %rsi
; CHECK-AVX-O3-NEXT: movq %rsp, %rdx
; CHECK-AVX-O3-NEXT: movl $16, %edi
; CHECK-AVX-O3-NEXT: movl $2, %ecx
; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT
; CHECK-AVX-O3-NEXT: vmovaps (%rsp), %xmm0
; CHECK-AVX-O3-NEXT: addq $24, %rsp
; CHECK-AVX-O3-NEXT: retq
;
; CHECK-SSE-O0-LABEL: atomic_vec4_float:
; CHECK-SSE-O0: # %bb.0:
; CHECK-SSE-O0-NEXT: subq $24, %rsp
; CHECK-SSE-O0-NEXT: movq %rdi, %rsi
; CHECK-SSE-O0-NEXT: movl $16, %edi
; CHECK-SSE-O0-NEXT: movq %rsp, %rdx
; CHECK-SSE-O0-NEXT: movl $2, %ecx
; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT
; CHECK-SSE-O0-NEXT: movaps (%rsp), %xmm0
; CHECK-SSE-O0-NEXT: addq $24, %rsp
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX-O0-LABEL: atomic_vec4_float:
; CHECK-AVX-O0: # %bb.0:
; CHECK-AVX-O0-NEXT: subq $24, %rsp
; CHECK-AVX-O0-NEXT: movq %rdi, %rsi
; CHECK-AVX-O0-NEXT: movl $16, %edi
; CHECK-AVX-O0-NEXT: movq %rsp, %rdx
; CHECK-AVX-O0-NEXT: movl $2, %ecx
; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT
; CHECK-AVX-O0-NEXT: vmovaps (%rsp), %xmm0
; CHECK-AVX-O0-NEXT: addq $24, %rsp
; CHECK-AVX-O0-NEXT: retq
%ret = load atomic <4 x float>, ptr %x acquire, align 4
ret <4 x float> %ret
}
; Acquire atomic load of <8 x double> with 4-byte alignment.
; Every run configuration expects 72 bytes of stack to be reserved and a call
; to __atomic_load with $64 in %edi. How the 64-byte result is read back from
; the stack depends on the run:
;   - x86-64 / x86-64-v2: four 16-byte loads into %xmm0-%xmm3
;     (movaps when optimized, movapd at -O0);
;   - x86-64-v3: two 32-byte loads into %ymm0-%ymm1
;     (vmovups when optimized, vmovupd at -O0);
;   - x86-64-v4: one 64-byte load into %zmm0
;     (vmovups when optimized, vmovupd at -O0).
define <8 x double> @atomic_vec8_double(ptr %x) nounwind {
; CHECK-SSE-O3-LABEL: atomic_vec8_double:
; CHECK-SSE-O3: # %bb.0:
; CHECK-SSE-O3-NEXT: subq $72, %rsp
; CHECK-SSE-O3-NEXT: movq %rdi, %rsi
; CHECK-SSE-O3-NEXT: movq %rsp, %rdx
; CHECK-SSE-O3-NEXT: movl $64, %edi
; CHECK-SSE-O3-NEXT: movl $2, %ecx
; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT
; CHECK-SSE-O3-NEXT: movaps (%rsp), %xmm0
; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
; CHECK-SSE-O3-NEXT: addq $72, %rsp
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX2-O3-LABEL: atomic_vec8_double:
; CHECK-AVX2-O3: # %bb.0:
; CHECK-AVX2-O3-NEXT: subq $72, %rsp
; CHECK-AVX2-O3-NEXT: movq %rdi, %rsi
; CHECK-AVX2-O3-NEXT: movq %rsp, %rdx
; CHECK-AVX2-O3-NEXT: movl $64, %edi
; CHECK-AVX2-O3-NEXT: movl $2, %ecx
; CHECK-AVX2-O3-NEXT: callq __atomic_load@PLT
; CHECK-AVX2-O3-NEXT: vmovups (%rsp), %ymm0
; CHECK-AVX2-O3-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1
; CHECK-AVX2-O3-NEXT: addq $72, %rsp
; CHECK-AVX2-O3-NEXT: retq
;
; CHECK-AVX512-O3-LABEL: atomic_vec8_double:
; CHECK-AVX512-O3: # %bb.0:
; CHECK-AVX512-O3-NEXT: subq $72, %rsp
; CHECK-AVX512-O3-NEXT: movq %rdi, %rsi
; CHECK-AVX512-O3-NEXT: movq %rsp, %rdx
; CHECK-AVX512-O3-NEXT: movl $64, %edi
; CHECK-AVX512-O3-NEXT: movl $2, %ecx
; CHECK-AVX512-O3-NEXT: callq __atomic_load@PLT
; CHECK-AVX512-O3-NEXT: vmovups (%rsp), %zmm0
; CHECK-AVX512-O3-NEXT: addq $72, %rsp
; CHECK-AVX512-O3-NEXT: retq
;
; CHECK-SSE-O0-LABEL: atomic_vec8_double:
; CHECK-SSE-O0: # %bb.0:
; CHECK-SSE-O0-NEXT: subq $72, %rsp
; CHECK-SSE-O0-NEXT: movq %rdi, %rsi
; CHECK-SSE-O0-NEXT: movl $64, %edi
; CHECK-SSE-O0-NEXT: movq %rsp, %rdx
; CHECK-SSE-O0-NEXT: movl $2, %ecx
; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT
; CHECK-SSE-O0-NEXT: movapd (%rsp), %xmm0
; CHECK-SSE-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm1
; CHECK-SSE-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm2
; CHECK-SSE-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm3
; CHECK-SSE-O0-NEXT: addq $72, %rsp
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX2-O0-LABEL: atomic_vec8_double:
; CHECK-AVX2-O0: # %bb.0:
; CHECK-AVX2-O0-NEXT: subq $72, %rsp
; CHECK-AVX2-O0-NEXT: movq %rdi, %rsi
; CHECK-AVX2-O0-NEXT: movl $64, %edi
; CHECK-AVX2-O0-NEXT: movq %rsp, %rdx
; CHECK-AVX2-O0-NEXT: movl $2, %ecx
; CHECK-AVX2-O0-NEXT: callq __atomic_load@PLT
; CHECK-AVX2-O0-NEXT: vmovupd (%rsp), %ymm0
; CHECK-AVX2-O0-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm1
; CHECK-AVX2-O0-NEXT: addq $72, %rsp
; CHECK-AVX2-O0-NEXT: retq
;
; CHECK-AVX512-O0-LABEL: atomic_vec8_double:
; CHECK-AVX512-O0: # %bb.0:
; CHECK-AVX512-O0-NEXT: subq $72, %rsp
; CHECK-AVX512-O0-NEXT: movq %rdi, %rsi
; CHECK-AVX512-O0-NEXT: movl $64, %edi
; CHECK-AVX512-O0-NEXT: movq %rsp, %rdx
; CHECK-AVX512-O0-NEXT: movl $2, %ecx
; CHECK-AVX512-O0-NEXT: callq __atomic_load@PLT
; CHECK-AVX512-O0-NEXT: vmovupd (%rsp), %zmm0
; CHECK-AVX512-O0-NEXT: addq $72, %rsp
; CHECK-AVX512-O0-NEXT: retq
%ret = load atomic <8 x double>, ptr %x acquire, align 4
ret <8 x double> %ret
}
; Acquire atomic load of <16 x bfloat> with 4-byte alignment.
; Every run configuration expects 40 bytes of stack to be reserved and a call
; to __atomic_load with $32 in %edi. The 32-byte result is read back as two
; movaps loads into %xmm0-%xmm1 on the x86-64 and x86-64-v2 runs, and as a
; single vmovups load into %ymm0 on the x86-64-v3 and x86-64-v4 runs.
define <16 x bfloat> @atomic_vec16_bfloat(ptr %x) nounwind {
; CHECK-SSE-O3-LABEL: atomic_vec16_bfloat:
; CHECK-SSE-O3: # %bb.0:
; CHECK-SSE-O3-NEXT: subq $40, %rsp
; CHECK-SSE-O3-NEXT: movq %rdi, %rsi
; CHECK-SSE-O3-NEXT: movq %rsp, %rdx
; CHECK-SSE-O3-NEXT: movl $32, %edi
; CHECK-SSE-O3-NEXT: movl $2, %ecx
; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT
; CHECK-SSE-O3-NEXT: movaps (%rsp), %xmm0
; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
; CHECK-SSE-O3-NEXT: addq $40, %rsp
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX-O3-LABEL: atomic_vec16_bfloat:
; CHECK-AVX-O3: # %bb.0:
; CHECK-AVX-O3-NEXT: subq $40, %rsp
; CHECK-AVX-O3-NEXT: movq %rdi, %rsi
; CHECK-AVX-O3-NEXT: movq %rsp, %rdx
; CHECK-AVX-O3-NEXT: movl $32, %edi
; CHECK-AVX-O3-NEXT: movl $2, %ecx
; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT
; CHECK-AVX-O3-NEXT: vmovups (%rsp), %ymm0
; CHECK-AVX-O3-NEXT: addq $40, %rsp
; CHECK-AVX-O3-NEXT: retq
;
; CHECK-SSE-O0-LABEL: atomic_vec16_bfloat:
; CHECK-SSE-O0: # %bb.0:
; CHECK-SSE-O0-NEXT: subq $40, %rsp
; CHECK-SSE-O0-NEXT: movq %rdi, %rsi
; CHECK-SSE-O0-NEXT: movl $32, %edi
; CHECK-SSE-O0-NEXT: movq %rsp, %rdx
; CHECK-SSE-O0-NEXT: movl $2, %ecx
; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT
; CHECK-SSE-O0-NEXT: movaps (%rsp), %xmm0
; CHECK-SSE-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
; CHECK-SSE-O0-NEXT: addq $40, %rsp
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX-O0-LABEL: atomic_vec16_bfloat:
; CHECK-AVX-O0: # %bb.0:
; CHECK-AVX-O0-NEXT: subq $40, %rsp
; CHECK-AVX-O0-NEXT: movq %rdi, %rsi
; CHECK-AVX-O0-NEXT: movl $32, %edi
; CHECK-AVX-O0-NEXT: movq %rsp, %rdx
; CHECK-AVX-O0-NEXT: movl $2, %ecx
; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT
; CHECK-AVX-O0-NEXT: vmovups (%rsp), %ymm0
; CHECK-AVX-O0-NEXT: addq $40, %rsp
; CHECK-AVX-O0-NEXT: retq
%ret = load atomic <16 x bfloat>, ptr %x acquire, align 4
ret <16 x bfloat> %ret
}
; Acquire atomic load of <32 x half> with 4-byte alignment.
; Every run configuration expects 72 bytes of stack to be reserved and a call
; to __atomic_load with $64 in %edi. The 64-byte result is read back from the
; stack as:
;   - x86-64 / x86-64-v2: four movaps loads into %xmm0-%xmm3;
;   - x86-64-v3: two vmovups loads into %ymm0-%ymm1;
;   - x86-64-v4: one vmovups load into %zmm0.
; The optimized and -O0 outputs differ only in the order of argument setup.
define <32 x half> @atomic_vec32_half(ptr %x) nounwind {
; CHECK-SSE-O3-LABEL: atomic_vec32_half:
; CHECK-SSE-O3: # %bb.0:
; CHECK-SSE-O3-NEXT: subq $72, %rsp
; CHECK-SSE-O3-NEXT: movq %rdi, %rsi
; CHECK-SSE-O3-NEXT: movq %rsp, %rdx
; CHECK-SSE-O3-NEXT: movl $64, %edi
; CHECK-SSE-O3-NEXT: movl $2, %ecx
; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT
; CHECK-SSE-O3-NEXT: movaps (%rsp), %xmm0
; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
; CHECK-SSE-O3-NEXT: addq $72, %rsp
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX2-O3-LABEL: atomic_vec32_half:
; CHECK-AVX2-O3: # %bb.0:
; CHECK-AVX2-O3-NEXT: subq $72, %rsp
; CHECK-AVX2-O3-NEXT: movq %rdi, %rsi
; CHECK-AVX2-O3-NEXT: movq %rsp, %rdx
; CHECK-AVX2-O3-NEXT: movl $64, %edi
; CHECK-AVX2-O3-NEXT: movl $2, %ecx
; CHECK-AVX2-O3-NEXT: callq __atomic_load@PLT
; CHECK-AVX2-O3-NEXT: vmovups (%rsp), %ymm0
; CHECK-AVX2-O3-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1
; CHECK-AVX2-O3-NEXT: addq $72, %rsp
; CHECK-AVX2-O3-NEXT: retq
;
; CHECK-AVX512-O3-LABEL: atomic_vec32_half:
; CHECK-AVX512-O3: # %bb.0:
; CHECK-AVX512-O3-NEXT: subq $72, %rsp
; CHECK-AVX512-O3-NEXT: movq %rdi, %rsi
; CHECK-AVX512-O3-NEXT: movq %rsp, %rdx
; CHECK-AVX512-O3-NEXT: movl $64, %edi
; CHECK-AVX512-O3-NEXT: movl $2, %ecx
; CHECK-AVX512-O3-NEXT: callq __atomic_load@PLT
; CHECK-AVX512-O3-NEXT: vmovups (%rsp), %zmm0
; CHECK-AVX512-O3-NEXT: addq $72, %rsp
; CHECK-AVX512-O3-NEXT: retq
;
; CHECK-SSE-O0-LABEL: atomic_vec32_half:
; CHECK-SSE-O0: # %bb.0:
; CHECK-SSE-O0-NEXT: subq $72, %rsp
; CHECK-SSE-O0-NEXT: movq %rdi, %rsi
; CHECK-SSE-O0-NEXT: movl $64, %edi
; CHECK-SSE-O0-NEXT: movq %rsp, %rdx
; CHECK-SSE-O0-NEXT: movl $2, %ecx
; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT
; CHECK-SSE-O0-NEXT: movaps (%rsp), %xmm0
; CHECK-SSE-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
; CHECK-SSE-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
; CHECK-SSE-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
; CHECK-SSE-O0-NEXT: addq $72, %rsp
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX2-O0-LABEL: atomic_vec32_half:
; CHECK-AVX2-O0: # %bb.0:
; CHECK-AVX2-O0-NEXT: subq $72, %rsp
; CHECK-AVX2-O0-NEXT: movq %rdi, %rsi
; CHECK-AVX2-O0-NEXT: movl $64, %edi
; CHECK-AVX2-O0-NEXT: movq %rsp, %rdx
; CHECK-AVX2-O0-NEXT: movl $2, %ecx
; CHECK-AVX2-O0-NEXT: callq __atomic_load@PLT
; CHECK-AVX2-O0-NEXT: vmovups (%rsp), %ymm0
; CHECK-AVX2-O0-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1
; CHECK-AVX2-O0-NEXT: addq $72, %rsp
; CHECK-AVX2-O0-NEXT: retq
;
; CHECK-AVX512-O0-LABEL: atomic_vec32_half:
; CHECK-AVX512-O0: # %bb.0:
; CHECK-AVX512-O0-NEXT: subq $72, %rsp
; CHECK-AVX512-O0-NEXT: movq %rdi, %rsi
; CHECK-AVX512-O0-NEXT: movl $64, %edi
; CHECK-AVX512-O0-NEXT: movq %rsp, %rdx
; CHECK-AVX512-O0-NEXT: movl $2, %ecx
; CHECK-AVX512-O0-NEXT: callq __atomic_load@PLT
; CHECK-AVX512-O0-NEXT: vmovups (%rsp), %zmm0
; CHECK-AVX512-O0-NEXT: addq $72, %rsp
; CHECK-AVX512-O0-NEXT: retq
%ret = load atomic <32 x half>, ptr %x acquire, align 4
ret <32 x half> %ret
}