; blob: ea85c312f38bd1c277cdc9e5e3e4538ef774bc2a [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s
; @PR50609 - codegen test (presumably a regression test for the LLVM bug
; report of the same name; confirm against the bug tracker).
;
; The function computes the byte distance between %aFOO and the address of
; element 10 of %aFOO (viewed as an array of float), truncates it to i32,
; divides lane 0 by 4, converts the result to float, splats lane 0 across an
; 8-lane vector, and writes that vector through two AVX masked stores whose
; masks are the low and high halves of %__mask.
;
; Parameters:
;   %RET    - destination pointer for the two masked stores.
;   %aFOO   - pointer used only for address arithmetic (never dereferenced
;             in this function).
;   %__mask - 16 x i32 mask, split into two 8-lane halves below.
;
; The assertion lines inside the function are autogenerated (see the NOTE at
; the top of the file) and pin the exact instruction sequence emitted for the
; RUN line's target; regenerate them with the script rather than editing them
; by hand.
define void @PR50609(ptr noalias nocapture %RET, ptr noalias %aFOO, <16 x i32> %__mask) nounwind {
; CHECK-LABEL: PR50609:
; CHECK: # %bb.0: # %allocas
; CHECK-NEXT: leal 40(%rsi), %eax
; CHECK-NEXT: vmovq %rsi, %xmm2
; CHECK-NEXT: vmovd %eax, %xmm3
; CHECK-NEXT: vpsubq %xmm2, %xmm3, %xmm2
; CHECK-NEXT: vpsrad $31, %xmm2, %xmm3
; CHECK-NEXT: vpsrld $30, %xmm3, %xmm3
; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm2
; CHECK-NEXT: vpsrad $2, %xmm2, %xmm2
; CHECK-NEXT: vcvtdq2ps %ymm2, %ymm2
; CHECK-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,0,0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2
; CHECK-NEXT: vmaskmovps %ymm2, %ymm0, (%rdi)
; CHECK-NEXT: vmaskmovps %ymm2, %ymm1, 32(%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
allocas:
; Base address of %aFOO as an integer, placed in lane 0 only. Despite the
; "_broadcast" suffix, the remaining 15 lanes stay undef.
%aFOO_load_ptr2int = ptrtoint ptr %aFOO to i64
%aFOO_load_ptr2int_broadcast = insertelement <16 x i64> undef, i64 %aFOO_load_ptr2int, i32 0
; Address of float element 10 past %aFOO, again inserted into lane 0 only.
%aFOO_load4_offset = getelementptr float, ptr %aFOO, i64 10
%c_load_ptr2int = ptrtoint ptr %aFOO_load4_offset to i64
%c_load_ptr2int_broadcast = insertelement <16 x i64> undef, i64 %c_load_ptr2int, i32 0
; Lane 0 = (address of element 10) - (base address); other lanes are
; undef - undef.
%0 = sub <16 x i64> %c_load_ptr2int_broadcast, %aFOO_load_ptr2int_broadcast
%1 = trunc <16 x i64> %0 to <16 x i32>
; Non-uniform divisor: lane 0 is divided by 4, every other lane by 1.
; NOTE(review): the 4 presumably converts the byte distance back into a count
; of float elements - confirm against the original bug report.
%2 = sdiv <16 x i32> %1, <i32 4, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%3 = sitofp <16 x i32> %2 to <16 x float>
; All-zero shuffle mask: every one of the 8 result lanes takes lane 0 of %3,
; so only the lane-0 computation above reaches the stores.
%val0.i.i = shufflevector <16 x float> %3, <16 x float> undef, <8 x i32> zeroinitializer
; Split the 16-lane mask into its low (lanes 0-7) and high (lanes 8-15) halves.
%mask0.i.i = shufflevector <16 x i32> %__mask, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%mask1.i.i = shufflevector <16 x i32> %__mask, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; First masked store: splat value to %RET under the low mask half.
call void @llvm.x86.avx.maskstore.ps.256(ptr %RET, <8 x i32> %mask0.i.i, <8 x float> %val0.i.i) #1
; Second masked store: same splat value, 8 floats past %RET, under the high
; mask half.
%ptr1.i.i16 = getelementptr float, ptr %RET, i64 8
call void @llvm.x86.avx.maskstore.ps.256(ptr %ptr1.i.i16, <8 x i32> %mask1.i.i, <8 x float> %val0.i.i) #1
ret void
}
; Declaration of the x86 AVX masked-store intrinsic called by @PR50609.
; Operands, in order: destination pointer, 8 x i32 mask, 8 x float value.
declare void @llvm.x86.avx.maskstore.ps.256(ptr, <8 x i32>, <8 x float>)