; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512VL
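
; The immediate operand of the round*/vrndscale* instructions checked below
; encodes the SSE4.1 rounding control: bits [1:0] select the mode
; (0 = nearest, 1 = down/floor, 2 = up/ceil, 3 = truncate), bit 2 overrides
; that with the MXCSR rounding mode, and bit 3 suppresses the precision
; (inexact) exception. Hence floor uses $9 (8|1), ceil $10 (8|2), trunc $11
; (8|3), rint $4 (MXCSR mode, inexact reported) and nearbyint $12 (MXCSR
; mode, inexact suppressed). VRNDSCALE reuses the same low four bits, with
; the scale field in imm[7:4] left at zero so it rounds to integers.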

define <2 x double> @floor_v2f64(<2 x double> %p) {
; SSE41-LABEL: floor_v2f64:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundpd $9, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: floor_v2f64:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundpd $9, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: floor_v2f64:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundpd $9, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.floor.v2f64(<2 x double> %p)

define <4 x float> @floor_v4f32(<4 x float> %p) {
; SSE41-LABEL: floor_v4f32:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundps $9, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: floor_v4f32:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundps $9, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: floor_v4f32:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundps $9, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.floor.v4f32(<4 x float> %p)

define <4 x double> @floor_v4f64(<4 x double> %p) {
; SSE41-LABEL: floor_v4f64:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundpd $9, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $9, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: floor_v4f64:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundpd $9, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: floor_v4f64:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundpd $9, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.floor.v4f64(<4 x double> %p)

define <8 x float> @floor_v8f32(<8 x float> %p) {
; SSE41-LABEL: floor_v8f32:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundps $9, %xmm0, %xmm0
; SSE41-NEXT:    roundps $9, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: floor_v8f32:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundps $9, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: floor_v8f32:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundps $9, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.floor.v8f32(<8 x float> %p)

define <8 x double> @floor_v8f64(<8 x double> %p) {
; SSE41-LABEL: floor_v8f64:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundpd $9, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $9, %xmm1, %xmm1
; SSE41-NEXT:    roundpd $9, %xmm2, %xmm2
; SSE41-NEXT:    roundpd $9, %xmm3, %xmm3
; SSE41-NEXT:    retq
;
; AVX-LABEL: floor_v8f64:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundpd $9, %ymm0, %ymm0
; AVX-NEXT:    vroundpd $9, %ymm1, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: floor_v8f64:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vrndscalepd $9, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
  ret <8 x double> %t
}
declare <8 x double> @llvm.floor.v8f64(<8 x double> %p)

define <16 x float> @floor_v16f32(<16 x float> %p) {
; SSE41-LABEL: floor_v16f32:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundps $9, %xmm0, %xmm0
; SSE41-NEXT:    roundps $9, %xmm1, %xmm1
; SSE41-NEXT:    roundps $9, %xmm2, %xmm2
; SSE41-NEXT:    roundps $9, %xmm3, %xmm3
; SSE41-NEXT:    retq
;
; AVX-LABEL: floor_v16f32:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundps $9, %ymm0, %ymm0
; AVX-NEXT:    vroundps $9, %ymm1, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: floor_v16f32:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vrndscaleps $9, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
  ret <16 x float> %t
}
declare <16 x float> @llvm.floor.v16f32(<16 x float> %p)

define <2 x double> @ceil_v2f64(<2 x double> %p) {
; SSE41-LABEL: ceil_v2f64:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundpd $10, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: ceil_v2f64:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundpd $10, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: ceil_v2f64:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundpd $10, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p)

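; The *_load tests load with 'align 1' to verify that the load only folds
; into the rounding instruction's memory operand on AVX targets, where
; VEX-encoded memory operands have no alignment requirement; legacy SSE4.1
; roundpd/roundps would fault on an unaligned operand, so SSE41 keeps a
; separate movupd/movups.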
define <2 x double> @ceil_v2f64_load(ptr %ptr) {
; SSE41-LABEL: ceil_v2f64_load:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    movupd (%rdi), %xmm0
; SSE41-NEXT:    roundpd $10, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: ceil_v2f64_load:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundpd $10, (%rdi), %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: ceil_v2f64_load:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundpd $10, (%rdi), %xmm0
; AVX512-NEXT:    retq
  %p = load <2 x double>, ptr %ptr, align 1
  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
  ret <2 x double> %t
}

define <4 x float> @ceil_v4f32(<4 x float> %p) {
; SSE41-LABEL: ceil_v4f32:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundps $10, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: ceil_v4f32:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundps $10, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: ceil_v4f32:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundps $10, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p)

define <4 x float> @ceil_v4f32_load(ptr %ptr) {
; SSE41-LABEL: ceil_v4f32_load:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    movups (%rdi), %xmm0
; SSE41-NEXT:    roundps $10, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: ceil_v4f32_load:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundps $10, (%rdi), %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: ceil_v4f32_load:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundps $10, (%rdi), %xmm0
; AVX512-NEXT:    retq
  %p = load <4 x float>, ptr %ptr, align 1
  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
  ret <4 x float> %t
}

define <4 x double> @ceil_v4f64(<4 x double> %p) {
; SSE41-LABEL: ceil_v4f64:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundpd $10, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $10, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: ceil_v4f64:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundpd $10, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: ceil_v4f64:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundpd $10, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p)

define <8 x float> @ceil_v8f32(<8 x float> %p) {
; SSE41-LABEL: ceil_v8f32:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundps $10, %xmm0, %xmm0
; SSE41-NEXT:    roundps $10, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: ceil_v8f32:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundps $10, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: ceil_v8f32:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundps $10, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p)

define <8 x double> @ceil_v8f64(<8 x double> %p) {
; SSE41-LABEL: ceil_v8f64:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundpd $10, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $10, %xmm1, %xmm1
; SSE41-NEXT:    roundpd $10, %xmm2, %xmm2
; SSE41-NEXT:    roundpd $10, %xmm3, %xmm3
; SSE41-NEXT:    retq
;
; AVX-LABEL: ceil_v8f64:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundpd $10, %ymm0, %ymm0
; AVX-NEXT:    vroundpd $10, %ymm1, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: ceil_v8f64:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vrndscalepd $10, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
  ret <8 x double> %t
}
declare <8 x double> @llvm.ceil.v8f64(<8 x double> %p)

define <16 x float> @ceil_v16f32(<16 x float> %p) {
; SSE41-LABEL: ceil_v16f32:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundps $10, %xmm0, %xmm0
; SSE41-NEXT:    roundps $10, %xmm1, %xmm1
; SSE41-NEXT:    roundps $10, %xmm2, %xmm2
; SSE41-NEXT:    roundps $10, %xmm3, %xmm3
; SSE41-NEXT:    retq
;
; AVX-LABEL: ceil_v16f32:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundps $10, %ymm0, %ymm0
; AVX-NEXT:    vroundps $10, %ymm1, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: ceil_v16f32:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vrndscaleps $10, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
  ret <16 x float> %t
}
declare <16 x float> @llvm.ceil.v16f32(<16 x float> %p)

define <2 x double> @trunc_v2f64(<2 x double> %p) {
; SSE41-LABEL: trunc_v2f64:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_v2f64:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundpd $11, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: trunc_v2f64:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundpd $11, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p)

define <4 x float> @trunc_v4f32(<4 x float> %p) {
; SSE41-LABEL: trunc_v4f32:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundps $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_v4f32:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundps $11, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: trunc_v4f32:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundps $11, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p)

define <4 x double> @trunc_v4f64(<4 x double> %p) {
; SSE41-LABEL: trunc_v4f64:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $11, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_v4f64:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundpd $11, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: trunc_v4f64:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundpd $11, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p)

define <8 x float> @trunc_v8f32(<8 x float> %p) {
; SSE41-LABEL: trunc_v8f32:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundps $11, %xmm0, %xmm0
; SSE41-NEXT:    roundps $11, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_v8f32:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundps $11, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: trunc_v8f32:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundps $11, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p)

define <8 x double> @trunc_v8f64(<8 x double> %p) {
; SSE41-LABEL: trunc_v8f64:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $11, %xmm1, %xmm1
; SSE41-NEXT:    roundpd $11, %xmm2, %xmm2
; SSE41-NEXT:    roundpd $11, %xmm3, %xmm3
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_v8f64:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundpd $11, %ymm0, %ymm0
; AVX-NEXT:    vroundpd $11, %ymm1, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: trunc_v8f64:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vrndscalepd $11, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
  ret <8 x double> %t
}
declare <8 x double> @llvm.trunc.v8f64(<8 x double> %p)

define <16 x float> @trunc_v16f32(<16 x float> %p) {
; SSE41-LABEL: trunc_v16f32:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundps $11, %xmm0, %xmm0
; SSE41-NEXT:    roundps $11, %xmm1, %xmm1
; SSE41-NEXT:    roundps $11, %xmm2, %xmm2
; SSE41-NEXT:    roundps $11, %xmm3, %xmm3
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_v16f32:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundps $11, %ymm0, %ymm0
; AVX-NEXT:    vroundps $11, %ymm1, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: trunc_v16f32:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vrndscaleps $11, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
  ret <16 x float> %t
}
declare <16 x float> @llvm.trunc.v16f32(<16 x float> %p)

define <2 x double> @rint_v2f64(<2 x double> %p) {
; SSE41-LABEL: rint_v2f64:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundpd $4, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: rint_v2f64:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundpd $4, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: rint_v2f64:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundpd $4, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.rint.v2f64(<2 x double> %p)

define <4 x float> @rint_v4f32(<4 x float> %p) {
; SSE41-LABEL: rint_v4f32:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundps $4, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: rint_v4f32:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundps $4, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: rint_v4f32:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundps $4, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.rint.v4f32(<4 x float> %p)

define <4 x double> @rint_v4f64(<4 x double> %p) {
; SSE41-LABEL: rint_v4f64:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundpd $4, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $4, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: rint_v4f64:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundpd $4, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: rint_v4f64:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundpd $4, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.rint.v4f64(<4 x double> %p)

define <8 x float> @rint_v8f32(<8 x float> %p) {
; SSE41-LABEL: rint_v8f32:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundps $4, %xmm0, %xmm0
; SSE41-NEXT:    roundps $4, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: rint_v8f32:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundps $4, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: rint_v8f32:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundps $4, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.rint.v8f32(<8 x float> %p)

define <8 x double> @rint_v8f64(<8 x double> %p) {
; SSE41-LABEL: rint_v8f64:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundpd $4, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $4, %xmm1, %xmm1
; SSE41-NEXT:    roundpd $4, %xmm2, %xmm2
; SSE41-NEXT:    roundpd $4, %xmm3, %xmm3
; SSE41-NEXT:    retq
;
; AVX-LABEL: rint_v8f64:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundpd $4, %ymm0, %ymm0
; AVX-NEXT:    vroundpd $4, %ymm1, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: rint_v8f64:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vrndscalepd $4, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
  ret <8 x double> %t
}
declare <8 x double> @llvm.rint.v8f64(<8 x double> %p)

define <16 x float> @rint_v16f32(<16 x float> %p) {
; SSE41-LABEL: rint_v16f32:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundps $4, %xmm0, %xmm0
; SSE41-NEXT:    roundps $4, %xmm1, %xmm1
; SSE41-NEXT:    roundps $4, %xmm2, %xmm2
; SSE41-NEXT:    roundps $4, %xmm3, %xmm3
; SSE41-NEXT:    retq
;
; AVX-LABEL: rint_v16f32:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundps $4, %ymm0, %ymm0
; AVX-NEXT:    vroundps $4, %ymm1, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: rint_v16f32:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vrndscaleps $4, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
  ret <16 x float> %t
}
declare <16 x float> @llvm.rint.v16f32(<16 x float> %p)

define <2 x double> @nearbyint_v2f64(<2 x double> %p) {
; SSE41-LABEL: nearbyint_v2f64:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundpd $12, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: nearbyint_v2f64:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundpd $12, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: nearbyint_v2f64:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundpd $12, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)

define <4 x float> @nearbyint_v4f32(<4 x float> %p) {
; SSE41-LABEL: nearbyint_v4f32:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundps $12, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: nearbyint_v4f32:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundps $12, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: nearbyint_v4f32:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundps $12, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)

define <4 x double> @nearbyint_v4f64(<4 x double> %p) {
; SSE41-LABEL: nearbyint_v4f64:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundpd $12, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $12, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: nearbyint_v4f64:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundpd $12, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: nearbyint_v4f64:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundpd $12, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)

define <8 x float> @nearbyint_v8f32(<8 x float> %p) {
; SSE41-LABEL: nearbyint_v8f32:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundps $12, %xmm0, %xmm0
; SSE41-NEXT:    roundps $12, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: nearbyint_v8f32:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundps $12, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: nearbyint_v8f32:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundps $12, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)

define <8 x double> @nearbyint_v8f64(<8 x double> %p) {
; SSE41-LABEL: nearbyint_v8f64:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundpd $12, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $12, %xmm1, %xmm1
; SSE41-NEXT:    roundpd $12, %xmm2, %xmm2
; SSE41-NEXT:    roundpd $12, %xmm3, %xmm3
; SSE41-NEXT:    retq
;
; AVX-LABEL: nearbyint_v8f64:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundpd $12, %ymm0, %ymm0
; AVX-NEXT:    vroundpd $12, %ymm1, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: nearbyint_v8f64:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vrndscalepd $12, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
  ret <8 x double> %t
}
declare <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)

define <16 x float> @nearbyint_v16f32(<16 x float> %p) {
; SSE41-LABEL: nearbyint_v16f32:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    roundps $12, %xmm0, %xmm0
; SSE41-NEXT:    roundps $12, %xmm1, %xmm1
; SSE41-NEXT:    roundps $12, %xmm2, %xmm2
; SSE41-NEXT:    roundps $12, %xmm3, %xmm3
; SSE41-NEXT:    retq
;
; AVX-LABEL: nearbyint_v16f32:
; AVX:       ## %bb.0:
; AVX-NEXT:    vroundps $12, %ymm0, %ymm0
; AVX-NEXT:    vroundps $12, %ymm1, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: nearbyint_v16f32:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vrndscaleps $12, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
  ret <16 x float> %t
}
declare <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)

;
; Constant Folding
;
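; Rounding intrinsics applied to constant vectors should be folded at
; compile time, leaving only a load of the pre-rounded constants.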

define <2 x double> @const_floor_v2f64() {
; SSE41-LABEL: const_floor_v2f64:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-2.0E+0,2.0E+0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_floor_v2f64:
; AVX:       ## %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-2.0E+0,2.0E+0]
; AVX-NEXT:    retq
;
; AVX512-LABEL: const_floor_v2f64:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [-2.0E+0,2.0E+0]
; AVX512-NEXT:    retq
  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> <double -1.5, double 2.5>)
  ret <2 x double> %t
}

define <4 x float> @const_floor_v4f32() {
; SSE41-LABEL: const_floor_v4f32:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-4.0E+0,6.0E+0,-9.0E+0,2.0E+0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_floor_v4f32:
; AVX:       ## %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-4.0E+0,6.0E+0,-9.0E+0,2.0E+0]
; AVX-NEXT:    retq
;
; AVX512-LABEL: const_floor_v4f32:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [-4.0E+0,6.0E+0,-9.0E+0,2.0E+0]
; AVX512-NEXT:    retq
  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
  ret <4 x float> %t
}

define <2 x double> @const_ceil_v2f64() {
; SSE41-LABEL: const_ceil_v2f64:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-1.0E+0,3.0E+0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_ceil_v2f64:
; AVX:       ## %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-1.0E+0,3.0E+0]
; AVX-NEXT:    retq
;
; AVX512-LABEL: const_ceil_v2f64:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [-1.0E+0,3.0E+0]
; AVX512-NEXT:    retq
  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> <double -1.5, double 2.5>)
  ret <2 x double> %t
}

define <4 x float> @const_ceil_v4f32() {
; SSE41-LABEL: const_ceil_v4f32:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-3.0E+0,6.0E+0,-9.0E+0,3.0E+0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_ceil_v4f32:
; AVX:       ## %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-3.0E+0,6.0E+0,-9.0E+0,3.0E+0]
; AVX-NEXT:    retq
;
; AVX512-LABEL: const_ceil_v4f32:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [-3.0E+0,6.0E+0,-9.0E+0,3.0E+0]
; AVX512-NEXT:    retq
  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
  ret <4 x float> %t
}

define <2 x double> @const_trunc_v2f64() {
; SSE41-LABEL: const_trunc_v2f64:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-1.0E+0,2.0E+0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_trunc_v2f64:
; AVX:       ## %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-1.0E+0,2.0E+0]
; AVX-NEXT:    retq
;
; AVX512-LABEL: const_trunc_v2f64:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [-1.0E+0,2.0E+0]
; AVX512-NEXT:    retq
  %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> <double -1.5, double 2.5>)
  ret <2 x double> %t
}

define <4 x float> @const_trunc_v4f32() {
; SSE41-LABEL: const_trunc_v4f32:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-3.0E+0,6.0E+0,-9.0E+0,2.0E+0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_trunc_v4f32:
; AVX:       ## %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-3.0E+0,6.0E+0,-9.0E+0,2.0E+0]
; AVX-NEXT:    retq
;
; AVX512-LABEL: const_trunc_v4f32:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [-3.0E+0,6.0E+0,-9.0E+0,2.0E+0]
; AVX512-NEXT:    retq
  %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
  ret <4 x float> %t
}

;
; Scalar and masked instructions
;
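; The scalar tests extract element 0, round it, and reinsert it into another
; vector, matching the roundss/roundsd patterns. The masked tests wrap the
; rounding in a select on an fcmp (or on bit 0 of an i8 mask for the scalar
; forms); on AVX512VL targets the packed forms should fold into a single
; masked vrndscale with {%k1} (plus {z} for zero-masking).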
|  |  | 
|  | define <4 x float> @floor_ss(<4 x float> %x, <4 x float> %y) nounwind { | 
|  | ; SSE41-LABEL: floor_ss: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    roundss $9, %xmm0, %xmm0 | 
|  | ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: floor_ss: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: floor_ss: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX512-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] | 
|  | ; AVX512-NEXT:    retq | 
|  | %s = extractelement <4 x float> %x, i32 0 | 
|  | %call = call float @llvm.floor.f32(float %s) | 
|  | %res = insertelement <4 x float> %y, float %call, i32 0 | 
|  | ret <4 x float> %res | 
|  | } | 
|  | declare float @llvm.floor.f32(float %s) | 
|  |  | 
|  | define <2 x double> @floor_sd(<2 x double> %x, <2 x double> %y) nounwind { | 
|  | ; SSE41-LABEL: floor_sd: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    roundsd $9, %xmm0, %xmm0 | 
|  | ; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: floor_sd: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: floor_sd: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX512-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] | 
|  | ; AVX512-NEXT:    retq | 
|  | %s = extractelement <2 x double> %x, i32 0 | 
|  | %call = call double @llvm.floor.f64(double %s) | 
|  | %res = insertelement <2 x double> %y, double %call, i32 0 | 
|  | ret <2 x double> %res | 
|  | } | 
|  | declare double @llvm.floor.f64(double %s) | 
|  |  | 
|  | define <4 x float> @floor_mask_128_ps(<4 x float> %x, <4 x float> %y) nounwind { | 
|  | ; SSE41-LABEL: floor_mask_128_ps: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    roundps $9, %xmm0, %xmm2 | 
|  | ; SSE41-NEXT:    cmpeqps %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    blendvps %xmm0, %xmm2, %xmm1 | 
|  | ; SSE41-NEXT:    movaps %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: floor_mask_128_ps: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm2 | 
|  | ; AVX-NEXT:    vroundps $9, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0 | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512F-LABEL: floor_mask_128_ps: | 
|  | ; AVX512F:       ## %bb.0: | 
|  | ; AVX512F-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1 | 
|  | ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0 | 
|  | ; AVX512F-NEXT:    vcmpeqps %zmm1, %zmm0, %k1 | 
|  | ; AVX512F-NEXT:    vroundps $9, %xmm0, %xmm0 | 
|  | ; AVX512F-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} | 
|  | ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0 | 
|  | ; AVX512F-NEXT:    vzeroupper | 
|  | ; AVX512F-NEXT:    retq | 
|  | ; | 
|  | ; AVX512VL-LABEL: floor_mask_128_ps: | 
|  | ; AVX512VL:       ## %bb.0: | 
|  | ; AVX512VL-NEXT:    vcmpeqps %xmm1, %xmm0, %k1 | 
|  | ; AVX512VL-NEXT:    vrndscaleps $9, %xmm0, %xmm1 {%k1} | 
|  | ; AVX512VL-NEXT:    vmovaps %xmm1, %xmm0 | 
|  | ; AVX512VL-NEXT:    retq | 
|  | %k = fcmp oeq <4 x float> %x, %y | 
|  | %call = call <4 x float> @llvm.floor.v4f32(<4 x float> %x) | 
|  | %res = select <4 x i1> %k, <4 x float> %call, <4 x float> %y | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | define <4 x float> @floor_maskz_128_ps(<4 x float> %x, <4 x float> %y) nounwind { | 
|  | ; SSE41-LABEL: floor_maskz_128_ps: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    cmpeqps %xmm0, %xmm1 | 
|  | ; SSE41-NEXT:    roundps $9, %xmm0, %xmm0 | 
|  | ; SSE41-NEXT:    andps %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: floor_maskz_128_ps: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 | 
|  | ; AVX-NEXT:    vroundps $9, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vandps %xmm0, %xmm1, %xmm0 | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512F-LABEL: floor_maskz_128_ps: | 
|  | ; AVX512F:       ## %bb.0: | 
|  | ; AVX512F-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1 | 
|  | ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0 | 
|  | ; AVX512F-NEXT:    vcmpeqps %zmm1, %zmm0, %k1 | 
|  | ; AVX512F-NEXT:    vroundps $9, %xmm0, %xmm0 | 
|  | ; AVX512F-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z} | 
|  | ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0 | 
|  | ; AVX512F-NEXT:    vzeroupper | 
|  | ; AVX512F-NEXT:    retq | 
|  | ; | 
|  | ; AVX512VL-LABEL: floor_maskz_128_ps: | 
|  | ; AVX512VL:       ## %bb.0: | 
|  | ; AVX512VL-NEXT:    vcmpeqps %xmm1, %xmm0, %k1 | 
|  | ; AVX512VL-NEXT:    vrndscaleps $9, %xmm0, %xmm0 {%k1} {z} | 
|  | ; AVX512VL-NEXT:    retq | 
|  | %k = fcmp oeq <4 x float> %x, %y | 
|  | %call = call <4 x float> @llvm.floor.v4f32(<4 x float> %x) | 
|  | %res = select <4 x i1> %k, <4 x float> %call, <4 x float> zeroinitializer | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | define <2 x double> @floor_mask_128_pd(<2 x double> %x, <2 x double> %y) nounwind { | 
|  | ; SSE41-LABEL: floor_mask_128_pd: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    roundpd $9, %xmm0, %xmm2 | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1 | 
|  | ; SSE41-NEXT:    movapd %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: floor_mask_128_pd: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm2 | 
|  | ; AVX-NEXT:    vroundpd $9, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512F-LABEL: floor_mask_128_pd: | 
|  | ; AVX512F:       ## %bb.0: | 
|  | ; AVX512F-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1 | 
|  | ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0 | 
|  | ; AVX512F-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1 | 
|  | ; AVX512F-NEXT:    vroundpd $9, %xmm0, %xmm0 | 
|  | ; AVX512F-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} | 
|  | ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0 | 
|  | ; AVX512F-NEXT:    vzeroupper | 
|  | ; AVX512F-NEXT:    retq | 
|  | ; | 
|  | ; AVX512VL-LABEL: floor_mask_128_pd: | 
|  | ; AVX512VL:       ## %bb.0: | 
|  | ; AVX512VL-NEXT:    vcmpeqpd %xmm1, %xmm0, %k1 | 
|  | ; AVX512VL-NEXT:    vrndscalepd $9, %xmm0, %xmm1 {%k1} | 
|  | ; AVX512VL-NEXT:    vmovapd %xmm1, %xmm0 | 
|  | ; AVX512VL-NEXT:    retq | 
|  | %k = fcmp oeq <2 x double> %x, %y | 
|  | %call = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) | 
|  | %res = select <2 x i1> %k, <2 x double> %call, <2 x double> %y | 
|  | ret <2 x double> %res | 
|  | } | 
|  |  | 
|  | define <2 x double> @floor_maskz_128_pd(<2 x double> %x, <2 x double> %y) nounwind { | 
|  | ; SSE41-LABEL: floor_maskz_128_pd: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm0, %xmm1 | 
|  | ; SSE41-NEXT:    roundpd $9, %xmm0, %xmm0 | 
|  | ; SSE41-NEXT:    andpd %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: floor_maskz_128_pd: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 | 
|  | ; AVX-NEXT:    vroundpd $9, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vandpd %xmm0, %xmm1, %xmm0 | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512F-LABEL: floor_maskz_128_pd: | 
|  | ; AVX512F:       ## %bb.0: | 
|  | ; AVX512F-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1 | 
|  | ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0 | 
|  | ; AVX512F-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1 | 
|  | ; AVX512F-NEXT:    vroundpd $9, %xmm0, %xmm0 | 
|  | ; AVX512F-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z} | 
|  | ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0 | 
|  | ; AVX512F-NEXT:    vzeroupper | 
|  | ; AVX512F-NEXT:    retq | 
|  | ; | 
|  | ; AVX512VL-LABEL: floor_maskz_128_pd: | 
|  | ; AVX512VL:       ## %bb.0: | 
|  | ; AVX512VL-NEXT:    vcmpeqpd %xmm1, %xmm0, %k1 | 
|  | ; AVX512VL-NEXT:    vrndscalepd $9, %xmm0, %xmm0 {%k1} {z} | 
|  | ; AVX512VL-NEXT:    retq | 
|  | %k = fcmp oeq <2 x double> %x, %y | 
|  | %call = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) | 
|  | %res = select <2 x i1> %k, <2 x double> %call, <2 x double> zeroinitializer | 
|  | ret <2 x double> %res | 
|  | } | 
|  |  | 
|  | define <8 x float> @floor_mask_256_ps(<8 x float> %x, <8 x float> %y) nounwind { | 
|  | ; SSE41-LABEL: floor_mask_256_ps: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    roundps $9, %xmm1, %xmm4 | 
|  | ; SSE41-NEXT:    cmpeqps %xmm3, %xmm1 | 
|  | ; SSE41-NEXT:    roundps $9, %xmm0, %xmm5 | 
|  | ; SSE41-NEXT:    cmpeqps %xmm2, %xmm0 | 
|  | ; SSE41-NEXT:    blendvps %xmm0, %xmm5, %xmm2 | 
|  | ; SSE41-NEXT:    movaps %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    blendvps %xmm0, %xmm4, %xmm3 | 
|  | ; SSE41-NEXT:    movaps %xmm2, %xmm0 | 
|  | ; SSE41-NEXT:    movaps %xmm3, %xmm1 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: floor_mask_256_ps: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm2 | 
|  | ; AVX-NEXT:    vroundps $9, %ymm0, %ymm0 | 
|  | ; AVX-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0 | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512F-LABEL: floor_mask_256_ps: | 
|  | ; AVX512F:       ## %bb.0: | 
|  | ; AVX512F-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1 | 
|  | ; AVX512F-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0 | 
|  | ; AVX512F-NEXT:    vcmpeqps %zmm1, %zmm0, %k1 | 
|  | ; AVX512F-NEXT:    vroundps $9, %ymm0, %ymm0 | 
|  | ; AVX512F-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} | 
|  | ; AVX512F-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0 | 
|  | ; AVX512F-NEXT:    retq | 
|  | ; | 
|  | ; AVX512VL-LABEL: floor_mask_256_ps: | 
|  | ; AVX512VL:       ## %bb.0: | 
|  | ; AVX512VL-NEXT:    vcmpeqps %ymm1, %ymm0, %k1 | 
|  | ; AVX512VL-NEXT:    vrndscaleps $9, %ymm0, %ymm1 {%k1} | 
|  | ; AVX512VL-NEXT:    vmovaps %ymm1, %ymm0 | 
|  | ; AVX512VL-NEXT:    retq | 
|  | %k = fcmp oeq <8 x float> %x, %y | 
|  | %call = call <8 x float> @llvm.floor.v8f32(<8 x float> %x) | 
|  | %res = select <8 x i1> %k, <8 x float> %call, <8 x float> %y | 
|  | ret <8 x float> %res | 
|  | } | 
|  |  | 
|  | define <8 x float> @floor_maskz_256_ps(<8 x float> %x, <8 x float> %y) nounwind { | 
|  | ; SSE41-LABEL: floor_maskz_256_ps: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    cmpeqps %xmm1, %xmm3 | 
|  | ; SSE41-NEXT:    cmpeqps %xmm0, %xmm2 | 
|  | ; SSE41-NEXT:    roundps $9, %xmm1, %xmm1 | 
|  | ; SSE41-NEXT:    andps %xmm3, %xmm1 | 
|  | ; SSE41-NEXT:    roundps $9, %xmm0, %xmm0 | 
|  | ; SSE41-NEXT:    andps %xmm2, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: floor_maskz_256_ps: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm1 | 
|  | ; AVX-NEXT:    vroundps $9, %ymm0, %ymm0 | 
|  | ; AVX-NEXT:    vandps %ymm0, %ymm1, %ymm0 | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512F-LABEL: floor_maskz_256_ps: | 
|  | ; AVX512F:       ## %bb.0: | 
|  | ; AVX512F-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1 | 
|  | ; AVX512F-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0 | 
|  | ; AVX512F-NEXT:    vcmpeqps %zmm1, %zmm0, %k1 | 
|  | ; AVX512F-NEXT:    vroundps $9, %ymm0, %ymm0 | 
|  | ; AVX512F-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z} | 
|  | ; AVX512F-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0 | 
|  | ; AVX512F-NEXT:    retq | 
|  | ; | 
|  | ; AVX512VL-LABEL: floor_maskz_256_ps: | 
|  | ; AVX512VL:       ## %bb.0: | 
|  | ; AVX512VL-NEXT:    vcmpeqps %ymm1, %ymm0, %k1 | 
|  | ; AVX512VL-NEXT:    vrndscaleps $9, %ymm0, %ymm0 {%k1} {z} | 
|  | ; AVX512VL-NEXT:    retq | 
|  | %k = fcmp oeq <8 x float> %x, %y | 
|  | %call = call <8 x float> @llvm.floor.v8f32(<8 x float> %x) | 
|  | %res = select <8 x i1> %k, <8 x float> %call, <8 x float> zeroinitializer | 
|  | ret <8 x float> %res | 
|  | } | 
|  |  | 
|  | define <4 x double> @floor_mask_256_pd(<4 x double> %x, <4 x double> %y) nounwind { | 
|  | ; SSE41-LABEL: floor_mask_256_pd: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    roundpd $9, %xmm1, %xmm4 | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm3, %xmm1 | 
|  | ; SSE41-NEXT:    roundpd $9, %xmm0, %xmm5 | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm2, %xmm0 | 
|  | ; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm2 | 
|  | ; SSE41-NEXT:    movapd %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm3 | 
|  | ; SSE41-NEXT:    movapd %xmm2, %xmm0 | 
|  | ; SSE41-NEXT:    movapd %xmm3, %xmm1 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: floor_mask_256_pd: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm2 | 
|  | ; AVX-NEXT:    vroundpd $9, %ymm0, %ymm0 | 
|  | ; AVX-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512F-LABEL: floor_mask_256_pd: | 
|  | ; AVX512F:       ## %bb.0: | 
|  | ; AVX512F-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1 | 
|  | ; AVX512F-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0 | 
|  | ; AVX512F-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1 | 
|  | ; AVX512F-NEXT:    vroundpd $9, %ymm0, %ymm0 | 
|  | ; AVX512F-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} | 
|  | ; AVX512F-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0 | 
|  | ; AVX512F-NEXT:    retq | 
|  | ; | 
|  | ; AVX512VL-LABEL: floor_mask_256_pd: | 
|  | ; AVX512VL:       ## %bb.0: | 
|  | ; AVX512VL-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1 | 
|  | ; AVX512VL-NEXT:    vrndscalepd $9, %ymm0, %ymm1 {%k1} | 
|  | ; AVX512VL-NEXT:    vmovapd %ymm1, %ymm0 | 
|  | ; AVX512VL-NEXT:    retq | 
|  | %k = fcmp oeq <4 x double> %x, %y | 
|  | %call = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) | 
|  | %res = select <4 x i1> %k, <4 x double> %call, <4 x double> %y | 
|  | ret <4 x double> %res | 
|  | } | 
|  |  | 
|  | define <4 x double> @floor_maskz_256_pd(<4 x double> %x, <4 x double> %y) nounwind { | 
|  | ; SSE41-LABEL: floor_maskz_256_pd: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm1, %xmm3 | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm0, %xmm2 | 
|  | ; SSE41-NEXT:    roundpd $9, %xmm1, %xmm1 | 
|  | ; SSE41-NEXT:    andpd %xmm3, %xmm1 | 
|  | ; SSE41-NEXT:    roundpd $9, %xmm0, %xmm0 | 
|  | ; SSE41-NEXT:    andpd %xmm2, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: floor_maskz_256_pd: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm1 | 
|  | ; AVX-NEXT:    vroundpd $9, %ymm0, %ymm0 | 
|  | ; AVX-NEXT:    vandpd %ymm0, %ymm1, %ymm0 | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512F-LABEL: floor_maskz_256_pd: | 
|  | ; AVX512F:       ## %bb.0: | 
|  | ; AVX512F-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1 | 
|  | ; AVX512F-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0 | 
|  | ; AVX512F-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1 | 
|  | ; AVX512F-NEXT:    vroundpd $9, %ymm0, %ymm0 | 
|  | ; AVX512F-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z} | 
|  | ; AVX512F-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0 | 
|  | ; AVX512F-NEXT:    retq | 
|  | ; | 
|  | ; AVX512VL-LABEL: floor_maskz_256_pd: | 
|  | ; AVX512VL:       ## %bb.0: | 
|  | ; AVX512VL-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1 | 
|  | ; AVX512VL-NEXT:    vrndscalepd $9, %ymm0, %ymm0 {%k1} {z} | 
|  | ; AVX512VL-NEXT:    retq | 
|  | %k = fcmp oeq <4 x double> %x, %y | 
|  | %call = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) | 
|  | %res = select <4 x i1> %k, <4 x double> %call, <4 x double> zeroinitializer | 
|  | ret <4 x double> %res | 
|  | } | 
|  |  | 
|  | define <16 x float> @floor_mask_512_ps(<16 x float> %x, <16 x float> %y) nounwind { | 
|  | ; SSE41-LABEL: floor_mask_512_ps: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    roundps $9, %xmm3, %xmm8 | 
|  | ; SSE41-NEXT:    cmpeqps %xmm7, %xmm3 | 
|  | ; SSE41-NEXT:    roundps $9, %xmm2, %xmm9 | 
|  | ; SSE41-NEXT:    cmpeqps %xmm6, %xmm2 | 
|  | ; SSE41-NEXT:    roundps $9, %xmm1, %xmm10 | 
|  | ; SSE41-NEXT:    cmpeqps %xmm5, %xmm1 | 
|  | ; SSE41-NEXT:    roundps $9, %xmm0, %xmm11 | 
|  | ; SSE41-NEXT:    cmpeqps %xmm4, %xmm0 | 
|  | ; SSE41-NEXT:    blendvps %xmm0, %xmm11, %xmm4 | 
|  | ; SSE41-NEXT:    movaps %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    blendvps %xmm0, %xmm10, %xmm5 | 
|  | ; SSE41-NEXT:    movaps %xmm2, %xmm0 | 
|  | ; SSE41-NEXT:    blendvps %xmm0, %xmm9, %xmm6 | 
|  | ; SSE41-NEXT:    movaps %xmm3, %xmm0 | 
|  | ; SSE41-NEXT:    blendvps %xmm0, %xmm8, %xmm7 | 
|  | ; SSE41-NEXT:    movaps %xmm4, %xmm0 | 
|  | ; SSE41-NEXT:    movaps %xmm5, %xmm1 | 
|  | ; SSE41-NEXT:    movaps %xmm6, %xmm2 | 
|  | ; SSE41-NEXT:    movaps %xmm7, %xmm3 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: floor_mask_512_ps: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vcmpeqps %ymm3, %ymm1, %ymm4 | 
|  | ; AVX-NEXT:    vcmpeqps %ymm2, %ymm0, %ymm5 | 
|  | ; AVX-NEXT:    vroundps $9, %ymm1, %ymm1 | 
|  | ; AVX-NEXT:    vroundps $9, %ymm0, %ymm0 | 
|  | ; AVX-NEXT:    vblendvps %ymm5, %ymm0, %ymm2, %ymm0 | 
|  | ; AVX-NEXT:    vblendvps %ymm4, %ymm1, %ymm3, %ymm1 | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: floor_mask_512_ps: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vcmpeqps %zmm1, %zmm0, %k1 | 
|  | ; AVX512-NEXT:    vrndscaleps $9, %zmm0, %zmm1 {%k1} | 
|  | ; AVX512-NEXT:    vmovaps %zmm1, %zmm0 | 
|  | ; AVX512-NEXT:    retq | 
|  | %k = fcmp oeq <16 x float> %x, %y | 
|  | %call = call <16 x float> @llvm.floor.v16f32(<16 x float> %x) | 
|  | %res = select <16 x i1> %k, <16 x float> %call, <16 x float> %y | 
|  | ret <16 x float> %res | 
|  | } | 
|  |  | 
|  | define <16 x float> @floor_maskz_512_ps(<16 x float> %x, <16 x float> %y) nounwind { | 
|  | ; SSE41-LABEL: floor_maskz_512_ps: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    cmpeqps %xmm3, %xmm7 | 
|  | ; SSE41-NEXT:    cmpeqps %xmm2, %xmm6 | 
|  | ; SSE41-NEXT:    cmpeqps %xmm1, %xmm5 | 
|  | ; SSE41-NEXT:    cmpeqps %xmm0, %xmm4 | 
|  | ; SSE41-NEXT:    roundps $9, %xmm3, %xmm3 | 
|  | ; SSE41-NEXT:    andps %xmm7, %xmm3 | 
|  | ; SSE41-NEXT:    roundps $9, %xmm2, %xmm2 | 
|  | ; SSE41-NEXT:    andps %xmm6, %xmm2 | 
|  | ; SSE41-NEXT:    roundps $9, %xmm1, %xmm1 | 
|  | ; SSE41-NEXT:    andps %xmm5, %xmm1 | 
|  | ; SSE41-NEXT:    roundps $9, %xmm0, %xmm0 | 
|  | ; SSE41-NEXT:    andps %xmm4, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: floor_maskz_512_ps: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vcmpeqps %ymm3, %ymm1, %ymm3 | 
|  | ; AVX-NEXT:    vcmpeqps %ymm2, %ymm0, %ymm2 | 
|  | ; AVX-NEXT:    vroundps $9, %ymm1, %ymm1 | 
|  | ; AVX-NEXT:    vandps %ymm1, %ymm3, %ymm1 | 
|  | ; AVX-NEXT:    vroundps $9, %ymm0, %ymm0 | 
|  | ; AVX-NEXT:    vandps %ymm0, %ymm2, %ymm0 | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: floor_maskz_512_ps: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vcmpeqps %zmm1, %zmm0, %k1 | 
|  | ; AVX512-NEXT:    vrndscaleps $9, %zmm0, %zmm0 {%k1} {z} | 
|  | ; AVX512-NEXT:    retq | 
|  | %k = fcmp oeq <16 x float> %x, %y | 
|  | %call = call <16 x float> @llvm.floor.v16f32(<16 x float> %x) | 
|  | %res = select <16 x i1> %k, <16 x float> %call, <16 x float> zeroinitializer | 
|  | ret <16 x float> %res | 
|  | } | 
|  |  | 
|  | define <8 x double> @floor_mask_512_pd(<8 x double> %x, <8 x double> %y) nounwind { | 
|  | ; SSE41-LABEL: floor_mask_512_pd: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    roundpd $9, %xmm3, %xmm8 | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm7, %xmm3 | 
|  | ; SSE41-NEXT:    roundpd $9, %xmm2, %xmm9 | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm6, %xmm2 | 
|  | ; SSE41-NEXT:    roundpd $9, %xmm1, %xmm10 | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm5, %xmm1 | 
|  | ; SSE41-NEXT:    roundpd $9, %xmm0, %xmm11 | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm4, %xmm0 | 
|  | ; SSE41-NEXT:    blendvpd %xmm0, %xmm11, %xmm4 | 
|  | ; SSE41-NEXT:    movapd %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    blendvpd %xmm0, %xmm10, %xmm5 | 
|  | ; SSE41-NEXT:    movapd %xmm2, %xmm0 | 
|  | ; SSE41-NEXT:    blendvpd %xmm0, %xmm9, %xmm6 | 
|  | ; SSE41-NEXT:    movapd %xmm3, %xmm0 | 
|  | ; SSE41-NEXT:    blendvpd %xmm0, %xmm8, %xmm7 | 
|  | ; SSE41-NEXT:    movapd %xmm4, %xmm0 | 
|  | ; SSE41-NEXT:    movapd %xmm5, %xmm1 | 
|  | ; SSE41-NEXT:    movapd %xmm6, %xmm2 | 
|  | ; SSE41-NEXT:    movapd %xmm7, %xmm3 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: floor_mask_512_pd: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vcmpeqpd %ymm3, %ymm1, %ymm4 | 
|  | ; AVX-NEXT:    vcmpeqpd %ymm2, %ymm0, %ymm5 | 
|  | ; AVX-NEXT:    vroundpd $9, %ymm1, %ymm1 | 
|  | ; AVX-NEXT:    vroundpd $9, %ymm0, %ymm0 | 
|  | ; AVX-NEXT:    vblendvpd %ymm5, %ymm0, %ymm2, %ymm0 | 
|  | ; AVX-NEXT:    vblendvpd %ymm4, %ymm1, %ymm3, %ymm1 | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: floor_mask_512_pd: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1 | 
|  | ; AVX512-NEXT:    vrndscalepd $9, %zmm0, %zmm1 {%k1} | 
|  | ; AVX512-NEXT:    vmovapd %zmm1, %zmm0 | 
|  | ; AVX512-NEXT:    retq | 
|  | %k = fcmp oeq <8 x double> %x, %y | 
|  | %call = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) | 
|  | %res = select <8 x i1> %k, <8 x double> %call, <8 x double> %y | 
|  | ret <8 x double> %res | 
|  | } | 
|  |  | 
|  | define <8 x double> @floor_maskz_512_pd(<8 x double> %x, <8 x double> %y) nounwind { | 
|  | ; SSE41-LABEL: floor_maskz_512_pd: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm3, %xmm7 | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm2, %xmm6 | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm1, %xmm5 | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm0, %xmm4 | 
|  | ; SSE41-NEXT:    roundpd $9, %xmm3, %xmm3 | 
|  | ; SSE41-NEXT:    andpd %xmm7, %xmm3 | 
|  | ; SSE41-NEXT:    roundpd $9, %xmm2, %xmm2 | 
|  | ; SSE41-NEXT:    andpd %xmm6, %xmm2 | 
|  | ; SSE41-NEXT:    roundpd $9, %xmm1, %xmm1 | 
|  | ; SSE41-NEXT:    andpd %xmm5, %xmm1 | 
|  | ; SSE41-NEXT:    roundpd $9, %xmm0, %xmm0 | 
|  | ; SSE41-NEXT:    andpd %xmm4, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: floor_maskz_512_pd: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vcmpeqpd %ymm3, %ymm1, %ymm3 | 
|  | ; AVX-NEXT:    vcmpeqpd %ymm2, %ymm0, %ymm2 | 
|  | ; AVX-NEXT:    vroundpd $9, %ymm1, %ymm1 | 
|  | ; AVX-NEXT:    vandpd %ymm1, %ymm3, %ymm1 | 
|  | ; AVX-NEXT:    vroundpd $9, %ymm0, %ymm0 | 
|  | ; AVX-NEXT:    vandpd %ymm0, %ymm2, %ymm0 | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: floor_maskz_512_pd: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1 | 
|  | ; AVX512-NEXT:    vrndscalepd $9, %zmm0, %zmm0 {%k1} {z} | 
|  | ; AVX512-NEXT:    retq | 
|  | %k = fcmp oeq <8 x double> %x, %y | 
|  | %call = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) | 
|  | %res = select <8 x i1> %k, <8 x double> %call, <8 x double> zeroinitializer | 
|  | ret <8 x double> %res | 
|  | } | 
|  |  | 
|  | define <4 x float> @floor_mask_ss(<4 x float> %x, <4 x float> %y, <4 x float> %w, i8 %k) nounwind { | 
|  | ; SSE41-LABEL: floor_mask_ss: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    testb $1, %dil | 
|  | ; SSE41-NEXT:    je LBB52_2 | 
|  | ; SSE41-NEXT:  ## %bb.1: | 
|  | ; SSE41-NEXT:    xorps %xmm2, %xmm2 | 
|  | ; SSE41-NEXT:    roundss $9, %xmm0, %xmm2 | 
|  | ; SSE41-NEXT:  LBB52_2: | 
|  | ; SSE41-NEXT:    movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3] | 
|  | ; SSE41-NEXT:    movaps %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: floor_mask_ss: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    testb $1, %dil | 
|  | ; AVX-NEXT:    je LBB52_2 | 
|  | ; AVX-NEXT:  ## %bb.1: | 
|  | ; AVX-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm2 | 
|  | ; AVX-NEXT:  LBB52_2: | 
|  | ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = xmm2[0],xmm1[1,2,3] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: floor_mask_ss: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX512-NEXT:    kmovw %edi, %k1 | 
|  | ; AVX512-NEXT:    vmovss %xmm0, %xmm1, %xmm2 {%k1} | 
|  | ; AVX512-NEXT:    vmovaps %xmm2, %xmm0 | 
|  | ; AVX512-NEXT:    retq | 
|  | %mask = and i8 %k, 1 | 
|  | %nmask = icmp eq i8 %mask, 0 | 
|  | %s = extractelement <4 x float> %x, i64 0 | 
|  | %call = tail call float @llvm.floor.f32(float %s) | 
|  | %dst = extractelement <4 x float> %w, i64 0 | 
|  | %low = select i1 %nmask, float %dst, float %call | 
|  | %res = insertelement <4 x float> %y, float %low, i64 0 | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | define <4 x float> @floor_maskz_ss(<4 x float> %x, <4 x float> %y, i8 %k) nounwind { | 
|  | ; SSE41-LABEL: floor_maskz_ss: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    testb $1, %dil | 
|  | ; SSE41-NEXT:    xorps %xmm2, %xmm2 | 
|  | ; SSE41-NEXT:    je LBB53_2 | 
|  | ; SSE41-NEXT:  ## %bb.1: | 
|  | ; SSE41-NEXT:    xorps %xmm2, %xmm2 | 
|  | ; SSE41-NEXT:    roundss $9, %xmm0, %xmm2 | 
|  | ; SSE41-NEXT:  LBB53_2: | 
|  | ; SSE41-NEXT:    movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3] | 
|  | ; SSE41-NEXT:    movaps %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: floor_maskz_ss: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    testb $1, %dil | 
|  | ; AVX-NEXT:    vxorps %xmm2, %xmm2, %xmm2 | 
|  | ; AVX-NEXT:    je LBB53_2 | 
|  | ; AVX-NEXT:  ## %bb.1: | 
|  | ; AVX-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm2 | 
|  | ; AVX-NEXT:  LBB53_2: | 
|  | ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = xmm2[0],xmm1[1,2,3] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: floor_maskz_ss: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX512-NEXT:    kmovw %edi, %k1 | 
|  | ; AVX512-NEXT:    vmovss %xmm0, %xmm1, %xmm0 {%k1} {z} | 
|  | ; AVX512-NEXT:    retq | 
|  | %mask = and i8 %k, 1 | 
|  | %nmask = icmp eq i8 %mask, 0 | 
|  | %s = extractelement <4 x float> %x, i64 0 | 
|  | %call = tail call float @llvm.floor.f32(float %s) | 
|  | %low = select i1 %nmask, float zeroinitializer, float %call | 
|  | %res = insertelement <4 x float> %y, float %low, i64 0 | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
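; NOTE (reader's annotation, not autogenerated): the scalar mask/maskz tests
; select between the rounded low element and either a passthrough value or
; zero under bit 0 of a GPR mask. AVX512 folds the whole select into a masked
; vmovss/vmovsd, using {%k1} for merge masking and {%k1} {z} for zero masking,
; while SSE4.1 and AVX have no mask registers and must test the bit with
; "testb $1, %dil" and branch around the round.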
|  | define <2 x double> @floor_mask_sd(<2 x double> %x, <2 x double> %y, <2 x double> %w, i8 %k) nounwind { | 
|  | ; SSE41-LABEL: floor_mask_sd: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    testb $1, %dil | 
|  | ; SSE41-NEXT:    je LBB54_2 | 
|  | ; SSE41-NEXT:  ## %bb.1: | 
|  | ; SSE41-NEXT:    xorps %xmm2, %xmm2 | 
|  | ; SSE41-NEXT:    roundsd $9, %xmm0, %xmm2 | 
|  | ; SSE41-NEXT:  LBB54_2: | 
|  | ; SSE41-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] | 
|  | ; SSE41-NEXT:    movapd %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: floor_mask_sd: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    testb $1, %dil | 
|  | ; AVX-NEXT:    je LBB54_2 | 
|  | ; AVX-NEXT:  ## %bb.1: | 
|  | ; AVX-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm2 | 
|  | ; AVX-NEXT:  LBB54_2: | 
|  | ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = xmm2[0],xmm1[1] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: floor_mask_sd: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX512-NEXT:    kmovw %edi, %k1 | 
|  | ; AVX512-NEXT:    vmovsd %xmm0, %xmm1, %xmm2 {%k1} | 
|  | ; AVX512-NEXT:    vmovapd %xmm2, %xmm0 | 
|  | ; AVX512-NEXT:    retq | 
|  | %mask = and i8 %k, 1 | 
|  | %nmask = icmp eq i8 %mask, 0 | 
|  | %s = extractelement <2 x double> %x, i64 0 | 
|  | %call = tail call double @llvm.floor.f64(double %s) | 
|  | %dst = extractelement <2 x double> %w, i64 0 | 
|  | %low = select i1 %nmask, double %dst, double %call | 
|  | %res = insertelement <2 x double> %y, double %low, i64 0 | 
|  | ret <2 x double> %res | 
|  | } | 
|  |  | 
|  | define <2 x double> @floor_maskz_sd(<2 x double> %x, <2 x double> %y, i8 %k) nounwind { | 
|  | ; SSE41-LABEL: floor_maskz_sd: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    testb $1, %dil | 
|  | ; SSE41-NEXT:    xorpd %xmm2, %xmm2 | 
|  | ; SSE41-NEXT:    je LBB55_2 | 
|  | ; SSE41-NEXT:  ## %bb.1: | 
|  | ; SSE41-NEXT:    xorps %xmm2, %xmm2 | 
|  | ; SSE41-NEXT:    roundsd $9, %xmm0, %xmm2 | 
|  | ; SSE41-NEXT:  LBB55_2: | 
|  | ; SSE41-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] | 
|  | ; SSE41-NEXT:    movapd %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: floor_maskz_sd: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    testb $1, %dil | 
|  | ; AVX-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 | 
|  | ; AVX-NEXT:    je LBB55_2 | 
|  | ; AVX-NEXT:  ## %bb.1: | 
|  | ; AVX-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm2 | 
|  | ; AVX-NEXT:  LBB55_2: | 
|  | ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = xmm2[0],xmm1[1] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: floor_maskz_sd: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX512-NEXT:    kmovw %edi, %k1 | 
|  | ; AVX512-NEXT:    vmovsd %xmm0, %xmm1, %xmm0 {%k1} {z} | 
|  | ; AVX512-NEXT:    retq | 
|  | %mask = and i8 %k, 1 | 
|  | %nmask = icmp eq i8 %mask, 0 | 
|  | %s = extractelement <2 x double> %x, i64 0 | 
|  | %call = tail call double @llvm.floor.f64(double %s) | 
|  | %low = select i1 %nmask, double zeroinitializer, double %call | 
|  | %res = insertelement <2 x double> %y, double %low, i64 0 | 
|  | ret <2 x double> %res | 
|  | } | 
|  |  | 
|  | define <4 x float> @floor_mask_ss_trunc(<4 x float> %x, <4 x float> %y, <4 x float> %w, i16 %k) nounwind { | 
|  | ; SSE41-LABEL: floor_mask_ss_trunc: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    testb $1, %dil | 
|  | ; SSE41-NEXT:    je LBB56_2 | 
|  | ; SSE41-NEXT:  ## %bb.1: | 
|  | ; SSE41-NEXT:    xorps %xmm2, %xmm2 | 
|  | ; SSE41-NEXT:    roundss $9, %xmm0, %xmm2 | 
|  | ; SSE41-NEXT:  LBB56_2: | 
|  | ; SSE41-NEXT:    movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3] | 
|  | ; SSE41-NEXT:    movaps %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: floor_mask_ss_trunc: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    testb $1, %dil | 
|  | ; AVX-NEXT:    je LBB56_2 | 
|  | ; AVX-NEXT:  ## %bb.1: | 
|  | ; AVX-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm2 | 
|  | ; AVX-NEXT:  LBB56_2: | 
|  | ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = xmm2[0],xmm1[1,2,3] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: floor_mask_ss_trunc: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX512-NEXT:    kmovw %edi, %k1 | 
|  | ; AVX512-NEXT:    vmovss %xmm0, %xmm1, %xmm2 {%k1} | 
|  | ; AVX512-NEXT:    vmovaps %xmm2, %xmm0 | 
|  | ; AVX512-NEXT:    retq | 
|  | %mask = trunc i16 %k to i1 | 
|  | %s = extractelement <4 x float> %x, i64 0 | 
|  | %call = tail call float @llvm.floor.f32(float %s) | 
|  | %dst = extractelement <4 x float> %w, i64 0 | 
|  | %low = select i1 %mask, float %call, float %dst | 
|  | %res = insertelement <4 x float> %y, float %low, i64 0 | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | define <4 x float> @floor_maskz_ss_trunc(<4 x float> %x, <4 x float> %y, i16 %k) nounwind { | 
|  | ; SSE41-LABEL: floor_maskz_ss_trunc: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    testb $1, %dil | 
|  | ; SSE41-NEXT:    jne LBB57_1 | 
|  | ; SSE41-NEXT:  ## %bb.2: | 
|  | ; SSE41-NEXT:    xorps %xmm0, %xmm0 | 
|  | ; SSE41-NEXT:    jmp LBB57_3 | 
|  | ; SSE41-NEXT:  LBB57_1: | 
|  | ; SSE41-NEXT:    roundss $9, %xmm0, %xmm0 | 
|  | ; SSE41-NEXT:  LBB57_3: | 
|  | ; SSE41-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] | 
|  | ; SSE41-NEXT:    movaps %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: floor_maskz_ss_trunc: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    testb $1, %dil | 
|  | ; AVX-NEXT:    jne LBB57_1 | 
|  | ; AVX-NEXT:  ## %bb.2: | 
|  | ; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] | 
|  | ; AVX-NEXT:    retq | 
|  | ; AVX-NEXT:  LBB57_1: | 
|  | ; AVX-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: floor_maskz_ss_trunc: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX512-NEXT:    kmovw %edi, %k1 | 
|  | ; AVX512-NEXT:    vmovss %xmm0, %xmm1, %xmm0 {%k1} {z} | 
|  | ; AVX512-NEXT:    retq | 
|  | %mask = trunc i16 %k to i1 | 
|  | %s = extractelement <4 x float> %x, i64 0 | 
|  | %call = tail call float @llvm.floor.f32(float %s) | 
|  | %low = select i1 %mask, float %call, float zeroinitializer | 
|  | %res = insertelement <4 x float> %y, float %low, i64 0 | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | define <2 x double> @floor_mask_sd_trunc(<2 x double> %x, <2 x double> %y, <2 x double> %w, i16 %k) nounwind { | 
|  | ; SSE41-LABEL: floor_mask_sd_trunc: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    testb $1, %dil | 
|  | ; SSE41-NEXT:    je LBB58_2 | 
|  | ; SSE41-NEXT:  ## %bb.1: | 
|  | ; SSE41-NEXT:    xorps %xmm2, %xmm2 | 
|  | ; SSE41-NEXT:    roundsd $9, %xmm0, %xmm2 | 
|  | ; SSE41-NEXT:  LBB58_2: | 
|  | ; SSE41-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] | 
|  | ; SSE41-NEXT:    movapd %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: floor_mask_sd_trunc: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    testb $1, %dil | 
|  | ; AVX-NEXT:    je LBB58_2 | 
|  | ; AVX-NEXT:  ## %bb.1: | 
|  | ; AVX-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm2 | 
|  | ; AVX-NEXT:  LBB58_2: | 
|  | ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = xmm2[0],xmm1[1] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: floor_mask_sd_trunc: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX512-NEXT:    kmovw %edi, %k1 | 
|  | ; AVX512-NEXT:    vmovsd %xmm0, %xmm1, %xmm2 {%k1} | 
|  | ; AVX512-NEXT:    vmovapd %xmm2, %xmm0 | 
|  | ; AVX512-NEXT:    retq | 
|  | %mask = trunc i16 %k to i1 | 
|  | %s = extractelement <2 x double> %x, i64 0 | 
|  | %call = tail call double @llvm.floor.f64(double %s) | 
|  | %dst = extractelement <2 x double> %w, i64 0 | 
|  | %low = select i1 %mask, double %call, double %dst | 
|  | %res = insertelement <2 x double> %y, double %low, i64 0 | 
|  | ret <2 x double> %res | 
|  | } | 
|  |  | 
|  | define <2 x double> @floor_maskz_sd_trunc(<2 x double> %x, <2 x double> %y, i16 %k) nounwind { | 
|  | ; SSE41-LABEL: floor_maskz_sd_trunc: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    testb $1, %dil | 
|  | ; SSE41-NEXT:    jne LBB59_1 | 
|  | ; SSE41-NEXT:  ## %bb.2: | 
|  | ; SSE41-NEXT:    xorpd %xmm0, %xmm0 | 
|  | ; SSE41-NEXT:    jmp LBB59_3 | 
|  | ; SSE41-NEXT:  LBB59_1: | 
|  | ; SSE41-NEXT:    roundsd $9, %xmm0, %xmm0 | 
|  | ; SSE41-NEXT:  LBB59_3: | 
|  | ; SSE41-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] | 
|  | ; SSE41-NEXT:    movapd %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: floor_maskz_sd_trunc: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    testb $1, %dil | 
|  | ; AVX-NEXT:    jne LBB59_1 | 
|  | ; AVX-NEXT:  ## %bb.2: | 
|  | ; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] | 
|  | ; AVX-NEXT:    retq | 
|  | ; AVX-NEXT:  LBB59_1: | 
|  | ; AVX-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: floor_maskz_sd_trunc: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX512-NEXT:    kmovw %edi, %k1 | 
|  | ; AVX512-NEXT:    vmovsd %xmm0, %xmm1, %xmm0 {%k1} {z} | 
|  | ; AVX512-NEXT:    retq | 
|  | %mask = trunc i16 %k to i1 | 
|  | %s = extractelement <2 x double> %x, i64 0 | 
|  | %call = tail call double @llvm.floor.f64(double %s) | 
|  | %low = select i1 %mask, double %call, double zeroinitializer | 
|  | %res = insertelement <2 x double> %y, double %low, i64 0 | 
|  | ret <2 x double> %res | 
|  | } | 
|  |  | 
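; NOTE (reader's annotation, not autogenerated): the _trunc variants pass the
; mask as i16 and narrow it with trunc to i1, which reads only bit 0. That is
; why codegen emits the same "testb $1, %dil" as the and/icmp form above, and
; why AVX512 can move the whole GPR into k1 with kmovw: the masked
; vmovss/vmovsd consumes only bit 0 of the mask register.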
|  | define <4 x float> @floor_mask_ss_mask8(<4 x float> %x, <4 x float> %y, <4 x float> %w) nounwind { | 
|  | ; SSE41-LABEL: floor_mask_ss_mask8: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    roundss $9, %xmm0, %xmm3 | 
|  | ; SSE41-NEXT:    cmpeqss %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    blendvps %xmm0, %xmm3, %xmm2 | 
|  | ; SSE41-NEXT:    blendps {{.*#+}} xmm2 = xmm2[0],xmm1[1,2,3] | 
|  | ; SSE41-NEXT:    movaps %xmm2, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: floor_mask_ss_mask8: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm3 | 
|  | ; AVX-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vblendvps %xmm0, %xmm3, %xmm2, %xmm0 | 
|  | ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: floor_mask_ss_mask8: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm3 | 
|  | ; AVX512-NEXT:    vcmpeqss %xmm1, %xmm0, %k1 | 
|  | ; AVX512-NEXT:    vmovss %xmm3, %xmm1, %xmm2 {%k1} | 
|  | ; AVX512-NEXT:    vmovaps %xmm2, %xmm0 | 
|  | ; AVX512-NEXT:    retq | 
|  | %mask1 = fcmp oeq <4 x float> %x, %y | 
|  | %mask = extractelement <4 x i1> %mask1, i64 0 | 
|  | %s = extractelement <4 x float> %x, i64 0 | 
|  | %call = tail call float @llvm.floor.f32(float %s) | 
|  | %dst = extractelement <4 x float> %w, i64 0 | 
|  | %low = select i1 %mask, float %call, float %dst | 
|  | %res = insertelement <4 x float> %y, float %low, i64 0 | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | define <4 x float> @floor_maskz_ss_mask8(<4 x float> %x, <4 x float> %y) nounwind { | 
|  | ; SSE41-LABEL: floor_maskz_ss_mask8: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    roundss $9, %xmm0, %xmm2 | 
|  | ; SSE41-NEXT:    cmpeqss %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    andps %xmm2, %xmm0 | 
|  | ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: floor_maskz_ss_mask8: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm2 | 
|  | ; AVX-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: floor_maskz_ss_mask8: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm2 | 
|  | ; AVX512-NEXT:    vcmpeqss %xmm1, %xmm0, %k1 | 
|  | ; AVX512-NEXT:    vmovss %xmm2, %xmm1, %xmm0 {%k1} {z} | 
|  | ; AVX512-NEXT:    retq | 
|  | %mask1 = fcmp oeq <4 x float> %x, %y | 
|  | %mask = extractelement <4 x i1> %mask1, i64 0 | 
|  | %s = extractelement <4 x float> %x, i64 0 | 
|  | %call = tail call float @llvm.floor.f32(float %s) | 
|  | %low = select i1 %mask, float %call, float zeroinitializer | 
|  | %res = insertelement <4 x float> %y, float %low, i64 0 | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | define <2 x double> @floor_mask_sd_mask8(<2 x double> %x, <2 x double> %y, <2 x double> %w) nounwind { | 
|  | ; SSE41-LABEL: floor_mask_sd_mask8: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    roundsd $9, %xmm0, %xmm3 | 
|  | ; SSE41-NEXT:    cmpeqsd %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm2 | 
|  | ; SSE41-NEXT:    blendpd {{.*#+}} xmm2 = xmm2[0],xmm1[1] | 
|  | ; SSE41-NEXT:    movapd %xmm2, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: floor_mask_sd_mask8: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm3 | 
|  | ; AVX-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vblendvpd %xmm0, %xmm3, %xmm2, %xmm0 | 
|  | ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: floor_mask_sd_mask8: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm3 | 
|  | ; AVX512-NEXT:    vcmpeqsd %xmm1, %xmm0, %k1 | 
|  | ; AVX512-NEXT:    vmovsd %xmm3, %xmm1, %xmm2 {%k1} | 
|  | ; AVX512-NEXT:    vmovapd %xmm2, %xmm0 | 
|  | ; AVX512-NEXT:    retq | 
|  | %mask1 = fcmp oeq <2 x double> %x, %y | 
|  | %mask = extractelement <2 x i1> %mask1, i64 0 | 
|  | %s = extractelement <2 x double> %x, i64 0 | 
|  | %call = tail call double @llvm.floor.f64(double %s) | 
|  | %dst = extractelement <2 x double> %w, i64 0 | 
|  | %low = select i1 %mask, double %call, double %dst | 
|  | %res = insertelement <2 x double> %y, double %low, i64 0 | 
|  | ret <2 x double> %res | 
|  | } | 
|  |  | 
|  | define <2 x double> @floor_maskz_sd_mask8(<2 x double> %x, <2 x double> %y) nounwind { | 
|  | ; SSE41-LABEL: floor_maskz_sd_mask8: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    roundsd $9, %xmm0, %xmm2 | 
|  | ; SSE41-NEXT:    cmpeqsd %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    andpd %xmm2, %xmm0 | 
|  | ; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: floor_maskz_sd_mask8: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm2 | 
|  | ; AVX-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vandpd %xmm2, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: floor_maskz_sd_mask8: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm2 | 
|  | ; AVX512-NEXT:    vcmpeqsd %xmm1, %xmm0, %k1 | 
|  | ; AVX512-NEXT:    vmovsd %xmm2, %xmm1, %xmm0 {%k1} {z} | 
|  | ; AVX512-NEXT:    retq | 
|  | %mask1 = fcmp oeq <2 x double> %x, %y | 
|  | %mask = extractelement <2 x i1> %mask1, i64 0 | 
|  | %s = extractelement <2 x double> %x, i64 0 | 
|  | %call = tail call double @llvm.floor.f64(double %s) | 
|  | %low = select i1 %mask, double %call, double zeroinitializer | 
|  | %res = insertelement <2 x double> %y, double %low, i64 0 | 
|  | ret <2 x double> %res | 
|  | } | 
|  |  | 
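; NOTE (reader's annotation, not autogenerated): in the _mask8 variants the
; condition comes from an fcmp instead of a GPR. SSE4.1 and AVX materialize it
; with cmpeqss/cmpeqsd as an all-ones or all-zeros low lane and consume it via
; blendvps/blendvpd (merge) or a plain and (zeroing); AVX512 compares straight
; into a mask register with vcmpeqss/vcmpeqsd and applies it to vmovss/vmovsd.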
|  | define <4 x float> @ceil_ss(<4 x float> %x, <4 x float> %y) nounwind { | 
|  | ; SSE41-LABEL: ceil_ss: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    roundss $10, %xmm0, %xmm0 | 
|  | ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: ceil_ss: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: ceil_ss: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX512-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] | 
|  | ; AVX512-NEXT:    retq | 
|  | %s = extractelement <4 x float> %x, i32 0 | 
|  | %call = call float @llvm.ceil.f32(float %s) | 
|  | %res = insertelement <4 x float> %y, float %call, i32 0 | 
|  | ret <4 x float> %res | 
|  | } | 
|  | declare float @llvm.ceil.f32(float %s) | 
|  |  | 
|  | define <2 x double> @ceil_sd(<2 x double> %x, <2 x double> %y) nounwind { | 
|  | ; SSE41-LABEL: ceil_sd: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    roundsd $10, %xmm0, %xmm0 | 
|  | ; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: ceil_sd: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: ceil_sd: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX512-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] | 
|  | ; AVX512-NEXT:    retq | 
|  | %s = extractelement <2 x double> %x, i32 0 | 
|  | %call = call double @llvm.ceil.f64(double %s) | 
|  | %res = insertelement <2 x double> %y, double %call, i32 0 | 
|  | ret <2 x double> %res | 
|  | } | 
|  | declare double @llvm.ceil.f64(double %s) | 
|  |  | 
|  | define <4 x float> @ceil_mask_128_ps(<4 x float> %x, <4 x float> %y) nounwind { | 
|  | ; SSE41-LABEL: ceil_mask_128_ps: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    roundps $10, %xmm0, %xmm2 | 
|  | ; SSE41-NEXT:    cmpeqps %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    blendvps %xmm0, %xmm2, %xmm1 | 
|  | ; SSE41-NEXT:    movaps %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: ceil_mask_128_ps: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm2 | 
|  | ; AVX-NEXT:    vroundps $10, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0 | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512F-LABEL: ceil_mask_128_ps: | 
|  | ; AVX512F:       ## %bb.0: | 
|  | ; AVX512F-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1 | 
|  | ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0 | 
|  | ; AVX512F-NEXT:    vcmpeqps %zmm1, %zmm0, %k1 | 
|  | ; AVX512F-NEXT:    vroundps $10, %xmm0, %xmm0 | 
|  | ; AVX512F-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} | 
|  | ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0 | 
|  | ; AVX512F-NEXT:    vzeroupper | 
|  | ; AVX512F-NEXT:    retq | 
|  | ; | 
|  | ; AVX512VL-LABEL: ceil_mask_128_ps: | 
|  | ; AVX512VL:       ## %bb.0: | 
|  | ; AVX512VL-NEXT:    vcmpeqps %xmm1, %xmm0, %k1 | 
|  | ; AVX512VL-NEXT:    vrndscaleps $10, %xmm0, %xmm1 {%k1} | 
|  | ; AVX512VL-NEXT:    vmovaps %xmm1, %xmm0 | 
|  | ; AVX512VL-NEXT:    retq | 
|  | %k = fcmp oeq <4 x float> %x, %y | 
|  | %call = call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) | 
|  | %res = select <4 x i1> %k, <4 x float> %call, <4 x float> %y | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
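; NOTE (reader's annotation, not autogenerated): the 128-bit and 256-bit
; masked tests are where the AVX512F and AVX512VL prefixes diverge. Without
; VL, masked vector operations exist only at 512 bits, so the operands are
; implicitly widened to zmm (the "## kill" lines are register-allocator
; annotations for that widening), the compare and blend run at zmm width, and
; the result is narrowed back. With VL, vrndscaleps/vrndscalepd take the mask
; directly at xmm/ymm width, collapsing each test to a compare plus a single
; masked instruction.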
|  | define <4 x float> @ceil_maskz_128_ps(<4 x float> %x, <4 x float> %y) nounwind { | 
|  | ; SSE41-LABEL: ceil_maskz_128_ps: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    cmpeqps %xmm0, %xmm1 | 
|  | ; SSE41-NEXT:    roundps $10, %xmm0, %xmm0 | 
|  | ; SSE41-NEXT:    andps %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: ceil_maskz_128_ps: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 | 
|  | ; AVX-NEXT:    vroundps $10, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vandps %xmm0, %xmm1, %xmm0 | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512F-LABEL: ceil_maskz_128_ps: | 
|  | ; AVX512F:       ## %bb.0: | 
|  | ; AVX512F-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1 | 
|  | ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0 | 
|  | ; AVX512F-NEXT:    vcmpeqps %zmm1, %zmm0, %k1 | 
|  | ; AVX512F-NEXT:    vroundps $10, %xmm0, %xmm0 | 
|  | ; AVX512F-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z} | 
|  | ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0 | 
|  | ; AVX512F-NEXT:    vzeroupper | 
|  | ; AVX512F-NEXT:    retq | 
|  | ; | 
|  | ; AVX512VL-LABEL: ceil_maskz_128_ps: | 
|  | ; AVX512VL:       ## %bb.0: | 
|  | ; AVX512VL-NEXT:    vcmpeqps %xmm1, %xmm0, %k1 | 
|  | ; AVX512VL-NEXT:    vrndscaleps $10, %xmm0, %xmm0 {%k1} {z} | 
|  | ; AVX512VL-NEXT:    retq | 
|  | %k = fcmp oeq <4 x float> %x, %y | 
|  | %call = call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) | 
|  | %res = select <4 x i1> %k, <4 x float> %call, <4 x float> zeroinitializer | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | define <2 x double> @ceil_mask_128_pd(<2 x double> %x, <2 x double> %y) nounwind { | 
|  | ; SSE41-LABEL: ceil_mask_128_pd: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    roundpd $10, %xmm0, %xmm2 | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1 | 
|  | ; SSE41-NEXT:    movapd %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: ceil_mask_128_pd: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm2 | 
|  | ; AVX-NEXT:    vroundpd $10, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512F-LABEL: ceil_mask_128_pd: | 
|  | ; AVX512F:       ## %bb.0: | 
|  | ; AVX512F-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1 | 
|  | ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0 | 
|  | ; AVX512F-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1 | 
|  | ; AVX512F-NEXT:    vroundpd $10, %xmm0, %xmm0 | 
|  | ; AVX512F-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} | 
|  | ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0 | 
|  | ; AVX512F-NEXT:    vzeroupper | 
|  | ; AVX512F-NEXT:    retq | 
|  | ; | 
|  | ; AVX512VL-LABEL: ceil_mask_128_pd: | 
|  | ; AVX512VL:       ## %bb.0: | 
|  | ; AVX512VL-NEXT:    vcmpeqpd %xmm1, %xmm0, %k1 | 
|  | ; AVX512VL-NEXT:    vrndscalepd $10, %xmm0, %xmm1 {%k1} | 
|  | ; AVX512VL-NEXT:    vmovapd %xmm1, %xmm0 | 
|  | ; AVX512VL-NEXT:    retq | 
|  | %k = fcmp oeq <2 x double> %x, %y | 
|  | %call = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) | 
|  | %res = select <2 x i1> %k, <2 x double> %call, <2 x double> %y | 
|  | ret <2 x double> %res | 
|  | } | 
|  |  | 
|  | define <2 x double> @ceil_maskz_128_pd(<2 x double> %x, <2 x double> %y) nounwind { | 
|  | ; SSE41-LABEL: ceil_maskz_128_pd: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm0, %xmm1 | 
|  | ; SSE41-NEXT:    roundpd $10, %xmm0, %xmm0 | 
|  | ; SSE41-NEXT:    andpd %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: ceil_maskz_128_pd: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 | 
|  | ; AVX-NEXT:    vroundpd $10, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vandpd %xmm0, %xmm1, %xmm0 | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512F-LABEL: ceil_maskz_128_pd: | 
|  | ; AVX512F:       ## %bb.0: | 
|  | ; AVX512F-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1 | 
|  | ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0 | 
|  | ; AVX512F-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1 | 
|  | ; AVX512F-NEXT:    vroundpd $10, %xmm0, %xmm0 | 
|  | ; AVX512F-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z} | 
|  | ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0 | 
|  | ; AVX512F-NEXT:    vzeroupper | 
|  | ; AVX512F-NEXT:    retq | 
|  | ; | 
|  | ; AVX512VL-LABEL: ceil_maskz_128_pd: | 
|  | ; AVX512VL:       ## %bb.0: | 
|  | ; AVX512VL-NEXT:    vcmpeqpd %xmm1, %xmm0, %k1 | 
|  | ; AVX512VL-NEXT:    vrndscalepd $10, %xmm0, %xmm0 {%k1} {z} | 
|  | ; AVX512VL-NEXT:    retq | 
|  | %k = fcmp oeq <2 x double> %x, %y | 
|  | %call = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) | 
|  | %res = select <2 x i1> %k, <2 x double> %call, <2 x double> zeroinitializer | 
|  | ret <2 x double> %res | 
|  | } | 
|  |  | 
|  | define <8 x float> @ceil_mask_256_ps(<8 x float> %x, <8 x float> %y) nounwind { | 
|  | ; SSE41-LABEL: ceil_mask_256_ps: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    roundps $10, %xmm1, %xmm4 | 
|  | ; SSE41-NEXT:    cmpeqps %xmm3, %xmm1 | 
|  | ; SSE41-NEXT:    roundps $10, %xmm0, %xmm5 | 
|  | ; SSE41-NEXT:    cmpeqps %xmm2, %xmm0 | 
|  | ; SSE41-NEXT:    blendvps %xmm0, %xmm5, %xmm2 | 
|  | ; SSE41-NEXT:    movaps %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    blendvps %xmm0, %xmm4, %xmm3 | 
|  | ; SSE41-NEXT:    movaps %xmm2, %xmm0 | 
|  | ; SSE41-NEXT:    movaps %xmm3, %xmm1 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: ceil_mask_256_ps: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm2 | 
|  | ; AVX-NEXT:    vroundps $10, %ymm0, %ymm0 | 
|  | ; AVX-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0 | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512F-LABEL: ceil_mask_256_ps: | 
|  | ; AVX512F:       ## %bb.0: | 
|  | ; AVX512F-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1 | 
|  | ; AVX512F-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0 | 
|  | ; AVX512F-NEXT:    vcmpeqps %zmm1, %zmm0, %k1 | 
|  | ; AVX512F-NEXT:    vroundps $10, %ymm0, %ymm0 | 
|  | ; AVX512F-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} | 
|  | ; AVX512F-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0 | 
|  | ; AVX512F-NEXT:    retq | 
|  | ; | 
|  | ; AVX512VL-LABEL: ceil_mask_256_ps: | 
|  | ; AVX512VL:       ## %bb.0: | 
|  | ; AVX512VL-NEXT:    vcmpeqps %ymm1, %ymm0, %k1 | 
|  | ; AVX512VL-NEXT:    vrndscaleps $10, %ymm0, %ymm1 {%k1} | 
|  | ; AVX512VL-NEXT:    vmovaps %ymm1, %ymm0 | 
|  | ; AVX512VL-NEXT:    retq | 
|  | %k = fcmp oeq <8 x float> %x, %y | 
|  | %call = call <8 x float> @llvm.ceil.v8f32(<8 x float> %x) | 
|  | %res = select <8 x i1> %k, <8 x float> %call, <8 x float> %y | 
|  | ret <8 x float> %res | 
|  | } | 
|  |  | 
|  | define <8 x float> @ceil_maskz_256_ps(<8 x float> %x, <8 x float> %y) nounwind { | 
|  | ; SSE41-LABEL: ceil_maskz_256_ps: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    cmpeqps %xmm1, %xmm3 | 
|  | ; SSE41-NEXT:    cmpeqps %xmm0, %xmm2 | 
|  | ; SSE41-NEXT:    roundps $10, %xmm1, %xmm1 | 
|  | ; SSE41-NEXT:    andps %xmm3, %xmm1 | 
|  | ; SSE41-NEXT:    roundps $10, %xmm0, %xmm0 | 
|  | ; SSE41-NEXT:    andps %xmm2, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: ceil_maskz_256_ps: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm1 | 
|  | ; AVX-NEXT:    vroundps $10, %ymm0, %ymm0 | 
|  | ; AVX-NEXT:    vandps %ymm0, %ymm1, %ymm0 | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512F-LABEL: ceil_maskz_256_ps: | 
|  | ; AVX512F:       ## %bb.0: | 
|  | ; AVX512F-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1 | 
|  | ; AVX512F-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0 | 
|  | ; AVX512F-NEXT:    vcmpeqps %zmm1, %zmm0, %k1 | 
|  | ; AVX512F-NEXT:    vroundps $10, %ymm0, %ymm0 | 
|  | ; AVX512F-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z} | 
|  | ; AVX512F-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0 | 
|  | ; AVX512F-NEXT:    retq | 
|  | ; | 
|  | ; AVX512VL-LABEL: ceil_maskz_256_ps: | 
|  | ; AVX512VL:       ## %bb.0: | 
|  | ; AVX512VL-NEXT:    vcmpeqps %ymm1, %ymm0, %k1 | 
|  | ; AVX512VL-NEXT:    vrndscaleps $10, %ymm0, %ymm0 {%k1} {z} | 
|  | ; AVX512VL-NEXT:    retq | 
|  | %k = fcmp oeq <8 x float> %x, %y | 
|  | %call = call <8 x float> @llvm.ceil.v8f32(<8 x float> %x) | 
|  | %res = select <8 x i1> %k, <8 x float> %call, <8 x float> zeroinitializer | 
|  | ret <8 x float> %res | 
|  | } | 
|  |  | 
|  | define <4 x double> @ceil_mask_256_pd(<4 x double> %x, <4 x double> %y) nounwind { | 
|  | ; SSE41-LABEL: ceil_mask_256_pd: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    roundpd $10, %xmm1, %xmm4 | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm3, %xmm1 | 
|  | ; SSE41-NEXT:    roundpd $10, %xmm0, %xmm5 | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm2, %xmm0 | 
|  | ; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm2 | 
|  | ; SSE41-NEXT:    movapd %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm3 | 
|  | ; SSE41-NEXT:    movapd %xmm2, %xmm0 | 
|  | ; SSE41-NEXT:    movapd %xmm3, %xmm1 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: ceil_mask_256_pd: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm2 | 
|  | ; AVX-NEXT:    vroundpd $10, %ymm0, %ymm0 | 
|  | ; AVX-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512F-LABEL: ceil_mask_256_pd: | 
|  | ; AVX512F:       ## %bb.0: | 
|  | ; AVX512F-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1 | 
|  | ; AVX512F-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0 | 
|  | ; AVX512F-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1 | 
|  | ; AVX512F-NEXT:    vroundpd $10, %ymm0, %ymm0 | 
|  | ; AVX512F-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} | 
|  | ; AVX512F-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0 | 
|  | ; AVX512F-NEXT:    retq | 
|  | ; | 
|  | ; AVX512VL-LABEL: ceil_mask_256_pd: | 
|  | ; AVX512VL:       ## %bb.0: | 
|  | ; AVX512VL-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1 | 
|  | ; AVX512VL-NEXT:    vrndscalepd $10, %ymm0, %ymm1 {%k1} | 
|  | ; AVX512VL-NEXT:    vmovapd %ymm1, %ymm0 | 
|  | ; AVX512VL-NEXT:    retq | 
|  | %k = fcmp oeq <4 x double> %x, %y | 
|  | %call = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) | 
|  | %res = select <4 x i1> %k, <4 x double> %call, <4 x double> %y | 
|  | ret <4 x double> %res | 
|  | } | 
|  |  | 
|  | define <4 x double> @ceil_maskz_256_pd(<4 x double> %x, <4 x double> %y) nounwind { | 
|  | ; SSE41-LABEL: ceil_maskz_256_pd: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm1, %xmm3 | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm0, %xmm2 | 
|  | ; SSE41-NEXT:    roundpd $10, %xmm1, %xmm1 | 
|  | ; SSE41-NEXT:    andpd %xmm3, %xmm1 | 
|  | ; SSE41-NEXT:    roundpd $10, %xmm0, %xmm0 | 
|  | ; SSE41-NEXT:    andpd %xmm2, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: ceil_maskz_256_pd: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm1 | 
|  | ; AVX-NEXT:    vroundpd $10, %ymm0, %ymm0 | 
|  | ; AVX-NEXT:    vandpd %ymm0, %ymm1, %ymm0 | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512F-LABEL: ceil_maskz_256_pd: | 
|  | ; AVX512F:       ## %bb.0: | 
|  | ; AVX512F-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1 | 
|  | ; AVX512F-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0 | 
|  | ; AVX512F-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1 | 
|  | ; AVX512F-NEXT:    vroundpd $10, %ymm0, %ymm0 | 
|  | ; AVX512F-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z} | 
|  | ; AVX512F-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0 | 
|  | ; AVX512F-NEXT:    retq | 
|  | ; | 
|  | ; AVX512VL-LABEL: ceil_maskz_256_pd: | 
|  | ; AVX512VL:       ## %bb.0: | 
|  | ; AVX512VL-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1 | 
|  | ; AVX512VL-NEXT:    vrndscalepd $10, %ymm0, %ymm0 {%k1} {z} | 
|  | ; AVX512VL-NEXT:    retq | 
|  | %k = fcmp oeq <4 x double> %x, %y | 
|  | %call = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) | 
|  | %res = select <4 x i1> %k, <4 x double> %call, <4 x double> zeroinitializer | 
|  | ret <4 x double> %res | 
|  | } | 
|  |  | 
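; NOTE (reader's annotation, not autogenerated): the 128-bit AVX512F paths
; above end in vzeroupper while the 256-bit ones do not; vzeroupper clears the
; bits above 127 of every vector register, which is safe when only an xmm
; value is returned but would clobber a ymm return value. In the 512-bit tests
; that follow, AVX512F and AVX512VL share a single AVX512 prefix because
; zmm-wide vrndscale needs no VL; SSE4.1 instead splits the work into four
; 128-bit halves and AVX into two 256-bit halves.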
|  | define <16 x float> @ceil_mask_512_ps(<16 x float> %x, <16 x float> %y) nounwind { | 
|  | ; SSE41-LABEL: ceil_mask_512_ps: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    roundps $10, %xmm3, %xmm8 | 
|  | ; SSE41-NEXT:    cmpeqps %xmm7, %xmm3 | 
|  | ; SSE41-NEXT:    roundps $10, %xmm2, %xmm9 | 
|  | ; SSE41-NEXT:    cmpeqps %xmm6, %xmm2 | 
|  | ; SSE41-NEXT:    roundps $10, %xmm1, %xmm10 | 
|  | ; SSE41-NEXT:    cmpeqps %xmm5, %xmm1 | 
|  | ; SSE41-NEXT:    roundps $10, %xmm0, %xmm11 | 
|  | ; SSE41-NEXT:    cmpeqps %xmm4, %xmm0 | 
|  | ; SSE41-NEXT:    blendvps %xmm0, %xmm11, %xmm4 | 
|  | ; SSE41-NEXT:    movaps %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    blendvps %xmm0, %xmm10, %xmm5 | 
|  | ; SSE41-NEXT:    movaps %xmm2, %xmm0 | 
|  | ; SSE41-NEXT:    blendvps %xmm0, %xmm9, %xmm6 | 
|  | ; SSE41-NEXT:    movaps %xmm3, %xmm0 | 
|  | ; SSE41-NEXT:    blendvps %xmm0, %xmm8, %xmm7 | 
|  | ; SSE41-NEXT:    movaps %xmm4, %xmm0 | 
|  | ; SSE41-NEXT:    movaps %xmm5, %xmm1 | 
|  | ; SSE41-NEXT:    movaps %xmm6, %xmm2 | 
|  | ; SSE41-NEXT:    movaps %xmm7, %xmm3 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: ceil_mask_512_ps: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vcmpeqps %ymm3, %ymm1, %ymm4 | 
|  | ; AVX-NEXT:    vcmpeqps %ymm2, %ymm0, %ymm5 | 
|  | ; AVX-NEXT:    vroundps $10, %ymm1, %ymm1 | 
|  | ; AVX-NEXT:    vroundps $10, %ymm0, %ymm0 | 
|  | ; AVX-NEXT:    vblendvps %ymm5, %ymm0, %ymm2, %ymm0 | 
|  | ; AVX-NEXT:    vblendvps %ymm4, %ymm1, %ymm3, %ymm1 | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: ceil_mask_512_ps: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vcmpeqps %zmm1, %zmm0, %k1 | 
|  | ; AVX512-NEXT:    vrndscaleps $10, %zmm0, %zmm1 {%k1} | 
|  | ; AVX512-NEXT:    vmovaps %zmm1, %zmm0 | 
|  | ; AVX512-NEXT:    retq | 
|  | %k = fcmp oeq <16 x float> %x, %y | 
|  | %call = call <16 x float> @llvm.ceil.v16f32(<16 x float> %x) | 
|  | %res = select <16 x i1> %k, <16 x float> %call, <16 x float> %y | 
|  | ret <16 x float> %res | 
|  | } | 
|  |  | 
|  | define <16 x float> @ceil_maskz_512_ps(<16 x float> %x, <16 x float> %y) nounwind { | 
|  | ; SSE41-LABEL: ceil_maskz_512_ps: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    cmpeqps %xmm3, %xmm7 | 
|  | ; SSE41-NEXT:    cmpeqps %xmm2, %xmm6 | 
|  | ; SSE41-NEXT:    cmpeqps %xmm1, %xmm5 | 
|  | ; SSE41-NEXT:    cmpeqps %xmm0, %xmm4 | 
|  | ; SSE41-NEXT:    roundps $10, %xmm3, %xmm3 | 
|  | ; SSE41-NEXT:    andps %xmm7, %xmm3 | 
|  | ; SSE41-NEXT:    roundps $10, %xmm2, %xmm2 | 
|  | ; SSE41-NEXT:    andps %xmm6, %xmm2 | 
|  | ; SSE41-NEXT:    roundps $10, %xmm1, %xmm1 | 
|  | ; SSE41-NEXT:    andps %xmm5, %xmm1 | 
|  | ; SSE41-NEXT:    roundps $10, %xmm0, %xmm0 | 
|  | ; SSE41-NEXT:    andps %xmm4, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: ceil_maskz_512_ps: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vcmpeqps %ymm3, %ymm1, %ymm3 | 
|  | ; AVX-NEXT:    vcmpeqps %ymm2, %ymm0, %ymm2 | 
|  | ; AVX-NEXT:    vroundps $10, %ymm1, %ymm1 | 
|  | ; AVX-NEXT:    vandps %ymm1, %ymm3, %ymm1 | 
|  | ; AVX-NEXT:    vroundps $10, %ymm0, %ymm0 | 
|  | ; AVX-NEXT:    vandps %ymm0, %ymm2, %ymm0 | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: ceil_maskz_512_ps: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vcmpeqps %zmm1, %zmm0, %k1 | 
|  | ; AVX512-NEXT:    vrndscaleps $10, %zmm0, %zmm0 {%k1} {z} | 
|  | ; AVX512-NEXT:    retq | 
|  | %k = fcmp oeq <16 x float> %x, %y | 
|  | %call = call <16 x float> @llvm.ceil.v16f32(<16 x float> %x) | 
|  | %res = select <16 x i1> %k, <16 x float> %call, <16 x float> zeroinitializer | 
|  | ret <16 x float> %res | 
|  | } | 
|  |  | 
|  | define <8 x double> @ceil_mask_512_pd(<8 x double> %x, <8 x double> %y) nounwind { | 
|  | ; SSE41-LABEL: ceil_mask_512_pd: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    roundpd $10, %xmm3, %xmm8 | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm7, %xmm3 | 
|  | ; SSE41-NEXT:    roundpd $10, %xmm2, %xmm9 | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm6, %xmm2 | 
|  | ; SSE41-NEXT:    roundpd $10, %xmm1, %xmm10 | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm5, %xmm1 | 
|  | ; SSE41-NEXT:    roundpd $10, %xmm0, %xmm11 | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm4, %xmm0 | 
|  | ; SSE41-NEXT:    blendvpd %xmm0, %xmm11, %xmm4 | 
|  | ; SSE41-NEXT:    movapd %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    blendvpd %xmm0, %xmm10, %xmm5 | 
|  | ; SSE41-NEXT:    movapd %xmm2, %xmm0 | 
|  | ; SSE41-NEXT:    blendvpd %xmm0, %xmm9, %xmm6 | 
|  | ; SSE41-NEXT:    movapd %xmm3, %xmm0 | 
|  | ; SSE41-NEXT:    blendvpd %xmm0, %xmm8, %xmm7 | 
|  | ; SSE41-NEXT:    movapd %xmm4, %xmm0 | 
|  | ; SSE41-NEXT:    movapd %xmm5, %xmm1 | 
|  | ; SSE41-NEXT:    movapd %xmm6, %xmm2 | 
|  | ; SSE41-NEXT:    movapd %xmm7, %xmm3 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: ceil_mask_512_pd: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vcmpeqpd %ymm3, %ymm1, %ymm4 | 
|  | ; AVX-NEXT:    vcmpeqpd %ymm2, %ymm0, %ymm5 | 
|  | ; AVX-NEXT:    vroundpd $10, %ymm1, %ymm1 | 
|  | ; AVX-NEXT:    vroundpd $10, %ymm0, %ymm0 | 
|  | ; AVX-NEXT:    vblendvpd %ymm5, %ymm0, %ymm2, %ymm0 | 
|  | ; AVX-NEXT:    vblendvpd %ymm4, %ymm1, %ymm3, %ymm1 | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: ceil_mask_512_pd: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1 | 
|  | ; AVX512-NEXT:    vrndscalepd $10, %zmm0, %zmm1 {%k1} | 
|  | ; AVX512-NEXT:    vmovapd %zmm1, %zmm0 | 
|  | ; AVX512-NEXT:    retq | 
|  | %k = fcmp oeq <8 x double> %x, %y | 
|  | %call = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) | 
|  | %res = select <8 x i1> %k, <8 x double> %call, <8 x double> %y | 
|  | ret <8 x double> %res | 
|  | } | 
|  |  | 
|  | define <8 x double> @ceil_maskz_512_pd(<8 x double> %x, <8 x double> %y) nounwind { | 
|  | ; SSE41-LABEL: ceil_maskz_512_pd: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm3, %xmm7 | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm2, %xmm6 | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm1, %xmm5 | 
|  | ; SSE41-NEXT:    cmpeqpd %xmm0, %xmm4 | 
|  | ; SSE41-NEXT:    roundpd $10, %xmm3, %xmm3 | 
|  | ; SSE41-NEXT:    andpd %xmm7, %xmm3 | 
|  | ; SSE41-NEXT:    roundpd $10, %xmm2, %xmm2 | 
|  | ; SSE41-NEXT:    andpd %xmm6, %xmm2 | 
|  | ; SSE41-NEXT:    roundpd $10, %xmm1, %xmm1 | 
|  | ; SSE41-NEXT:    andpd %xmm5, %xmm1 | 
|  | ; SSE41-NEXT:    roundpd $10, %xmm0, %xmm0 | 
|  | ; SSE41-NEXT:    andpd %xmm4, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: ceil_maskz_512_pd: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vcmpeqpd %ymm3, %ymm1, %ymm3 | 
|  | ; AVX-NEXT:    vcmpeqpd %ymm2, %ymm0, %ymm2 | 
|  | ; AVX-NEXT:    vroundpd $10, %ymm1, %ymm1 | 
|  | ; AVX-NEXT:    vandpd %ymm1, %ymm3, %ymm1 | 
|  | ; AVX-NEXT:    vroundpd $10, %ymm0, %ymm0 | 
|  | ; AVX-NEXT:    vandpd %ymm0, %ymm2, %ymm0 | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: ceil_maskz_512_pd: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1 | 
|  | ; AVX512-NEXT:    vrndscalepd $10, %zmm0, %zmm0 {%k1} {z} | 
|  | ; AVX512-NEXT:    retq | 
|  | %k = fcmp oeq <8 x double> %x, %y | 
|  | %call = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) | 
|  | %res = select <8 x i1> %k, <8 x double> %call, <8 x double> zeroinitializer | 
|  | ret <8 x double> %res | 
|  | } | 
|  |  | 
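; NOTE (reader's annotation, not autogenerated): every scalar masked ceil test
; below reduces to one IR shape; a minimal sketch of the idiom, not itself one
; of the checked functions:
;
;   %mask = and i8 %k, 1                               ; keep mask bit 0
;   %nmask = icmp eq i8 %mask, 0                       ; invert it
;   %s = extractelement <4 x float> %x, i64 0
;   %call = tail call float @llvm.ceil.f32(float %s)
;   %dst = extractelement <4 x float> %w, i64 0        ; passthrough element
;   %low = select i1 %nmask, float %dst, float %call   ; passthrough vs. ceil
;   %res = insertelement <4 x float> %y, float %low, i64 0
;
; The low lane of %x is rounded and selected against the passthrough under
; the mask, then inserted into the low lane of %y; the upper lanes always
; come from %y.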
|  | define <4 x float> @ceil_mask_ss(<4 x float> %x, <4 x float> %y, <4 x float> %w, i8 %k) nounwind { | 
|  | ; SSE41-LABEL: ceil_mask_ss: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    testb $1, %dil | 
|  | ; SSE41-NEXT:    je LBB78_2 | 
|  | ; SSE41-NEXT:  ## %bb.1: | 
|  | ; SSE41-NEXT:    xorps %xmm2, %xmm2 | 
|  | ; SSE41-NEXT:    roundss $10, %xmm0, %xmm2 | 
|  | ; SSE41-NEXT:  LBB78_2: | 
|  | ; SSE41-NEXT:    movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3] | 
|  | ; SSE41-NEXT:    movaps %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: ceil_mask_ss: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    testb $1, %dil | 
|  | ; AVX-NEXT:    je LBB78_2 | 
|  | ; AVX-NEXT:  ## %bb.1: | 
|  | ; AVX-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm2 | 
|  | ; AVX-NEXT:  LBB78_2: | 
|  | ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = xmm2[0],xmm1[1,2,3] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: ceil_mask_ss: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX512-NEXT:    kmovw %edi, %k1 | 
|  | ; AVX512-NEXT:    vmovss %xmm0, %xmm1, %xmm2 {%k1} | 
|  | ; AVX512-NEXT:    vmovaps %xmm2, %xmm0 | 
|  | ; AVX512-NEXT:    retq | 
|  | %mask = and i8 %k, 1 | 
|  | %nmask = icmp eq i8 %mask, 0 | 
|  | %s = extractelement <4 x float> %x, i64 0 | 
|  | %call = tail call float @llvm.ceil.f32(float %s) | 
|  | %dst = extractelement <4 x float> %w, i64 0 | 
|  | %low = select i1 %nmask, float %dst, float %call | 
|  | %res = insertelement <4 x float> %y, float %low, i64 0 | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | define <4 x float> @ceil_maskz_ss(<4 x float> %x, <4 x float> %y, i8 %k) nounwind { | 
|  | ; SSE41-LABEL: ceil_maskz_ss: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    testb $1, %dil | 
|  | ; SSE41-NEXT:    xorps %xmm2, %xmm2 | 
|  | ; SSE41-NEXT:    je LBB79_2 | 
|  | ; SSE41-NEXT:  ## %bb.1: | 
|  | ; SSE41-NEXT:    xorps %xmm2, %xmm2 | 
|  | ; SSE41-NEXT:    roundss $10, %xmm0, %xmm2 | 
|  | ; SSE41-NEXT:  LBB79_2: | 
|  | ; SSE41-NEXT:    movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3] | 
|  | ; SSE41-NEXT:    movaps %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: ceil_maskz_ss: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    testb $1, %dil | 
|  | ; AVX-NEXT:    vxorps %xmm2, %xmm2, %xmm2 | 
|  | ; AVX-NEXT:    je LBB79_2 | 
|  | ; AVX-NEXT:  ## %bb.1: | 
|  | ; AVX-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm2 | 
|  | ; AVX-NEXT:  LBB79_2: | 
|  | ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = xmm2[0],xmm1[1,2,3] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: ceil_maskz_ss: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX512-NEXT:    kmovw %edi, %k1 | 
|  | ; AVX512-NEXT:    vmovss %xmm0, %xmm1, %xmm0 {%k1} {z} | 
|  | ; AVX512-NEXT:    retq | 
|  | %mask = and i8 %k, 1 | 
|  | %nmask = icmp eq i8 %mask, 0 | 
|  | %s = extractelement <4 x float> %x, i64 0 | 
|  | %call = tail call float @llvm.ceil.f32(float %s) | 
|  | %low = select i1 %nmask, float zeroinitializer, float %call | 
|  | %res = insertelement <4 x float> %y, float %low, i64 0 | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | define <2 x double> @ceil_mask_sd(<2 x double> %x, <2 x double> %y, <2 x double> %w, i8 %k) nounwind { | 
|  | ; SSE41-LABEL: ceil_mask_sd: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    testb $1, %dil | 
|  | ; SSE41-NEXT:    je LBB80_2 | 
|  | ; SSE41-NEXT:  ## %bb.1: | 
|  | ; SSE41-NEXT:    xorps %xmm2, %xmm2 | 
|  | ; SSE41-NEXT:    roundsd $10, %xmm0, %xmm2 | 
|  | ; SSE41-NEXT:  LBB80_2: | 
|  | ; SSE41-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] | 
|  | ; SSE41-NEXT:    movapd %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: ceil_mask_sd: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    testb $1, %dil | 
|  | ; AVX-NEXT:    je LBB80_2 | 
|  | ; AVX-NEXT:  ## %bb.1: | 
|  | ; AVX-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm2 | 
|  | ; AVX-NEXT:  LBB80_2: | 
|  | ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = xmm2[0],xmm1[1] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: ceil_mask_sd: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX512-NEXT:    kmovw %edi, %k1 | 
|  | ; AVX512-NEXT:    vmovsd %xmm0, %xmm1, %xmm2 {%k1} | 
|  | ; AVX512-NEXT:    vmovapd %xmm2, %xmm0 | 
|  | ; AVX512-NEXT:    retq | 
|  | %mask = and i8 %k, 1 | 
|  | %nmask = icmp eq i8 %mask, 0 | 
|  | %s = extractelement <2 x double> %x, i64 0 | 
|  | %call = tail call double @llvm.ceil.f64(double %s) | 
|  | %dst = extractelement <2 x double> %w, i64 0 | 
|  | %low = select i1 %nmask, double %dst, double %call | 
|  | %res = insertelement <2 x double> %y, double %low, i64 0 | 
|  | ret <2 x double> %res | 
|  | } | 
|  |  | 
|  | define <2 x double> @ceil_maskz_sd(<2 x double> %x, <2 x double> %y, i8 %k) nounwind { | 
|  | ; SSE41-LABEL: ceil_maskz_sd: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    testb $1, %dil | 
|  | ; SSE41-NEXT:    xorpd %xmm2, %xmm2 | 
|  | ; SSE41-NEXT:    je LBB81_2 | 
|  | ; SSE41-NEXT:  ## %bb.1: | 
|  | ; SSE41-NEXT:    xorps %xmm2, %xmm2 | 
|  | ; SSE41-NEXT:    roundsd $10, %xmm0, %xmm2 | 
|  | ; SSE41-NEXT:  LBB81_2: | 
|  | ; SSE41-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] | 
|  | ; SSE41-NEXT:    movapd %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: ceil_maskz_sd: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    testb $1, %dil | 
|  | ; AVX-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 | 
|  | ; AVX-NEXT:    je LBB81_2 | 
|  | ; AVX-NEXT:  ## %bb.1: | 
|  | ; AVX-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm2 | 
|  | ; AVX-NEXT:  LBB81_2: | 
|  | ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = xmm2[0],xmm1[1] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: ceil_maskz_sd: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX512-NEXT:    kmovw %edi, %k1 | 
|  | ; AVX512-NEXT:    vmovsd %xmm0, %xmm1, %xmm0 {%k1} {z} | 
|  | ; AVX512-NEXT:    retq | 
|  | %mask = and i8 %k, 1 | 
|  | %nmask = icmp eq i8 %mask, 0 | 
|  | %s = extractelement <2 x double> %x, i64 0 | 
|  | %call = tail call double @llvm.ceil.f64(double %s) | 
|  | %low = select i1 %nmask, double zeroinitializer, double %call | 
|  | %res = insertelement <2 x double> %y, double %low, i64 0 | 
|  | ret <2 x double> %res | 
|  | } | 
|  |  | 
|  | define <4 x float> @ceil_mask_ss_trunc(<4 x float> %x, <4 x float> %y, <4 x float> %w, i16 %k) nounwind { | 
|  | ; SSE41-LABEL: ceil_mask_ss_trunc: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    testb $1, %dil | 
|  | ; SSE41-NEXT:    je LBB82_2 | 
|  | ; SSE41-NEXT:  ## %bb.1: | 
|  | ; SSE41-NEXT:    xorps %xmm2, %xmm2 | 
|  | ; SSE41-NEXT:    roundss $10, %xmm0, %xmm2 | 
|  | ; SSE41-NEXT:  LBB82_2: | 
|  | ; SSE41-NEXT:    movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3] | 
|  | ; SSE41-NEXT:    movaps %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: ceil_mask_ss_trunc: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    testb $1, %dil | 
|  | ; AVX-NEXT:    je LBB82_2 | 
|  | ; AVX-NEXT:  ## %bb.1: | 
|  | ; AVX-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm2 | 
|  | ; AVX-NEXT:  LBB82_2: | 
|  | ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = xmm2[0],xmm1[1,2,3] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: ceil_mask_ss_trunc: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX512-NEXT:    kmovw %edi, %k1 | 
|  | ; AVX512-NEXT:    vmovss %xmm0, %xmm1, %xmm2 {%k1} | 
|  | ; AVX512-NEXT:    vmovaps %xmm2, %xmm0 | 
|  | ; AVX512-NEXT:    retq | 
|  | %mask = trunc i16 %k to i1 | 
|  | %s = extractelement <4 x float> %x, i64 0 | 
|  | %call = tail call float @llvm.ceil.f32(float %s) | 
|  | %dst = extractelement <4 x float> %w, i64 0 | 
|  | %low = select i1 %mask, float %call, float %dst | 
|  | %res = insertelement <4 x float> %y, float %low, i64 0 | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | define <4 x float> @ceil_maskz_ss_trunc(<4 x float> %x, <4 x float> %y, i16 %k) nounwind { | 
|  | ; SSE41-LABEL: ceil_maskz_ss_trunc: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    testb $1, %dil | 
|  | ; SSE41-NEXT:    jne LBB83_1 | 
|  | ; SSE41-NEXT:  ## %bb.2: | 
|  | ; SSE41-NEXT:    xorps %xmm0, %xmm0 | 
|  | ; SSE41-NEXT:    jmp LBB83_3 | 
|  | ; SSE41-NEXT:  LBB83_1: | 
|  | ; SSE41-NEXT:    roundss $10, %xmm0, %xmm0 | 
|  | ; SSE41-NEXT:  LBB83_3: | 
|  | ; SSE41-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] | 
|  | ; SSE41-NEXT:    movaps %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: ceil_maskz_ss_trunc: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    testb $1, %dil | 
|  | ; AVX-NEXT:    jne LBB83_1 | 
|  | ; AVX-NEXT:  ## %bb.2: | 
|  | ; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] | 
|  | ; AVX-NEXT:    retq | 
|  | ; AVX-NEXT:  LBB83_1: | 
|  | ; AVX-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: ceil_maskz_ss_trunc: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX512-NEXT:    kmovw %edi, %k1 | 
|  | ; AVX512-NEXT:    vmovss %xmm0, %xmm1, %xmm0 {%k1} {z} | 
|  | ; AVX512-NEXT:    retq | 
|  | %mask = trunc i16 %k to i1 | 
|  | %s = extractelement <4 x float> %x, i64 0 | 
|  | %call = tail call float @llvm.ceil.f32(float %s) | 
|  | %low = select i1 %mask, float %call, float zeroinitializer | 
|  | %res = insertelement <4 x float> %y, float %low, i64 0 | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | define <2 x double> @ceil_mask_sd_trunc(<2 x double> %x, <2 x double> %y, <2 x double> %w, i16 %k) nounwind { | 
|  | ; SSE41-LABEL: ceil_mask_sd_trunc: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    testb $1, %dil | 
|  | ; SSE41-NEXT:    je LBB84_2 | 
|  | ; SSE41-NEXT:  ## %bb.1: | 
|  | ; SSE41-NEXT:    xorps %xmm2, %xmm2 | 
|  | ; SSE41-NEXT:    roundsd $10, %xmm0, %xmm2 | 
|  | ; SSE41-NEXT:  LBB84_2: | 
|  | ; SSE41-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] | 
|  | ; SSE41-NEXT:    movapd %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: ceil_mask_sd_trunc: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    testb $1, %dil | 
|  | ; AVX-NEXT:    je LBB84_2 | 
|  | ; AVX-NEXT:  ## %bb.1: | 
|  | ; AVX-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm2 | 
|  | ; AVX-NEXT:  LBB84_2: | 
|  | ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = xmm2[0],xmm1[1] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: ceil_mask_sd_trunc: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX512-NEXT:    kmovw %edi, %k1 | 
|  | ; AVX512-NEXT:    vmovsd %xmm0, %xmm1, %xmm2 {%k1} | 
|  | ; AVX512-NEXT:    vmovapd %xmm2, %xmm0 | 
|  | ; AVX512-NEXT:    retq | 
|  | %mask = trunc i16 %k to i1 | 
|  | %s = extractelement <2 x double> %x, i64 0 | 
|  | %call = tail call double @llvm.ceil.f64(double %s) | 
|  | %dst = extractelement <2 x double> %w, i64 0 | 
|  | %low = select i1 %mask, double %call, double %dst | 
|  | %res = insertelement <2 x double> %y, double %low, i64 0 | 
|  | ret <2 x double> %res | 
|  | } | 
|  |  | 
|  | define <2 x double> @ceil_maskz_sd_trunc(<2 x double> %x, <2 x double> %y, i16 %k) nounwind { | 
|  | ; SSE41-LABEL: ceil_maskz_sd_trunc: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    testb $1, %dil | 
|  | ; SSE41-NEXT:    jne LBB85_1 | 
|  | ; SSE41-NEXT:  ## %bb.2: | 
|  | ; SSE41-NEXT:    xorpd %xmm0, %xmm0 | 
|  | ; SSE41-NEXT:    jmp LBB85_3 | 
|  | ; SSE41-NEXT:  LBB85_1: | 
|  | ; SSE41-NEXT:    roundsd $10, %xmm0, %xmm0 | 
|  | ; SSE41-NEXT:  LBB85_3: | 
|  | ; SSE41-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] | 
|  | ; SSE41-NEXT:    movapd %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: ceil_maskz_sd_trunc: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    testb $1, %dil | 
|  | ; AVX-NEXT:    jne LBB85_1 | 
|  | ; AVX-NEXT:  ## %bb.2: | 
|  | ; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] | 
|  | ; AVX-NEXT:    retq | 
|  | ; AVX-NEXT:  LBB85_1: | 
|  | ; AVX-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: ceil_maskz_sd_trunc: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm0 | 
|  | ; AVX512-NEXT:    kmovw %edi, %k1 | 
|  | ; AVX512-NEXT:    vmovsd %xmm0, %xmm1, %xmm0 {%k1} {z} | 
|  | ; AVX512-NEXT:    retq | 
|  | %mask = trunc i16 %k to i1 | 
|  | %s = extractelement <2 x double> %x, i64 0 | 
|  | %call = tail call double @llvm.ceil.f64(double %s) | 
|  | %low = select i1 %mask, double %call, double zeroinitializer | 
|  | %res = insertelement <2 x double> %y, double %low, i64 0 | 
|  | ret <2 x double> %res | 
|  | } | 
|  |  | 
|  | define <4 x float> @ceil_mask_ss_mask8(<4 x float> %x, <4 x float> %y, <4 x float> %w) nounwind { | 
|  | ; SSE41-LABEL: ceil_mask_ss_mask8: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    roundss $10, %xmm0, %xmm3 | 
|  | ; SSE41-NEXT:    cmpeqss %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    blendvps %xmm0, %xmm3, %xmm2 | 
|  | ; SSE41-NEXT:    blendps {{.*#+}} xmm2 = xmm2[0],xmm1[1,2,3] | 
|  | ; SSE41-NEXT:    movaps %xmm2, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: ceil_mask_ss_mask8: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm3 | 
|  | ; AVX-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vblendvps %xmm0, %xmm3, %xmm2, %xmm0 | 
|  | ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: ceil_mask_ss_mask8: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm3 | 
|  | ; AVX512-NEXT:    vcmpeqss %xmm1, %xmm0, %k1 | 
|  | ; AVX512-NEXT:    vmovss %xmm3, %xmm1, %xmm2 {%k1} | 
|  | ; AVX512-NEXT:    vmovaps %xmm2, %xmm0 | 
|  | ; AVX512-NEXT:    retq | 
|  | %mask1 = fcmp oeq <4 x float> %x, %y | 
|  | %mask = extractelement <4 x i1> %mask1, i64 0 | 
|  | %s = extractelement <4 x float> %x, i64 0 | 
|  | %call = tail call float @llvm.ceil.f32(float %s) | 
|  | %dst = extractelement <4 x float> %w, i64 0 | 
|  | %low = select i1 %mask, float %call, float %dst | 
|  | %res = insertelement <4 x float> %y, float %low, i64 0 | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | define <4 x float> @ceil_maskz_ss_mask8(<4 x float> %x, <4 x float> %y) nounwind { | 
|  | ; SSE41-LABEL: ceil_maskz_ss_mask8: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    roundss $10, %xmm0, %xmm2 | 
|  | ; SSE41-NEXT:    cmpeqss %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    andps %xmm2, %xmm0 | 
|  | ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: ceil_maskz_ss_mask8: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm2 | 
|  | ; AVX-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: ceil_maskz_ss_mask8: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm2 | 
|  | ; AVX512-NEXT:    vcmpeqss %xmm1, %xmm0, %k1 | 
|  | ; AVX512-NEXT:    vmovss %xmm2, %xmm1, %xmm0 {%k1} {z} | 
|  | ; AVX512-NEXT:    retq | 
|  | %mask1 = fcmp oeq <4 x float> %x, %y | 
|  | %mask = extractelement <4 x i1> %mask1, i64 0 | 
|  | %s = extractelement <4 x float> %x, i64 0 | 
|  | %call = tail call float @llvm.ceil.f32(float %s) | 
|  | %low = select i1 %mask, float %call, float zeroinitializer | 
|  | %res = insertelement <4 x float> %y, float %low, i64 0 | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | define <2 x double> @ceil_mask_sd_mask8(<2 x double> %x, <2 x double> %y, <2 x double> %w) nounwind { | 
|  | ; SSE41-LABEL: ceil_mask_sd_mask8: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    roundsd $10, %xmm0, %xmm3 | 
|  | ; SSE41-NEXT:    cmpeqsd %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm2 | 
|  | ; SSE41-NEXT:    blendpd {{.*#+}} xmm2 = xmm2[0],xmm1[1] | 
|  | ; SSE41-NEXT:    movapd %xmm2, %xmm0 | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: ceil_mask_sd_mask8: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm3 | 
|  | ; AVX-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vblendvpd %xmm0, %xmm3, %xmm2, %xmm0 | 
|  | ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: ceil_mask_sd_mask8: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm3 | 
|  | ; AVX512-NEXT:    vcmpeqsd %xmm1, %xmm0, %k1 | 
|  | ; AVX512-NEXT:    vmovsd %xmm3, %xmm1, %xmm2 {%k1} | 
|  | ; AVX512-NEXT:    vmovapd %xmm2, %xmm0 | 
|  | ; AVX512-NEXT:    retq | 
|  | %mask1 = fcmp oeq <2 x double> %x, %y | 
|  | %mask = extractelement <2 x i1> %mask1, i64 0 | 
|  | %s = extractelement <2 x double> %x, i64 0 | 
|  | %call = tail call double @llvm.ceil.f64(double %s) | 
|  | %dst = extractelement <2 x double> %w, i64 0 | 
|  | %low = select i1 %mask, double %call, double %dst | 
|  | %res = insertelement <2 x double> %y, double %low, i64 0 | 
|  | ret <2 x double> %res | 
|  | } | 
|  |  | 
|  | define <2 x double> @ceil_maskz_sd_mask8(<2 x double> %x, <2 x double> %y) nounwind { | 
|  | ; SSE41-LABEL: ceil_maskz_sd_mask8: | 
|  | ; SSE41:       ## %bb.0: | 
|  | ; SSE41-NEXT:    roundsd $10, %xmm0, %xmm2 | 
|  | ; SSE41-NEXT:    cmpeqsd %xmm1, %xmm0 | 
|  | ; SSE41-NEXT:    andpd %xmm2, %xmm0 | 
|  | ; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] | 
|  | ; SSE41-NEXT:    retq | 
|  | ; | 
|  | ; AVX-LABEL: ceil_maskz_sd_mask8: | 
|  | ; AVX:       ## %bb.0: | 
|  | ; AVX-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm2 | 
|  | ; AVX-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vandpd %xmm2, %xmm0, %xmm0 | 
|  | ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] | 
|  | ; AVX-NEXT:    retq | 
|  | ; | 
|  | ; AVX512-LABEL: ceil_maskz_sd_mask8: | 
|  | ; AVX512:       ## %bb.0: | 
|  | ; AVX512-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm2 | 
|  | ; AVX512-NEXT:    vcmpeqsd %xmm1, %xmm0, %k1 | 
|  | ; AVX512-NEXT:    vmovsd %xmm2, %xmm1, %xmm0 {%k1} {z} | 
|  | ; AVX512-NEXT:    retq | 
|  | %mask1 = fcmp oeq <2 x double> %x, %y | 
|  | %mask = extractelement <2 x i1> %mask1, i64 0 | 
|  | %s = extractelement <2 x double> %x, i64 0 | 
|  | %call = tail call double @llvm.ceil.f64(double %s) | 
|  | %low = select i1 %mask, double %call, double zeroinitializer | 
|  | %res = insertelement <2 x double> %y, double %low, i64 0 | 
|  | ret <2 x double> %res | 
|  | } |