; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck %s --check-prefix=NOFMA
; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+fma < %s | FileCheck %s --check-prefixes=FMA,FMA-AVX1
; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+fma4 < %s | FileCheck %s --check-prefix=FMA4
; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx512f < %s | FileCheck %s --check-prefixes=FMA,FMA-AVX512

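; Verify that negating the first operand of a constrained fma folds into FNMADD when FMA is available.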
define float @f1(float %0, float %1, float %2) #0 {
; NOFMA-LABEL: f1:
; NOFMA:       # %bb.0: # %entry
; NOFMA-NEXT:    pushq %rax
; NOFMA-NEXT:    .cfi_def_cfa_offset 16
; NOFMA-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; NOFMA-NEXT:    callq fmaf@PLT
; NOFMA-NEXT:    popq %rax
; NOFMA-NEXT:    .cfi_def_cfa_offset 8
; NOFMA-NEXT:    retq
;
; FMA-LABEL: f1:
; FMA:       # %bb.0: # %entry
; FMA-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: f1:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfnmaddss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT:    retq
entry:
  %3 = fneg float %0
  %result = call float @llvm.experimental.constrained.fma.f32(float %3, float %1, float %2,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret float %result
}

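; Verify that negating the first operand of a constrained fma folds into FNMADD when FMA is available.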
define double @f2(double %0, double %1, double %2) #0 {
; NOFMA-LABEL: f2:
; NOFMA:       # %bb.0: # %entry
; NOFMA-NEXT:    pushq %rax
; NOFMA-NEXT:    .cfi_def_cfa_offset 16
; NOFMA-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; NOFMA-NEXT:    callq fma@PLT
; NOFMA-NEXT:    popq %rax
; NOFMA-NEXT:    .cfi_def_cfa_offset 8
; NOFMA-NEXT:    retq
;
; FMA-LABEL: f2:
; FMA:       # %bb.0: # %entry
; FMA-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: f2:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfnmaddsd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT:    retq
entry:
  %3 = fneg double %0
  %result = call double @llvm.experimental.constrained.fma.f64(double %3, double %1, double %2,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret double %result
}

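; Verify that negating the addend of a constrained fma folds into FMSUB when FMA is available.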
define float @f3(float %0, float %1, float %2) #0 {
; NOFMA-LABEL: f3:
; NOFMA:       # %bb.0: # %entry
; NOFMA-NEXT:    pushq %rax
; NOFMA-NEXT:    .cfi_def_cfa_offset 16
; NOFMA-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; NOFMA-NEXT:    callq fmaf@PLT
; NOFMA-NEXT:    popq %rax
; NOFMA-NEXT:    .cfi_def_cfa_offset 8
; NOFMA-NEXT:    retq
;
; FMA-LABEL: f3:
; FMA:       # %bb.0: # %entry
; FMA-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: f3:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubss {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT:    retq
entry:
  %3 = fneg float %2
  %result = call float @llvm.experimental.constrained.fma.f32(float %0, float %1, float %3,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret float %result
}

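; Verify that negating the addend of a constrained fma folds into FMSUB when FMA is available.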
define double @f4(double %0, double %1, double %2) #0 {
; NOFMA-LABEL: f4:
; NOFMA:       # %bb.0: # %entry
; NOFMA-NEXT:    pushq %rax
; NOFMA-NEXT:    .cfi_def_cfa_offset 16
; NOFMA-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; NOFMA-NEXT:    callq fma@PLT
; NOFMA-NEXT:    popq %rax
; NOFMA-NEXT:    .cfi_def_cfa_offset 8
; NOFMA-NEXT:    retq
;
; FMA-LABEL: f4:
; FMA:       # %bb.0: # %entry
; FMA-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: f4:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubsd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT:    retq
entry:
  %3 = fneg double %2
  %result = call double @llvm.experimental.constrained.fma.f64(double %0, double %1, double %3,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret double %result
}

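; Verify that negating both the first operand and the addend of a constrained fma folds into FNMSUB when FMA is available.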
define float @f5(float %0, float %1, float %2) #0 {
; NOFMA-LABEL: f5:
; NOFMA:       # %bb.0: # %entry
; NOFMA-NEXT:    pushq %rax
; NOFMA-NEXT:    .cfi_def_cfa_offset 16
; NOFMA-NEXT:    movaps {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; NOFMA-NEXT:    xorps %xmm3, %xmm0
; NOFMA-NEXT:    xorps %xmm3, %xmm2
; NOFMA-NEXT:    callq fmaf@PLT
; NOFMA-NEXT:    popq %rax
; NOFMA-NEXT:    .cfi_def_cfa_offset 8
; NOFMA-NEXT:    retq
;
; FMA-LABEL: f5:
; FMA:       # %bb.0: # %entry
; FMA-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: f5:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfnmsubss {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT:    retq
entry:
  %3 = fneg float %0
  %4 = fneg float %2
  %result = call float @llvm.experimental.constrained.fma.f32(float %3, float %1, float %4,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret float %result
}

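; Verify that negating both the first operand and the addend of a constrained fma folds into FNMSUB when FMA is available.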
define double @f6(double %0, double %1, double %2) #0 {
; NOFMA-LABEL: f6:
; NOFMA:       # %bb.0: # %entry
; NOFMA-NEXT:    pushq %rax
; NOFMA-NEXT:    .cfi_def_cfa_offset 16
; NOFMA-NEXT:    movaps {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0]
; NOFMA-NEXT:    xorps %xmm3, %xmm0
; NOFMA-NEXT:    xorps %xmm3, %xmm2
; NOFMA-NEXT:    callq fma@PLT
; NOFMA-NEXT:    popq %rax
; NOFMA-NEXT:    .cfi_def_cfa_offset 8
; NOFMA-NEXT:    retq
;
; FMA-LABEL: f6:
; FMA:       # %bb.0: # %entry
; FMA-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: f6:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT:    retq
entry:
  %3 = fneg double %0
  %4 = fneg double %2
  %result = call double @llvm.experimental.constrained.fma.f64(double %3, double %1, double %4,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret double %result
}

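; Verify that negating the result of a constrained fma stays a separate sign flip after the FMA.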
define float @f7(float %0, float %1, float %2) #0 {
; NOFMA-LABEL: f7:
; NOFMA:       # %bb.0: # %entry
; NOFMA-NEXT:    pushq %rax
; NOFMA-NEXT:    .cfi_def_cfa_offset 16
; NOFMA-NEXT:    callq fmaf@PLT
; NOFMA-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; NOFMA-NEXT:    popq %rax
; NOFMA-NEXT:    .cfi_def_cfa_offset 8
; NOFMA-NEXT:    retq
;
; FMA-AVX1-LABEL: f7:
; FMA-AVX1:       # %bb.0: # %entry
; FMA-AVX1-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; FMA-AVX1-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA-AVX1-NEXT:    retq
;
; FMA4-LABEL: f7:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; FMA-AVX512-LABEL: f7:
; FMA-AVX512:       # %bb.0: # %entry
; FMA-AVX512-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; FMA-AVX512-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; FMA-AVX512-NEXT:    vxorps %xmm1, %xmm0, %xmm0
; FMA-AVX512-NEXT:    retq
entry:
  %3 = call float @llvm.experimental.constrained.fma.f32(float %0, float %1, float %2,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  %result = fneg float %3
  ret float %result
}

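; Verify that negating the result of a constrained fma stays a separate sign flip after the FMA.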
define double @f8(double %0, double %1, double %2) #0 {
; NOFMA-LABEL: f8:
; NOFMA:       # %bb.0: # %entry
; NOFMA-NEXT:    pushq %rax
; NOFMA-NEXT:    .cfi_def_cfa_offset 16
; NOFMA-NEXT:    callq fma@PLT
; NOFMA-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; NOFMA-NEXT:    popq %rax
; NOFMA-NEXT:    .cfi_def_cfa_offset 8
; NOFMA-NEXT:    retq
;
; FMA-LABEL: f8:
; FMA:       # %bb.0: # %entry
; FMA-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; FMA-NEXT:    vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: f8:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT:    vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA4-NEXT:    retq
entry:
  %3 = call double @llvm.experimental.constrained.fma.f64(double %0, double %1, double %2,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  %result = fneg double %3
  ret double %result
}

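; Verify that negating the first operand, the addend, and the result of a constrained fma yields FNMSUB plus a separate sign flip of the result when FMA is available.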
define float @f9(float %0, float %1, float %2) #0 {
; NOFMA-LABEL: f9:
; NOFMA:       # %bb.0: # %entry
; NOFMA-NEXT:    pushq %rax
; NOFMA-NEXT:    .cfi_def_cfa_offset 16
; NOFMA-NEXT:    movaps {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; NOFMA-NEXT:    xorps %xmm3, %xmm0
; NOFMA-NEXT:    xorps %xmm3, %xmm2
; NOFMA-NEXT:    callq fmaf@PLT
; NOFMA-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; NOFMA-NEXT:    popq %rax
; NOFMA-NEXT:    .cfi_def_cfa_offset 8
; NOFMA-NEXT:    retq
;
; FMA-AVX1-LABEL: f9:
; FMA-AVX1:       # %bb.0: # %entry
; FMA-AVX1-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; FMA-AVX1-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA-AVX1-NEXT:    retq
;
; FMA4-LABEL: f9:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfnmsubss {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; FMA-AVX512-LABEL: f9:
; FMA-AVX512:       # %bb.0: # %entry
; FMA-AVX512-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; FMA-AVX512-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; FMA-AVX512-NEXT:    vxorps %xmm1, %xmm0, %xmm0
; FMA-AVX512-NEXT:    retq
entry:
  %3 = fneg float %0
  %4 = fneg float %2
  %5 = call float @llvm.experimental.constrained.fma.f32(float %3, float %1, float %4,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  %result = fneg float %5
  ret float %result
}

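; Verify that negating the first operand, the addend, and the result of a constrained fma yields FNMSUB plus a separate sign flip of the result when FMA is available.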
define double @f10(double %0, double %1, double %2) #0 {
; NOFMA-LABEL: f10:
; NOFMA:       # %bb.0: # %entry
; NOFMA-NEXT:    pushq %rax
; NOFMA-NEXT:    .cfi_def_cfa_offset 16
; NOFMA-NEXT:    movaps {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0]
; NOFMA-NEXT:    xorps %xmm3, %xmm0
; NOFMA-NEXT:    xorps %xmm3, %xmm2
; NOFMA-NEXT:    callq fma@PLT
; NOFMA-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; NOFMA-NEXT:    popq %rax
; NOFMA-NEXT:    .cfi_def_cfa_offset 8
; NOFMA-NEXT:    retq
;
; FMA-LABEL: f10:
; FMA:       # %bb.0: # %entry
; FMA-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; FMA-NEXT:    vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: f10:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT:    vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA4-NEXT:    retq
entry:
  %3 = fneg double %0
  %4 = fneg double %2
  %5 = call double @llvm.experimental.constrained.fma.f64(double %3, double %1, double %4,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  %result = fneg double %5
  ret double %result
}

; Verify constrained fmul and fadd aren't fused.
define float @f11(float %0, float %1, float %2) #0 {
; NOFMA-LABEL: f11:
; NOFMA:       # %bb.0: # %entry
; NOFMA-NEXT:    mulss %xmm1, %xmm0
; NOFMA-NEXT:    addss %xmm2, %xmm0
; NOFMA-NEXT:    retq
;
; FMA-LABEL: f11:
; FMA:       # %bb.0: # %entry
; FMA-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; FMA-NEXT:    vaddss %xmm2, %xmm0, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: f11:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    vaddss %xmm2, %xmm0, %xmm0
; FMA4-NEXT:    retq
entry:
  %3 = call float @llvm.experimental.constrained.fmul.f32(float %0, float %1,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  %4 = call float @llvm.experimental.constrained.fadd.f32(float %3, float %2,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret float %4
}

; Verify constrained fmul and fadd aren't fused.
define double @f12(double %0, double %1, double %2) #0 {
; NOFMA-LABEL: f12:
; NOFMA:       # %bb.0: # %entry
; NOFMA-NEXT:    mulsd %xmm1, %xmm0
; NOFMA-NEXT:    addsd %xmm2, %xmm0
; NOFMA-NEXT:    retq
;
; FMA-LABEL: f12:
; FMA:       # %bb.0: # %entry
; FMA-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; FMA-NEXT:    vaddsd %xmm2, %xmm0, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: f12:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    vaddsd %xmm2, %xmm0, %xmm0
; FMA4-NEXT:    retq
entry:
  %3 = call double @llvm.experimental.constrained.fmul.f64(double %0, double %1,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  %4 = call double @llvm.experimental.constrained.fadd.f64(double %3, double %2,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret double %4
}

; Verify that fmuladd(3.5) isn't simplified when the rounding mode is
; unknown.
define float @f15() #0 {
; NOFMA-LABEL: f15:
; NOFMA:       # %bb.0: # %entry
; NOFMA-NEXT:    movss {{.*#+}} xmm1 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
; NOFMA-NEXT:    movaps %xmm1, %xmm0
; NOFMA-NEXT:    mulss %xmm1, %xmm0
; NOFMA-NEXT:    addss %xmm1, %xmm0
; NOFMA-NEXT:    retq
;
; FMA-LABEL: f15:
; FMA:       # %bb.0: # %entry
; FMA-NEXT:    vmovss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
; FMA-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: f15:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vmovss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
; FMA4-NEXT:    vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
; FMA4-NEXT:    retq
entry:
  %result = call float @llvm.experimental.constrained.fmuladd.f32(
      float 3.5,
      float 3.5,
      float 3.5,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret float %result
}

; Verify that fmuladd(42.1) isn't simplified when the rounding mode is
; unknown.
define double @f16() #0 {
; NOFMA-LABEL: f16:
; NOFMA:       # %bb.0: # %entry
; NOFMA-NEXT:    movsd {{.*#+}} xmm1 = [4.2100000000000001E+1,0.0E+0]
; NOFMA-NEXT:    movapd %xmm1, %xmm0
; NOFMA-NEXT:    mulsd %xmm1, %xmm0
; NOFMA-NEXT:    addsd %xmm1, %xmm0
; NOFMA-NEXT:    retq
;
; FMA-LABEL: f16:
; FMA:       # %bb.0: # %entry
; FMA-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; FMA-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: f16:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; FMA4-NEXT:    vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
; FMA4-NEXT:    retq
entry:
  %result = call double @llvm.experimental.constrained.fmuladd.f64(
      double 42.1,
      double 42.1,
      double 42.1,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret double %result
}

; Verify that fma(3.5) isn't simplified when the rounding mode is
; unknown.
define float @f17() #0 {
; NOFMA-LABEL: f17:
; NOFMA:       # %bb.0: # %entry
; NOFMA-NEXT:    pushq %rax
; NOFMA-NEXT:    .cfi_def_cfa_offset 16
; NOFMA-NEXT:    movss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
; NOFMA-NEXT:    movaps %xmm0, %xmm1
; NOFMA-NEXT:    movaps %xmm0, %xmm2
; NOFMA-NEXT:    callq fmaf@PLT
; NOFMA-NEXT:    popq %rax
; NOFMA-NEXT:    .cfi_def_cfa_offset 8
; NOFMA-NEXT:    retq
;
; FMA-LABEL: f17:
; FMA:       # %bb.0: # %entry
; FMA-NEXT:    vmovss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
; FMA-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: f17:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vmovss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
; FMA4-NEXT:    vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
; FMA4-NEXT:    retq
entry:
  %result = call float @llvm.experimental.constrained.fma.f32(
      float 3.5,
      float 3.5,
      float 3.5,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret float %result
}

; Verify that fma(42.1) isn't simplified when the rounding mode is
; unknown.
define double @f18() #0 {
; NOFMA-LABEL: f18:
; NOFMA:       # %bb.0: # %entry
; NOFMA-NEXT:    pushq %rax
; NOFMA-NEXT:    .cfi_def_cfa_offset 16
; NOFMA-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; NOFMA-NEXT:    movaps %xmm0, %xmm1
; NOFMA-NEXT:    movaps %xmm0, %xmm2
; NOFMA-NEXT:    callq fma@PLT
; NOFMA-NEXT:    popq %rax
; NOFMA-NEXT:    .cfi_def_cfa_offset 8
; NOFMA-NEXT:    retq
;
; FMA-LABEL: f18:
; FMA:       # %bb.0: # %entry
; FMA-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; FMA-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: f18:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; FMA4-NEXT:    vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
; FMA4-NEXT:    retq
entry:
  %result = call double @llvm.experimental.constrained.fma.f64(
      double 42.1,
      double 42.1,
      double 42.1,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret double %result
}

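; Verify that negating the first operand of a constrained vector fma folds into FNMADD when FMA is available.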
define <4 x float> @f19(<4 x float> %0, <4 x float> %1, <4 x float> %2) #0 {
; NOFMA-LABEL: f19:
; NOFMA:       # %bb.0: # %entry
; NOFMA-NEXT:    subq $88, %rsp
; NOFMA-NEXT:    .cfi_def_cfa_offset 96
; NOFMA-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; NOFMA-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; NOFMA-NEXT:    shufps {{.*#+}} xmm2 = xmm2[3,3,3,3]
; NOFMA-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; NOFMA-NEXT:    callq fmaf@PLT
; NOFMA-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; NOFMA-NEXT:    movhlps {{.*#+}} xmm2 = xmm2[1,1]
; NOFMA-NEXT:    pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm0 = mem[2,3,2,3]
; NOFMA-NEXT:    callq fmaf@PLT
; NOFMA-NEXT:    punpckldq (%rsp), %xmm0 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; NOFMA-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; NOFMA-NEXT:    callq fmaf@PLT
; NOFMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; NOFMA-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1,1,1]
; NOFMA-NEXT:    pshufd $85, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm0 = mem[1,1,1,1]
; NOFMA-NEXT:    callq fmaf@PLT
; NOFMA-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; NOFMA-NEXT:    punpcklqdq (%rsp), %xmm1 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm1 = xmm1[0],mem[0]
; NOFMA-NEXT:    movdqa %xmm1, %xmm0
; NOFMA-NEXT:    addq $88, %rsp
; NOFMA-NEXT:    .cfi_def_cfa_offset 8
; NOFMA-NEXT:    retq
;
; FMA-LABEL: f19:
; FMA:       # %bb.0: # %entry
; FMA-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: f19:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT:    retq
entry:
  %3 = fneg <4 x float> %0
  %result = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %3, <4 x float> %1, <4 x float> %2,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

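; Verify that negating the first operand of a constrained vector fma folds into FNMADD when FMA is available.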
define <2 x double> @f20(<2 x double> %0, <2 x double> %1, <2 x double> %2) #0 {
; NOFMA-LABEL: f20:
; NOFMA:       # %bb.0: # %entry
; NOFMA-NEXT:    subq $72, %rsp
; NOFMA-NEXT:    .cfi_def_cfa_offset 80
; NOFMA-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    movaps %xmm1, (%rsp) # 16-byte Spill
; NOFMA-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; NOFMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    callq fma@PLT
; NOFMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; NOFMA-NEXT:    movhlps {{.*#+}} xmm2 = xmm2[1,1]
; NOFMA-NEXT:    pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm0 = mem[2,3,2,3]
; NOFMA-NEXT:    callq fma@PLT
; NOFMA-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; NOFMA-NEXT:    movdqa %xmm1, %xmm0
; NOFMA-NEXT:    addq $72, %rsp
; NOFMA-NEXT:    .cfi_def_cfa_offset 8
; NOFMA-NEXT:    retq
;
; FMA-LABEL: f20:
; FMA:       # %bb.0: # %entry
; FMA-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: f20:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT:    retq
entry:
  %3 = fneg <2 x double> %0
  %result = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %3, <2 x double> %1, <2 x double> %2,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}

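; Verify that negating the addend of a constrained vector fma folds into FMSUB when FMA is available.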
define <4 x float> @f21(<4 x float> %0, <4 x float> %1, <4 x float> %2) #0 {
; NOFMA-LABEL: f21:
; NOFMA:       # %bb.0: # %entry
; NOFMA-NEXT:    subq $88, %rsp
; NOFMA-NEXT:    .cfi_def_cfa_offset 96
; NOFMA-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; NOFMA-NEXT:    movdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; NOFMA-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; NOFMA-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
; NOFMA-NEXT:    callq fmaf@PLT
; NOFMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; NOFMA-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
; NOFMA-NEXT:    pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm2 = mem[2,3,2,3]
; NOFMA-NEXT:    callq fmaf@PLT
; NOFMA-NEXT:    unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; NOFMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; NOFMA-NEXT:    callq fmaf@PLT
; NOFMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; NOFMA-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; NOFMA-NEXT:    pshufd $85, {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm2 = mem[1,1,1,1]
; NOFMA-NEXT:    callq fmaf@PLT
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; NOFMA-NEXT:    unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm1 = xmm1[0],mem[0]
; NOFMA-NEXT:    movaps %xmm1, %xmm0
; NOFMA-NEXT:    addq $88, %rsp
; NOFMA-NEXT:    .cfi_def_cfa_offset 8
; NOFMA-NEXT:    retq
;
; FMA-LABEL: f21:
; FMA:       # %bb.0: # %entry
; FMA-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: f21:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT:    retq
entry:
  %3 = fneg <4 x float> %2
  %result = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %0, <4 x float> %1, <4 x float> %3,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

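; Verify that negating the addend of a constrained vector fma folds into FMSUB when FMA is available.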
define <2 x double> @f22(<2 x double> %0, <2 x double> %1, <2 x double> %2) #0 {
; NOFMA-LABEL: f22:
; NOFMA:       # %bb.0: # %entry
; NOFMA-NEXT:    subq $72, %rsp
; NOFMA-NEXT:    .cfi_def_cfa_offset 80
; NOFMA-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NOFMA-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; NOFMA-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    callq fma@PLT
; NOFMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; NOFMA-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
; NOFMA-NEXT:    pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm2 = mem[2,3,2,3]
; NOFMA-NEXT:    callq fma@PLT
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; NOFMA-NEXT:    movaps %xmm1, %xmm0
; NOFMA-NEXT:    addq $72, %rsp
; NOFMA-NEXT:    .cfi_def_cfa_offset 8
; NOFMA-NEXT:    retq
;
; FMA-LABEL: f22:
; FMA:       # %bb.0: # %entry
; FMA-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: f22:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubpd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT:    retq
entry:
  %3 = fneg <2 x double> %2
  %result = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %0, <2 x double> %1, <2 x double> %3,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}

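; Verify that negating both the first operand and the addend of a constrained vector fma folds into FNMSUB when FMA is available.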
define <4 x float> @f23(<4 x float> %0, <4 x float> %1, <4 x float> %2) #0 {
; NOFMA-LABEL: f23:
; NOFMA:       # %bb.0: # %entry
; NOFMA-NEXT:    subq $88, %rsp
; NOFMA-NEXT:    .cfi_def_cfa_offset 96
; NOFMA-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    movdqa {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; NOFMA-NEXT:    pxor %xmm3, %xmm0
; NOFMA-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    pxor %xmm3, %xmm2
; NOFMA-NEXT:    movdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; NOFMA-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
; NOFMA-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; NOFMA-NEXT:    callq fmaf@PLT
; NOFMA-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
; NOFMA-NEXT:    pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm0 = mem[2,3,2,3]
; NOFMA-NEXT:    pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm2 = mem[2,3,2,3]
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
; NOFMA-NEXT:    callq fmaf@PLT
; NOFMA-NEXT:    punpckldq (%rsp), %xmm0 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; NOFMA-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; NOFMA-NEXT:    callq fmaf@PLT
; NOFMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    pshufd $85, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm0 = mem[1,1,1,1]
; NOFMA-NEXT:    pshufd $85, {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm2 = mem[1,1,1,1]
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; NOFMA-NEXT:    callq fmaf@PLT
; NOFMA-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; NOFMA-NEXT:    punpcklqdq (%rsp), %xmm1 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm1 = xmm1[0],mem[0]
; NOFMA-NEXT:    movdqa %xmm1, %xmm0
; NOFMA-NEXT:    addq $88, %rsp
; NOFMA-NEXT:    .cfi_def_cfa_offset 8
; NOFMA-NEXT:    retq
;
; FMA-LABEL: f23:
; FMA:       # %bb.0: # %entry
; FMA-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: f23:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT:    retq
entry:
  %3 = fneg <4 x float> %0
  %4 = fneg <4 x float> %2
  %result = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %3, <4 x float> %1, <4 x float> %4,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

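; Verify that negating both the first operand and the addend of a constrained vector fma folds into FNMSUB when FMA is available.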
define <2 x double> @f24(<2 x double> %0, <2 x double> %1, <2 x double> %2) #0 {
; NOFMA-LABEL: f24:
; NOFMA:       # %bb.0: # %entry
; NOFMA-NEXT:    subq $72, %rsp
; NOFMA-NEXT:    .cfi_def_cfa_offset 80
; NOFMA-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    movaps {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0]
; NOFMA-NEXT:    xorps %xmm3, %xmm0
; NOFMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NOFMA-NEXT:    xorps %xmm3, %xmm2
; NOFMA-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    callq fma@PLT
; NOFMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    pshufd $238, (%rsp), %xmm0 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm0 = mem[2,3,2,3]
; NOFMA-NEXT:    pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm2 = mem[2,3,2,3]
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
; NOFMA-NEXT:    callq fma@PLT
; NOFMA-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; NOFMA-NEXT:    movdqa %xmm1, %xmm0
; NOFMA-NEXT:    addq $72, %rsp
; NOFMA-NEXT:    .cfi_def_cfa_offset 8
; NOFMA-NEXT:    retq
;
; FMA-LABEL: f24:
; FMA:       # %bb.0: # %entry
; FMA-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: f24:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT:    retq
entry:
  %3 = fneg <2 x double> %0
  %4 = fneg <2 x double> %2
  %result = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %3, <2 x double> %1, <2 x double> %4,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}

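; Verify that negating the result of a constrained vector fma stays a separate sign flip after the FMA.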
define <4 x float> @f25(<4 x float> %0, <4 x float> %1, <4 x float> %2) #0 {
; NOFMA-LABEL: f25:
; NOFMA:       # %bb.0: # %entry
; NOFMA-NEXT:    subq $88, %rsp
; NOFMA-NEXT:    .cfi_def_cfa_offset 96
; NOFMA-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; NOFMA-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; NOFMA-NEXT:    shufps {{.*#+}} xmm2 = xmm2[3,3,3,3]
; NOFMA-NEXT:    callq fmaf@PLT
; NOFMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; NOFMA-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; NOFMA-NEXT:    movhlps {{.*#+}} xmm2 = xmm2[1,1]
; NOFMA-NEXT:    callq fmaf@PLT
; NOFMA-NEXT:    unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; NOFMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; NOFMA-NEXT:    callq fmaf@PLT
; NOFMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; NOFMA-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; NOFMA-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1,1,1]
; NOFMA-NEXT:    callq fmaf@PLT
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; NOFMA-NEXT:    unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm1 = xmm1[0],mem[0]
; NOFMA-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; NOFMA-NEXT:    movaps %xmm1, %xmm0
; NOFMA-NEXT:    addq $88, %rsp
; NOFMA-NEXT:    .cfi_def_cfa_offset 8
; NOFMA-NEXT:    retq
;
; FMA-AVX1-LABEL: f25:
; FMA-AVX1:       # %bb.0: # %entry
; FMA-AVX1-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; FMA-AVX1-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA-AVX1-NEXT:    retq
;
; FMA4-LABEL: f25:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; FMA-AVX512-LABEL: f25:
; FMA-AVX512:       # %bb.0: # %entry
; FMA-AVX512-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; FMA-AVX512-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; FMA-AVX512-NEXT:    vxorps %xmm1, %xmm0, %xmm0
; FMA-AVX512-NEXT:    retq
entry:
  %3 = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %0, <4 x float> %1, <4 x float> %2,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  %result = fneg <4 x float> %3
  ret <4 x float> %result
}

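; Verify that negating the result of a constrained vector fma stays a separate sign flip after the FMA.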
define <2 x double> @f26(<2 x double> %0, <2 x double> %1, <2 x double> %2) #0 {
; NOFMA-LABEL: f26:
; NOFMA:       # %bb.0: # %entry
; NOFMA-NEXT:    subq $72, %rsp
; NOFMA-NEXT:    .cfi_def_cfa_offset 80
; NOFMA-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NOFMA-NEXT:    callq fma@PLT
; NOFMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; NOFMA-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; NOFMA-NEXT:    movhlps {{.*#+}} xmm2 = xmm2[1,1]
; NOFMA-NEXT:    callq fma@PLT
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; NOFMA-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; NOFMA-NEXT:    movaps %xmm1, %xmm0
; NOFMA-NEXT:    addq $72, %rsp
; NOFMA-NEXT:    .cfi_def_cfa_offset 8
; NOFMA-NEXT:    retq
;
; FMA-LABEL: f26:
; FMA:       # %bb.0: # %entry
; FMA-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; FMA-NEXT:    vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: f26:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT:    vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA4-NEXT:    retq
entry:
  %3 = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %0, <2 x double> %1, <2 x double> %2,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  %result = fneg <2 x double> %3
  ret <2 x double> %result
}

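; Verify that negating the first operand, the addend, and the result of a constrained vector fma yields FNMSUB plus a separate sign flip of the result when FMA is available.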
define <4 x float> @f27(<4 x float> %0, <4 x float> %1, <4 x float> %2) #0 {
; NOFMA-LABEL: f27:
; NOFMA:       # %bb.0: # %entry
; NOFMA-NEXT:    subq $88, %rsp
; NOFMA-NEXT:    .cfi_def_cfa_offset 96
; NOFMA-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    movdqa {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; NOFMA-NEXT:    pxor %xmm3, %xmm0
; NOFMA-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    pxor %xmm3, %xmm2
; NOFMA-NEXT:    movdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; NOFMA-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
; NOFMA-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; NOFMA-NEXT:    callq fmaf@PLT
; NOFMA-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
; NOFMA-NEXT:    pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm0 = mem[2,3,2,3]
; NOFMA-NEXT:    pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm2 = mem[2,3,2,3]
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
; NOFMA-NEXT:    callq fmaf@PLT
; NOFMA-NEXT:    punpckldq (%rsp), %xmm0 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; NOFMA-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; NOFMA-NEXT:    callq fmaf@PLT
; NOFMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    pshufd $85, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm0 = mem[1,1,1,1]
; NOFMA-NEXT:    pshufd $85, {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm2 = mem[1,1,1,1]
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; NOFMA-NEXT:    callq fmaf@PLT
; NOFMA-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; NOFMA-NEXT:    punpcklqdq (%rsp), %xmm1 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm1 = xmm1[0],mem[0]
; NOFMA-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; NOFMA-NEXT:    movdqa %xmm1, %xmm0
; NOFMA-NEXT:    addq $88, %rsp
; NOFMA-NEXT:    .cfi_def_cfa_offset 8
; NOFMA-NEXT:    retq
;
; FMA-AVX1-LABEL: f27:
; FMA-AVX1:       # %bb.0: # %entry
; FMA-AVX1-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; FMA-AVX1-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA-AVX1-NEXT:    retq
;
; FMA4-LABEL: f27:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; FMA-AVX512-LABEL: f27:
; FMA-AVX512:       # %bb.0: # %entry
; FMA-AVX512-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; FMA-AVX512-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; FMA-AVX512-NEXT:    vxorps %xmm1, %xmm0, %xmm0
; FMA-AVX512-NEXT:    retq
entry:
  %3 = fneg <4 x float> %0
  %4 = fneg <4 x float> %2
  %5 = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %3, <4 x float> %1, <4 x float> %4,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  %result = fneg <4 x float> %5
  ret <4 x float> %result
}

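; Verify that negating the first operand, the addend, and the result of a constrained vector fma yields FNMSUB plus a separate sign flip of the result when FMA is available.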
define <2 x double> @f28(<2 x double> %0, <2 x double> %1, <2 x double> %2) #0 {
; NOFMA-LABEL: f28:
; NOFMA:       # %bb.0: # %entry
; NOFMA-NEXT:    subq $72, %rsp
; NOFMA-NEXT:    .cfi_def_cfa_offset 80
; NOFMA-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    movaps {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0]
; NOFMA-NEXT:    xorps %xmm3, %xmm0
; NOFMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NOFMA-NEXT:    xorps %xmm3, %xmm2
; NOFMA-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    callq fma@PLT
; NOFMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NOFMA-NEXT:    pshufd $238, (%rsp), %xmm0 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm0 = mem[2,3,2,3]
; NOFMA-NEXT:    pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload
; NOFMA-NEXT:    # xmm2 = mem[2,3,2,3]
; NOFMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
; NOFMA-NEXT:    callq fma@PLT
; NOFMA-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; NOFMA-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; NOFMA-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; NOFMA-NEXT:    movdqa %xmm1, %xmm0
; NOFMA-NEXT:    addq $72, %rsp
; NOFMA-NEXT:    .cfi_def_cfa_offset 8
; NOFMA-NEXT:    retq
;
; FMA-LABEL: f28:
; FMA:       # %bb.0: # %entry
; FMA-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; FMA-NEXT:    vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: f28:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT:    vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA4-NEXT:    retq
entry:
  %3 = fneg <2 x double> %0
  %4 = fneg <2 x double> %2
  %5 = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %3, <2 x double> %1, <2 x double> %4,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  %result = fneg <2 x double> %5
  ret <2 x double> %result
}

attributes #0 = { strictfp }

declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
declare float @llvm.experimental.constrained.fmuladd.f32(float, float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fmuladd.f64(double, double, double, metadata, metadata)