blob: ce8e00e5bc00df00e5e08212bbcd7102a218d78f [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
;
; VFMADD
;
define void @test_vfmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
; GENERIC-LABEL: test_vfmaddpd_128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; GENERIC-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; GENERIC-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; GENERIC-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; GENERIC-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; GENERIC-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmaddpd_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; BDVER2-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfmaddpd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; HASWELL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; HASWELL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; HASWELL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50]
; HASWELL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50]
; HASWELL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfmaddpd_128:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; BROADWELL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; BROADWELL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; BROADWELL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; BROADWELL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; BROADWELL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfmaddpd_128:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; SKYLAKE-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; SKYLAKE-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfmaddpd_128:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; KNL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; KNL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; KNL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50]
; KNL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50]
; KNL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfmaddpd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
; SKX-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
; SKX-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKX-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; SKX-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; SKX-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfmaddpd_128:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; ZNVER1-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; ZNVER1-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; ZNVER1-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50]
; ZNVER1-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50]
; ZNVER1-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfmadd132pd $2, $1, $0 \0A\09 vfmadd213pd $2, $1, $0 \0A\09 vfmadd231pd $2, $1, $0 \0A\09 vfmadd132pd $3, $1, $0 \0A\09 vfmadd213pd $3, $1, $0 \0A\09 vfmadd231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
define void @test_vfmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
; GENERIC-LABEL: test_vfmaddpd_256:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
; GENERIC-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
; GENERIC-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
; GENERIC-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:0.50]
; GENERIC-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50]
; GENERIC-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmaddpd_256:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
; BDVER2-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
; BDVER2-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
; BDVER2-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:1.00]
; BDVER2-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:1.00]
; BDVER2-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfmaddpd_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
; HASWELL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
; HASWELL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
; HASWELL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
; HASWELL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
; HASWELL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfmaddpd_256:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
; BROADWELL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
; BROADWELL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
; BROADWELL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
; BROADWELL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
; BROADWELL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfmaddpd_256:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
; SKYLAKE-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
; SKYLAKE-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfmaddpd_256:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
; KNL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
; KNL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
; KNL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
; KNL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
; KNL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfmaddpd_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50]
; SKX-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50]
; SKX-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50]
; SKX-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
; SKX-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
; SKX-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: vzeroupper # sched: [0:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfmaddpd_256:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
; ZNVER1-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
; ZNVER1-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
; ZNVER1-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
; ZNVER1-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
; ZNVER1-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfmadd132pd $2, $1, $0 \0A\09 vfmadd213pd $2, $1, $0 \0A\09 vfmadd231pd $2, $1, $0 \0A\09 vfmadd132pd $3, $1, $0 \0A\09 vfmadd213pd $3, $1, $0 \0A\09 vfmadd231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
ret void
}
define void @test_vfmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
; GENERIC-LABEL: test_vfmaddps_128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; GENERIC-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; GENERIC-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; GENERIC-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; GENERIC-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; GENERIC-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmaddps_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; BDVER2-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfmaddps_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; HASWELL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; HASWELL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50]
; HASWELL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50]
; HASWELL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfmaddps_128:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; BROADWELL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; BROADWELL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; BROADWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; BROADWELL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; BROADWELL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfmaddps_128:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; SKYLAKE-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; SKYLAKE-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfmaddps_128:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; KNL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; KNL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50]
; KNL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50]
; KNL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfmaddps_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
; SKX-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
; SKX-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; SKX-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; SKX-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfmaddps_128:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; ZNVER1-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; ZNVER1-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; ZNVER1-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50]
; ZNVER1-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50]
; ZNVER1-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfmadd132ps $2, $1, $0 \0A\09 vfmadd213ps $2, $1, $0 \0A\09 vfmadd231ps $2, $1, $0 \0A\09 vfmadd132ps $3, $1, $0 \0A\09 vfmadd213ps $3, $1, $0 \0A\09 vfmadd231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
define void @test_vfmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
; GENERIC-LABEL: test_vfmaddps_256:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
; GENERIC-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
; GENERIC-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
; GENERIC-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:0.50]
; GENERIC-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50]
; GENERIC-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmaddps_256:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
; BDVER2-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
; BDVER2-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
; BDVER2-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:1.00]
; BDVER2-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:1.00]
; BDVER2-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfmaddps_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
; HASWELL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
; HASWELL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
; HASWELL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
; HASWELL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfmaddps_256:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
; BROADWELL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
; BROADWELL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
; BROADWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
; BROADWELL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
; BROADWELL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfmaddps_256:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
; SKYLAKE-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
; SKYLAKE-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfmaddps_256:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
; KNL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
; KNL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
; KNL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
; KNL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfmaddps_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50]
; SKX-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50]
; SKX-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
; SKX-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
; SKX-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: vzeroupper # sched: [0:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfmaddps_256:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
; ZNVER1-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
; ZNVER1-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
; ZNVER1-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
; ZNVER1-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
; ZNVER1-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfmadd132ps $2, $1, $0 \0A\09 vfmadd213ps $2, $1, $0 \0A\09 vfmadd231ps $2, $1, $0 \0A\09 vfmadd132ps $3, $1, $0 \0A\09 vfmadd213ps $3, $1, $0 \0A\09 vfmadd231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
ret void
}
define void @test_vfmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
; GENERIC-LABEL: test_vfmaddsd_128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; GENERIC-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; GENERIC-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; GENERIC-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; GENERIC-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; GENERIC-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmaddsd_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; BDVER2-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfmaddsd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; HASWELL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; HASWELL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; HASWELL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; HASWELL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; HASWELL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfmaddsd_128:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; BROADWELL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; BROADWELL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; BROADWELL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; BROADWELL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; BROADWELL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfmaddsd_128:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
; SKYLAKE-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
; SKYLAKE-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfmaddsd_128:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; KNL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; KNL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; KNL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; KNL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; KNL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfmaddsd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
; SKX-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
; SKX-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKX-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
; SKX-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
; SKX-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfmaddsd_128:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; ZNVER1-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; ZNVER1-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; ZNVER1-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50]
; ZNVER1-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50]
; ZNVER1-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfmadd132sd $2, $1, $0 \0A\09 vfmadd213sd $2, $1, $0 \0A\09 vfmadd231sd $2, $1, $0 \0A\09 vfmadd132sd $3, $1, $0 \0A\09 vfmadd213sd $3, $1, $0 \0A\09 vfmadd231sd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
define void @test_vfmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
; GENERIC-LABEL: test_vfmaddss_128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; GENERIC-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; GENERIC-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; GENERIC-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; GENERIC-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; GENERIC-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmaddss_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; BDVER2-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfmaddss_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; HASWELL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; HASWELL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; HASWELL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; HASWELL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfmaddss_128:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; BROADWELL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; BROADWELL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; BROADWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; BROADWELL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; BROADWELL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfmaddss_128:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
; SKYLAKE-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
; SKYLAKE-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfmaddss_128:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; KNL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; KNL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; KNL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; KNL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; KNL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfmaddss_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
; SKX-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
; SKX-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
; SKX-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
; SKX-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfmaddss_128:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; ZNVER1-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; ZNVER1-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; ZNVER1-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50]
; ZNVER1-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50]
; ZNVER1-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfmadd132ss $2, $1, $0 \0A\09 vfmadd213ss $2, $1, $0 \0A\09 vfmadd231ss $2, $1, $0 \0A\09 vfmadd132ss $3, $1, $0 \0A\09 vfmadd213ss $3, $1, $0 \0A\09 vfmadd231ss $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
;
; VFMADDSUB
;
define void @test_vfmaddsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
; GENERIC-LABEL: test_vfmaddsubpd_128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
; GENERIC-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
; GENERIC-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
; GENERIC-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
; GENERIC-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
; GENERIC-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmaddsubpd_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfmaddsubpd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
; HASWELL-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
; HASWELL-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
; HASWELL-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [11:0.50]
; HASWELL-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [11:0.50]
; HASWELL-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [11:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfmaddsubpd_128:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
; BROADWELL-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
; BROADWELL-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
; BROADWELL-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
; BROADWELL-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
; BROADWELL-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfmaddsubpd_128:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
; SKYLAKE-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
; SKYLAKE-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfmaddsubpd_128:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
; KNL-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
; KNL-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
; KNL-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [11:0.50]
; KNL-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [11:0.50]
; KNL-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [11:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfmaddsubpd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50]
; SKX-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50]
; SKX-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50]
; SKX-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
; SKX-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
; SKX-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfmaddsubpd_128:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
; ZNVER1-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
; ZNVER1-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
; ZNVER1-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [12:0.50]
; ZNVER1-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [12:0.50]
; ZNVER1-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfmaddsub132pd $2, $1, $0 \0A\09 vfmaddsub213pd $2, $1, $0 \0A\09 vfmaddsub231pd $2, $1, $0 \0A\09 vfmaddsub132pd $3, $1, $0 \0A\09 vfmaddsub213pd $3, $1, $0 \0A\09 vfmaddsub231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
define void @test_vfmaddsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
; GENERIC-LABEL: test_vfmaddsubpd_256:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
; GENERIC-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
; GENERIC-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
; GENERIC-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:0.50]
; GENERIC-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50]
; GENERIC-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmaddsubpd_256:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:1.00]
; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:1.00]
; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfmaddsubpd_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
; HASWELL-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
; HASWELL-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
; HASWELL-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50]
; HASWELL-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50]
; HASWELL-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfmaddsubpd_256:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
; BROADWELL-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
; BROADWELL-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
; BROADWELL-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
; BROADWELL-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
; BROADWELL-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfmaddsubpd_256:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
; SKYLAKE-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
; SKYLAKE-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfmaddsubpd_256:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
; KNL-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
; KNL-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
; KNL-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50]
; KNL-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50]
; KNL-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfmaddsubpd_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50]
; SKX-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50]
; SKX-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50]
; SKX-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
; SKX-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
; SKX-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: vzeroupper # sched: [0:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfmaddsubpd_256:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
; ZNVER1-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
; ZNVER1-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
; ZNVER1-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50]
; ZNVER1-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50]
; ZNVER1-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfmaddsub132pd $2, $1, $0 \0A\09 vfmaddsub213pd $2, $1, $0 \0A\09 vfmaddsub231pd $2, $1, $0 \0A\09 vfmaddsub132pd $3, $1, $0 \0A\09 vfmaddsub213pd $3, $1, $0 \0A\09 vfmaddsub231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
ret void
}
define void @test_vfmaddsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
; GENERIC-LABEL: test_vfmaddsubps_128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
; GENERIC-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
; GENERIC-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
; GENERIC-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
; GENERIC-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
; GENERIC-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmaddsubps_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfmaddsubps_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
; HASWELL-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
; HASWELL-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
; HASWELL-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [11:0.50]
; HASWELL-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [11:0.50]
; HASWELL-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [11:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfmaddsubps_128:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
; BROADWELL-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
; BROADWELL-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
; BROADWELL-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
; BROADWELL-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
; BROADWELL-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfmaddsubps_128:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
; SKYLAKE-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
; SKYLAKE-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfmaddsubps_128:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
; KNL-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
; KNL-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
; KNL-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [11:0.50]
; KNL-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [11:0.50]
; KNL-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [11:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfmaddsubps_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50]
; SKX-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50]
; SKX-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50]
; SKX-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
; SKX-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
; SKX-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfmaddsubps_128:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
; ZNVER1-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
; ZNVER1-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
; ZNVER1-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [12:0.50]
; ZNVER1-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [12:0.50]
; ZNVER1-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfmaddsub132ps $2, $1, $0 \0A\09 vfmaddsub213ps $2, $1, $0 \0A\09 vfmaddsub231ps $2, $1, $0 \0A\09 vfmaddsub132ps $3, $1, $0 \0A\09 vfmaddsub213ps $3, $1, $0 \0A\09 vfmaddsub231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
define void @test_vfmaddsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
; GENERIC-LABEL: test_vfmaddsubps_256:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
; GENERIC-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
; GENERIC-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
; GENERIC-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:0.50]
; GENERIC-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50]
; GENERIC-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmaddsubps_256:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:1.00]
; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:1.00]
; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfmaddsubps_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
; HASWELL-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
; HASWELL-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
; HASWELL-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50]
; HASWELL-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50]
; HASWELL-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfmaddsubps_256:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
; BROADWELL-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
; BROADWELL-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
; BROADWELL-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
; BROADWELL-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
; BROADWELL-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfmaddsubps_256:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
; SKYLAKE-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
; SKYLAKE-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfmaddsubps_256:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
; KNL-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
; KNL-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
; KNL-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50]
; KNL-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50]
; KNL-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfmaddsubps_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50]
; SKX-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50]
; SKX-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50]
; SKX-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
; SKX-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
; SKX-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: vzeroupper # sched: [0:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfmaddsubps_256:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
; ZNVER1-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
; ZNVER1-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
; ZNVER1-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50]
; ZNVER1-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50]
; ZNVER1-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfmaddsub132ps $2, $1, $0 \0A\09 vfmaddsub213ps $2, $1, $0 \0A\09 vfmaddsub231ps $2, $1, $0 \0A\09 vfmaddsub132ps $3, $1, $0 \0A\09 vfmaddsub213ps $3, $1, $0 \0A\09 vfmaddsub231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
ret void
}
;
; VFMSUBADD
;
define void @test_vfmsubaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
; GENERIC-LABEL: test_vfmsubaddpd_128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
; GENERIC-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
; GENERIC-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
; GENERIC-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
; GENERIC-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
; GENERIC-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmsubaddpd_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfmsubaddpd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
; HASWELL-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
; HASWELL-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
; HASWELL-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [11:0.50]
; HASWELL-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [11:0.50]
; HASWELL-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [11:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfmsubaddpd_128:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
; BROADWELL-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
; BROADWELL-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
; BROADWELL-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
; BROADWELL-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
; BROADWELL-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfmsubaddpd_128:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
; SKYLAKE-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
; SKYLAKE-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfmsubaddpd_128:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
; KNL-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
; KNL-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
; KNL-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [11:0.50]
; KNL-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [11:0.50]
; KNL-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [11:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfmsubaddpd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50]
; SKX-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50]
; SKX-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50]
; SKX-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
; SKX-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
; SKX-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfmsubaddpd_128:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
; ZNVER1-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
; ZNVER1-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
; ZNVER1-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [12:0.50]
; ZNVER1-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [12:0.50]
; ZNVER1-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfmsubadd132pd $2, $1, $0 \0A\09 vfmsubadd213pd $2, $1, $0 \0A\09 vfmsubadd231pd $2, $1, $0 \0A\09 vfmsubadd132pd $3, $1, $0 \0A\09 vfmsubadd213pd $3, $1, $0 \0A\09 vfmsubadd231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
define void @test_vfmsubaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
; GENERIC-LABEL: test_vfmsubaddpd_256:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
; GENERIC-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
; GENERIC-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
; GENERIC-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:0.50]
; GENERIC-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50]
; GENERIC-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmsubaddpd_256:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:1.00]
; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:1.00]
; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfmsubaddpd_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
; HASWELL-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
; HASWELL-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
; HASWELL-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50]
; HASWELL-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50]
; HASWELL-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfmsubaddpd_256:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
; BROADWELL-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
; BROADWELL-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
; BROADWELL-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
; BROADWELL-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
; BROADWELL-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfmsubaddpd_256:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
; SKYLAKE-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
; SKYLAKE-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfmsubaddpd_256:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
; KNL-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
; KNL-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
; KNL-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50]
; KNL-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50]
; KNL-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfmsubaddpd_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50]
; SKX-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50]
; SKX-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50]
; SKX-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
; SKX-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
; SKX-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: vzeroupper # sched: [0:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfmsubaddpd_256:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
; ZNVER1-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
; ZNVER1-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
; ZNVER1-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50]
; ZNVER1-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50]
; ZNVER1-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfmsubadd132pd $2, $1, $0 \0A\09 vfmsubadd213pd $2, $1, $0 \0A\09 vfmsubadd231pd $2, $1, $0 \0A\09 vfmsubadd132pd $3, $1, $0 \0A\09 vfmsubadd213pd $3, $1, $0 \0A\09 vfmsubadd231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
ret void
}
define void @test_vfmsubaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
; GENERIC-LABEL: test_vfmsubaddps_128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
; GENERIC-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
; GENERIC-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
; GENERIC-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
; GENERIC-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
; GENERIC-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmsubaddps_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfmsubaddps_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
; HASWELL-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
; HASWELL-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
; HASWELL-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [11:0.50]
; HASWELL-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [11:0.50]
; HASWELL-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [11:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfmsubaddps_128:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
; BROADWELL-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
; BROADWELL-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
; BROADWELL-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
; BROADWELL-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
; BROADWELL-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfmsubaddps_128:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
; SKYLAKE-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
; SKYLAKE-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfmsubaddps_128:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
; KNL-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
; KNL-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
; KNL-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [11:0.50]
; KNL-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [11:0.50]
; KNL-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [11:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfmsubaddps_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50]
; SKX-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50]
; SKX-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50]
; SKX-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
; SKX-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
; SKX-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfmsubaddps_128:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
; ZNVER1-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
; ZNVER1-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
; ZNVER1-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [12:0.50]
; ZNVER1-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [12:0.50]
; ZNVER1-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfmsubadd132ps $2, $1, $0 \0A\09 vfmsubadd213ps $2, $1, $0 \0A\09 vfmsubadd231ps $2, $1, $0 \0A\09 vfmsubadd132ps $3, $1, $0 \0A\09 vfmsubadd213ps $3, $1, $0 \0A\09 vfmsubadd231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
define void @test_vfmsubaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
; GENERIC-LABEL: test_vfmsubaddps_256:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
; GENERIC-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
; GENERIC-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
; GENERIC-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:0.50]
; GENERIC-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50]
; GENERIC-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmsubaddps_256:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:1.00]
; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:1.00]
; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfmsubaddps_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
; HASWELL-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
; HASWELL-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
; HASWELL-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50]
; HASWELL-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50]
; HASWELL-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfmsubaddps_256:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
; BROADWELL-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
; BROADWELL-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
; BROADWELL-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
; BROADWELL-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
; BROADWELL-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfmsubaddps_256:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
; SKYLAKE-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
; SKYLAKE-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfmsubaddps_256:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
; KNL-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
; KNL-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
; KNL-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50]
; KNL-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50]
; KNL-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfmsubaddps_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50]
; SKX-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50]
; SKX-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50]
; SKX-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
; SKX-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
; SKX-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: vzeroupper # sched: [0:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfmsubaddps_256:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
; ZNVER1-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
; ZNVER1-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
; ZNVER1-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50]
; ZNVER1-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50]
; ZNVER1-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfmsubadd132ps $2, $1, $0 \0A\09 vfmsubadd213ps $2, $1, $0 \0A\09 vfmsubadd231ps $2, $1, $0 \0A\09 vfmsubadd132ps $3, $1, $0 \0A\09 vfmsubadd213ps $3, $1, $0 \0A\09 vfmsubadd231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
ret void
}
;
; VFMSUB
;
define void @test_vfmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
; GENERIC-LABEL: test_vfmsubpd_128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; GENERIC-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; GENERIC-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; GENERIC-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; GENERIC-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; GENERIC-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmsubpd_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; BDVER2-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfmsubpd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; HASWELL-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; HASWELL-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; HASWELL-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [11:0.50]
; HASWELL-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [11:0.50]
; HASWELL-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [11:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfmsubpd_128:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; BROADWELL-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; BROADWELL-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; BROADWELL-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; BROADWELL-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; BROADWELL-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfmsubpd_128:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; SKYLAKE-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; SKYLAKE-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfmsubpd_128:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; KNL-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; KNL-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; KNL-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [11:0.50]
; KNL-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [11:0.50]
; KNL-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [11:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfmsubpd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
; SKX-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
; SKX-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKX-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; SKX-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; SKX-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfmsubpd_128:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; ZNVER1-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; ZNVER1-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; ZNVER1-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [12:0.50]
; ZNVER1-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [12:0.50]
; ZNVER1-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfmsub132pd $2, $1, $0 \0A\09 vfmsub213pd $2, $1, $0 \0A\09 vfmsub231pd $2, $1, $0 \0A\09 vfmsub132pd $3, $1, $0 \0A\09 vfmsub213pd $3, $1, $0 \0A\09 vfmsub231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
define void @test_vfmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
; GENERIC-LABEL: test_vfmsubpd_256:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
; GENERIC-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
; GENERIC-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
; GENERIC-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:0.50]
; GENERIC-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50]
; GENERIC-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmsubpd_256:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
; BDVER2-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
; BDVER2-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
; BDVER2-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:1.00]
; BDVER2-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:1.00]
; BDVER2-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfmsubpd_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
; HASWELL-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
; HASWELL-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
; HASWELL-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50]
; HASWELL-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50]
; HASWELL-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfmsubpd_256:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
; BROADWELL-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
; BROADWELL-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
; BROADWELL-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
; BROADWELL-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
; BROADWELL-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfmsubpd_256:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
; SKYLAKE-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
; SKYLAKE-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfmsubpd_256:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
; KNL-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
; KNL-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
; KNL-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50]
; KNL-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50]
; KNL-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfmsubpd_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50]
; SKX-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50]
; SKX-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50]
; SKX-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
; SKX-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
; SKX-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: vzeroupper # sched: [0:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfmsubpd_256:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
; ZNVER1-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
; ZNVER1-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
; ZNVER1-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50]
; ZNVER1-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50]
; ZNVER1-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfmsub132pd $2, $1, $0 \0A\09 vfmsub213pd $2, $1, $0 \0A\09 vfmsub231pd $2, $1, $0 \0A\09 vfmsub132pd $3, $1, $0 \0A\09 vfmsub213pd $3, $1, $0 \0A\09 vfmsub231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
ret void
}
define void @test_vfmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
; GENERIC-LABEL: test_vfmsubps_128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; GENERIC-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; GENERIC-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; GENERIC-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; GENERIC-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; GENERIC-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmsubps_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; BDVER2-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfmsubps_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; HASWELL-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; HASWELL-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; HASWELL-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [11:0.50]
; HASWELL-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [11:0.50]
; HASWELL-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [11:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfmsubps_128:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; BROADWELL-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; BROADWELL-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; BROADWELL-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; BROADWELL-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; BROADWELL-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfmsubps_128:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; SKYLAKE-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; SKYLAKE-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfmsubps_128:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; KNL-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; KNL-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; KNL-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [11:0.50]
; KNL-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [11:0.50]
; KNL-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [11:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfmsubps_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
; SKX-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
; SKX-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKX-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; SKX-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; SKX-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfmsubps_128:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; ZNVER1-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; ZNVER1-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; ZNVER1-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [12:0.50]
; ZNVER1-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [12:0.50]
; ZNVER1-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfmsub132ps $2, $1, $0 \0A\09 vfmsub213ps $2, $1, $0 \0A\09 vfmsub231ps $2, $1, $0 \0A\09 vfmsub132ps $3, $1, $0 \0A\09 vfmsub213ps $3, $1, $0 \0A\09 vfmsub231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
define void @test_vfmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
; GENERIC-LABEL: test_vfmsubps_256:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
; GENERIC-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
; GENERIC-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
; GENERIC-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:0.50]
; GENERIC-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50]
; GENERIC-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmsubps_256:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
; BDVER2-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
; BDVER2-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
; BDVER2-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:1.00]
; BDVER2-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:1.00]
; BDVER2-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfmsubps_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
; HASWELL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
; HASWELL-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
; HASWELL-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50]
; HASWELL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50]
; HASWELL-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfmsubps_256:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
; BROADWELL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
; BROADWELL-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
; BROADWELL-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
; BROADWELL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
; BROADWELL-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfmsubps_256:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
; SKYLAKE-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
; SKYLAKE-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfmsubps_256:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
; KNL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
; KNL-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
; KNL-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50]
; KNL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50]
; KNL-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfmsubps_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50]
; SKX-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50]
; SKX-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50]
; SKX-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
; SKX-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
; SKX-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: vzeroupper # sched: [0:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfmsubps_256:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
; ZNVER1-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
; ZNVER1-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
; ZNVER1-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50]
; ZNVER1-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50]
; ZNVER1-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfmsub132ps $2, $1, $0 \0A\09 vfmsub213ps $2, $1, $0 \0A\09 vfmsub231ps $2, $1, $0 \0A\09 vfmsub132ps $3, $1, $0 \0A\09 vfmsub213ps $3, $1, $0 \0A\09 vfmsub231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
ret void
}
define void @test_vfmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
; GENERIC-LABEL: test_vfmsubsd_128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; GENERIC-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; GENERIC-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; GENERIC-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; GENERIC-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; GENERIC-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmsubsd_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; BDVER2-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfmsubsd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; HASWELL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; HASWELL-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; HASWELL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; HASWELL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; HASWELL-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfmsubsd_128:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; BROADWELL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; BROADWELL-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; BROADWELL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; BROADWELL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; BROADWELL-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfmsubsd_128:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50]
; SKYLAKE-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50]
; SKYLAKE-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfmsubsd_128:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; KNL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; KNL-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; KNL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; KNL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; KNL-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfmsubsd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
; SKX-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
; SKX-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKX-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50]
; SKX-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50]
; SKX-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfmsubsd_128:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; ZNVER1-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; ZNVER1-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; ZNVER1-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [12:0.50]
; ZNVER1-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [12:0.50]
; ZNVER1-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfmsub132sd $2, $1, $0 \0A\09 vfmsub213sd $2, $1, $0 \0A\09 vfmsub231sd $2, $1, $0 \0A\09 vfmsub132sd $3, $1, $0 \0A\09 vfmsub213sd $3, $1, $0 \0A\09 vfmsub231sd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
define void @test_vfmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
; GENERIC-LABEL: test_vfmsubss_128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; GENERIC-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; GENERIC-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; GENERIC-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; GENERIC-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; GENERIC-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmsubss_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; BDVER2-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfmsubss_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; HASWELL-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; HASWELL-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; HASWELL-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; HASWELL-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; HASWELL-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfmsubss_128:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; BROADWELL-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; BROADWELL-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; BROADWELL-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; BROADWELL-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; BROADWELL-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfmsubss_128:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50]
; SKYLAKE-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50]
; SKYLAKE-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfmsubss_128:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; KNL-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; KNL-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; KNL-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; KNL-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; KNL-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfmsubss_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
; SKX-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
; SKX-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKX-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50]
; SKX-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50]
; SKX-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfmsubss_128:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; ZNVER1-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; ZNVER1-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; ZNVER1-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [12:0.50]
; ZNVER1-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [12:0.50]
; ZNVER1-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfmsub132ss $2, $1, $0 \0A\09 vfmsub213ss $2, $1, $0 \0A\09 vfmsub231ss $2, $1, $0 \0A\09 vfmsub132ss $3, $1, $0 \0A\09 vfmsub213ss $3, $1, $0 \0A\09 vfmsub231ss $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
;
; VFNMADD
;
define void @test_vfnmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
; GENERIC-LABEL: test_vfnmaddpd_128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; GENERIC-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; GENERIC-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; GENERIC-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; GENERIC-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; GENERIC-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfnmaddpd_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; BDVER2-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfnmaddpd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; HASWELL-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; HASWELL-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; HASWELL-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [11:0.50]
; HASWELL-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [11:0.50]
; HASWELL-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [11:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfnmaddpd_128:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; BROADWELL-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; BROADWELL-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; BROADWELL-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; BROADWELL-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; BROADWELL-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfnmaddpd_128:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; SKYLAKE-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; SKYLAKE-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfnmaddpd_128:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; KNL-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; KNL-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; KNL-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [11:0.50]
; KNL-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [11:0.50]
; KNL-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [11:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfnmaddpd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
; SKX-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
; SKX-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKX-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; SKX-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; SKX-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfnmaddpd_128:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [12:0.50]
; ZNVER1-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [12:0.50]
; ZNVER1-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfnmadd132pd $2, $1, $0 \0A\09 vfnmadd213pd $2, $1, $0 \0A\09 vfnmadd231pd $2, $1, $0 \0A\09 vfnmadd132pd $3, $1, $0 \0A\09 vfnmadd213pd $3, $1, $0 \0A\09 vfnmadd231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
define void @test_vfnmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
; GENERIC-LABEL: test_vfnmaddpd_256:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
; GENERIC-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
; GENERIC-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
; GENERIC-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:0.50]
; GENERIC-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50]
; GENERIC-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfnmaddpd_256:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:1.00]
; BDVER2-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:1.00]
; BDVER2-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfnmaddpd_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
; HASWELL-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
; HASWELL-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
; HASWELL-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50]
; HASWELL-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50]
; HASWELL-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfnmaddpd_256:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
; BROADWELL-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
; BROADWELL-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
; BROADWELL-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
; BROADWELL-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
; BROADWELL-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfnmaddpd_256:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
; SKYLAKE-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
; SKYLAKE-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfnmaddpd_256:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
; KNL-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
; KNL-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
; KNL-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50]
; KNL-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50]
; KNL-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfnmaddpd_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50]
; SKX-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50]
; SKX-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50]
; SKX-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
; SKX-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
; SKX-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: vzeroupper # sched: [0:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfnmaddpd_256:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50]
; ZNVER1-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50]
; ZNVER1-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfnmadd132pd $2, $1, $0 \0A\09 vfnmadd213pd $2, $1, $0 \0A\09 vfnmadd231pd $2, $1, $0 \0A\09 vfnmadd132pd $3, $1, $0 \0A\09 vfnmadd213pd $3, $1, $0 \0A\09 vfnmadd231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
ret void
}
define void @test_vfnmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
; GENERIC-LABEL: test_vfnmaddps_128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; GENERIC-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; GENERIC-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; GENERIC-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; GENERIC-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; GENERIC-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfnmaddps_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; BDVER2-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfnmaddps_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; HASWELL-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; HASWELL-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [11:0.50]
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [11:0.50]
; HASWELL-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [11:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfnmaddps_128:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; BROADWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; BROADWELL-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; BROADWELL-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; BROADWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; BROADWELL-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfnmaddps_128:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; SKYLAKE-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; SKYLAKE-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfnmaddps_128:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; KNL-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; KNL-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [11:0.50]
; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [11:0.50]
; KNL-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [11:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfnmaddps_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
; SKX-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKX-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; SKX-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfnmaddps_128:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [12:0.50]
; ZNVER1-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [12:0.50]
; ZNVER1-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfnmadd132ps $2, $1, $0 \0A\09 vfnmadd213ps $2, $1, $0 \0A\09 vfnmadd231ps $2, $1, $0 \0A\09 vfnmadd132ps $3, $1, $0 \0A\09 vfnmadd213ps $3, $1, $0 \0A\09 vfnmadd231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
define void @test_vfnmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
; GENERIC-LABEL: test_vfnmaddps_256:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
; GENERIC-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
; GENERIC-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
; GENERIC-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:0.50]
; GENERIC-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50]
; GENERIC-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfnmaddps_256:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:1.00]
; BDVER2-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:1.00]
; BDVER2-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfnmaddps_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
; HASWELL-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
; HASWELL-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50]
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50]
; HASWELL-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfnmaddps_256:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
; BROADWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
; BROADWELL-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
; BROADWELL-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
; BROADWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
; BROADWELL-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfnmaddps_256:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
; SKYLAKE-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
; SKYLAKE-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfnmaddps_256:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
; KNL-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
; KNL-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50]
; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50]
; KNL-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfnmaddps_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50]
; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50]
; SKX-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50]
; SKX-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
; SKX-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: vzeroupper # sched: [0:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfnmaddps_256:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50]
; ZNVER1-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50]
; ZNVER1-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfnmadd132ps $2, $1, $0 \0A\09 vfnmadd213ps $2, $1, $0 \0A\09 vfnmadd231ps $2, $1, $0 \0A\09 vfnmadd132ps $3, $1, $0 \0A\09 vfnmadd213ps $3, $1, $0 \0A\09 vfnmadd231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
ret void
}
define void @test_vfnmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
; GENERIC-LABEL: test_vfnmaddsd_128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; GENERIC-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; GENERIC-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; GENERIC-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; GENERIC-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; GENERIC-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfnmaddsd_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; BDVER2-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfnmaddsd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; HASWELL-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; HASWELL-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; HASWELL-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; HASWELL-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; HASWELL-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfnmaddsd_128:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; BROADWELL-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; BROADWELL-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; BROADWELL-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; BROADWELL-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; BROADWELL-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfnmaddsd_128:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50]
; SKYLAKE-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
; SKYLAKE-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfnmaddsd_128:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; KNL-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; KNL-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; KNL-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; KNL-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; KNL-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfnmaddsd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
; SKX-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
; SKX-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKX-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50]
; SKX-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
; SKX-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfnmaddsd_128:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [12:0.50]
; ZNVER1-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [12:0.50]
; ZNVER1-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfnmadd132sd $2, $1, $0 \0A\09 vfnmadd213sd $2, $1, $0 \0A\09 vfnmadd231sd $2, $1, $0 \0A\09 vfnmadd132sd $3, $1, $0 \0A\09 vfnmadd213sd $3, $1, $0 \0A\09 vfnmadd231sd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
define void @test_vfnmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
; GENERIC-LABEL: test_vfnmaddss_128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; GENERIC-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; GENERIC-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; GENERIC-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; GENERIC-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; GENERIC-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfnmaddss_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; BDVER2-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfnmaddss_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; HASWELL-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; HASWELL-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; HASWELL-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfnmaddss_128:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; BROADWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; BROADWELL-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; BROADWELL-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; BROADWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; BROADWELL-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfnmaddss_128:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50]
; SKYLAKE-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
; SKYLAKE-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfnmaddss_128:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; KNL-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; KNL-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; KNL-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfnmaddss_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
; SKX-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKX-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50]
; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
; SKX-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfnmaddss_128:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [12:0.50]
; ZNVER1-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [12:0.50]
; ZNVER1-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfnmadd132ss $2, $1, $0 \0A\09 vfnmadd213ss $2, $1, $0 \0A\09 vfnmadd231ss $2, $1, $0 \0A\09 vfnmadd132ss $3, $1, $0 \0A\09 vfnmadd213ss $3, $1, $0 \0A\09 vfnmadd231ss $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
;
; VFNMSUB
;
define void @test_vfnmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
; GENERIC-LABEL: test_vfnmsubpd_128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; GENERIC-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; GENERIC-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; GENERIC-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; GENERIC-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; GENERIC-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfnmsubpd_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; BDVER2-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfnmsubpd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; HASWELL-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; HASWELL-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; HASWELL-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [11:0.50]
; HASWELL-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [11:0.50]
; HASWELL-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [11:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfnmsubpd_128:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; BROADWELL-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; BROADWELL-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; BROADWELL-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; BROADWELL-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; BROADWELL-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfnmsubpd_128:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; SKYLAKE-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; SKYLAKE-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfnmsubpd_128:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; KNL-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; KNL-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; KNL-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [11:0.50]
; KNL-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [11:0.50]
; KNL-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [11:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfnmsubpd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
; SKX-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
; SKX-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKX-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; SKX-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; SKX-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfnmsubpd_128:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [12:0.50]
; ZNVER1-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [12:0.50]
; ZNVER1-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfnmsub132pd $2, $1, $0 \0A\09 vfnmsub213pd $2, $1, $0 \0A\09 vfnmsub231pd $2, $1, $0 \0A\09 vfnmsub132pd $3, $1, $0 \0A\09 vfnmsub213pd $3, $1, $0 \0A\09 vfnmsub231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
define void @test_vfnmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
; GENERIC-LABEL: test_vfnmsubpd_256:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
; GENERIC-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
; GENERIC-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
; GENERIC-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:0.50]
; GENERIC-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50]
; GENERIC-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfnmsubpd_256:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:1.00]
; BDVER2-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:1.00]
; BDVER2-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfnmsubpd_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
; HASWELL-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
; HASWELL-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
; HASWELL-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50]
; HASWELL-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50]
; HASWELL-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfnmsubpd_256:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
; BROADWELL-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
; BROADWELL-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
; BROADWELL-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
; BROADWELL-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
; BROADWELL-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfnmsubpd_256:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
; SKYLAKE-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
; SKYLAKE-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfnmsubpd_256:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
; KNL-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
; KNL-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
; KNL-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50]
; KNL-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50]
; KNL-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfnmsubpd_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50]
; SKX-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50]
; SKX-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50]
; SKX-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
; SKX-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
; SKX-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: vzeroupper # sched: [0:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfnmsubpd_256:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50]
; ZNVER1-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50]
; ZNVER1-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfnmsub132pd $2, $1, $0 \0A\09 vfnmsub213pd $2, $1, $0 \0A\09 vfnmsub231pd $2, $1, $0 \0A\09 vfnmsub132pd $3, $1, $0 \0A\09 vfnmsub213pd $3, $1, $0 \0A\09 vfnmsub231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
ret void
}
define void @test_vfnmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
; GENERIC-LABEL: test_vfnmsubps_128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; GENERIC-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; GENERIC-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; GENERIC-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; GENERIC-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; GENERIC-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfnmsubps_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; BDVER2-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfnmsubps_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; HASWELL-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; HASWELL-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; HASWELL-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [11:0.50]
; HASWELL-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [11:0.50]
; HASWELL-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [11:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfnmsubps_128:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; BROADWELL-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; BROADWELL-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; BROADWELL-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; BROADWELL-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; BROADWELL-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfnmsubps_128:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; SKYLAKE-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; SKYLAKE-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfnmsubps_128:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; KNL-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; KNL-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; KNL-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [11:0.50]
; KNL-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [11:0.50]
; KNL-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [11:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfnmsubps_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
; SKX-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
; SKX-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKX-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; SKX-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; SKX-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfnmsubps_128:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [12:0.50]
; ZNVER1-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [12:0.50]
; ZNVER1-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfnmsub132ps $2, $1, $0 \0A\09 vfnmsub213ps $2, $1, $0 \0A\09 vfnmsub231ps $2, $1, $0 \0A\09 vfnmsub132ps $3, $1, $0 \0A\09 vfnmsub213ps $3, $1, $0 \0A\09 vfnmsub231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
define void @test_vfnmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
; GENERIC-LABEL: test_vfnmsubps_256:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
; GENERIC-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
; GENERIC-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
; GENERIC-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:0.50]
; GENERIC-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50]
; GENERIC-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfnmsubps_256:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:1.00]
; BDVER2-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:1.00]
; BDVER2-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfnmsubps_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
; HASWELL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
; HASWELL-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
; HASWELL-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50]
; HASWELL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50]
; HASWELL-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfnmsubps_256:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
; BROADWELL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
; BROADWELL-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
; BROADWELL-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
; BROADWELL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
; BROADWELL-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfnmsubps_256:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
; SKYLAKE-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
; SKYLAKE-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfnmsubps_256:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
; KNL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
; KNL-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
; KNL-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50]
; KNL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50]
; KNL-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfnmsubps_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50]
; SKX-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50]
; SKX-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50]
; SKX-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
; SKX-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
; SKX-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: vzeroupper # sched: [0:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfnmsubps_256:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50]
; ZNVER1-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50]
; ZNVER1-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfnmsub132ps $2, $1, $0 \0A\09 vfnmsub213ps $2, $1, $0 \0A\09 vfnmsub231ps $2, $1, $0 \0A\09 vfnmsub132ps $3, $1, $0 \0A\09 vfnmsub213ps $3, $1, $0 \0A\09 vfnmsub231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
ret void
}
define void @test_vfnmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
; GENERIC-LABEL: test_vfnmsubsd_128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; GENERIC-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; GENERIC-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; GENERIC-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; GENERIC-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; GENERIC-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfnmsubsd_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; BDVER2-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfnmsubsd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; HASWELL-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; HASWELL-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; HASWELL-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; HASWELL-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; HASWELL-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfnmsubsd_128:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; BROADWELL-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; BROADWELL-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; BROADWELL-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; BROADWELL-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; BROADWELL-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfnmsubsd_128:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50]
; SKYLAKE-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50]
; SKYLAKE-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfnmsubsd_128:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; KNL-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; KNL-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; KNL-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; KNL-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; KNL-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfnmsubsd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
; SKX-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
; SKX-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKX-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50]
; SKX-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50]
; SKX-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfnmsubsd_128:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [12:0.50]
; ZNVER1-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [12:0.50]
; ZNVER1-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfnmsub132sd $2, $1, $0 \0A\09 vfnmsub213sd $2, $1, $0 \0A\09 vfnmsub231sd $2, $1, $0 \0A\09 vfnmsub132sd $3, $1, $0 \0A\09 vfnmsub213sd $3, $1, $0 \0A\09 vfnmsub231sd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
define void @test_vfnmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
; GENERIC-LABEL: test_vfnmsubss_128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; GENERIC-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; GENERIC-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; GENERIC-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; GENERIC-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; GENERIC-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfnmsubss_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; BDVER2-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_vfnmsubss_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; HASWELL-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; HASWELL-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; HASWELL-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; HASWELL-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; HASWELL-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vfnmsubss_128:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; BROADWELL-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; BROADWELL-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; BROADWELL-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; BROADWELL-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; BROADWELL-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vfnmsubss_128:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKYLAKE-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50]
; SKYLAKE-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50]
; SKYLAKE-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_vfnmsubss_128:
; KNL: # %bb.0:
; KNL-NEXT: #APP
; KNL-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; KNL-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; KNL-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; KNL-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; KNL-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; KNL-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_vfnmsubss_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
; SKX-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
; SKX-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKX-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50]
; SKX-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50]
; SKX-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_vfnmsubss_128:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [12:0.50]
; ZNVER1-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [12:0.50]
; ZNVER1-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [12:0.50]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void asm "vfnmsub132ss $2, $1, $0 \0A\09 vfnmsub213ss $2, $1, $0 \0A\09 vfnmsub231ss $2, $1, $0 \0A\09 vfnmsub132ss $3, $1, $0 \0A\09 vfnmsub213ss $3, $1, $0 \0A\09 vfnmsub231ss $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}