| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16,+avx512vl < %s | FileCheck %s |
| |
| target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" |
| target triple = "x86_64-unknown-unknown" |
| |
| ; Stack reload folding tests. |
| ; |
| ; By including a nop call with sideeffects we can force a partial register spill of the |
| ; relevant registers and check that the reload is correctly folded into the instruction. |
| |
| define <8 x half> @stack_fold_fmadd123ph(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fmadd123ph: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfmadd213ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) |
| ret <8 x half> %2 |
| } |
| declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>) |
| |
| define <8 x half> @stack_fold_fmadd213ph(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fmadd213ph: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfmadd213ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a1, <8 x half> %a0, <8 x half> %a2) |
| ret <8 x half> %2 |
| } |
| |
| define <8 x half> @stack_fold_fmadd231ph(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fmadd231ph: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfmadd231ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a1, <8 x half> %a2, <8 x half> %a0) |
| ret <8 x half> %2 |
| } |
| |
| define <8 x half> @stack_fold_fmadd321ph(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fmadd321ph: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfmadd231ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a2, <8 x half> %a1, <8 x half> %a0) |
| ret <8 x half> %2 |
| } |
| |
| define <8 x half> @stack_fold_fmadd132ph(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fmadd132ph: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfmadd132ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a0, <8 x half> %a2, <8 x half> %a1) |
| ret <8 x half> %2 |
| } |
| |
| define <8 x half> @stack_fold_fmadd312ph(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fmadd312ph: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfmadd132ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a2, <8 x half> %a0, <8 x half> %a1) |
| ret <8 x half> %2 |
| } |
| |
| define <8 x half> @stack_fold_fmadd123ph_mask(ptr %p, <8 x half> %a1, <8 x half> %a2, i8 %mask) { |
| ; CHECK-LABEL: stack_fold_fmadd123ph_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %xmm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd213ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <8 x half>, ptr %p |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) |
| %3 = bitcast i8 %mask to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %a0 |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fmadd213ph_mask(ptr %p, <8 x half> %a1, <8 x half> %a2, i8 %mask) { |
| ; CHECK-LABEL: stack_fold_fmadd213ph_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %xmm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd213ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <8 x half>, ptr %p |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a1, <8 x half> %a0, <8 x half> %a2) |
| %3 = bitcast i8 %mask to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %a0 |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fmadd231ph_mask(ptr %p, <8 x half> %a1, <8 x half> %a2, i8 %mask) { |
| ; CHECK-LABEL: stack_fold_fmadd231ph_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %xmm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd231ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <8 x half>, ptr %p |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a1, <8 x half> %a2, <8 x half> %a0) |
| %3 = bitcast i8 %mask to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %a0 |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fmadd321ph_mask(ptr %p, <8 x half> %a1, <8 x half> %a2, i8 %mask) { |
| ; CHECK-LABEL: stack_fold_fmadd321ph_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %xmm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd231ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <8 x half>, ptr %p |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a2, <8 x half> %a1, <8 x half> %a0) |
| %3 = bitcast i8 %mask to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %a0 |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fmadd132ph_mask(ptr %p, <8 x half> %a1, <8 x half> %a2, i8 %mask) { |
| ; CHECK-LABEL: stack_fold_fmadd132ph_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %xmm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd132ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <8 x half>, ptr %p |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a0, <8 x half> %a2, <8 x half> %a1) |
| %3 = bitcast i8 %mask to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %a0 |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fmadd312ph_mask(ptr %p, <8 x half> %a1, <8 x half> %a2, i8 %mask) { |
| ; CHECK-LABEL: stack_fold_fmadd312ph_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %xmm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd132ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <8 x half>, ptr %p |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a2, <8 x half> %a0, <8 x half> %a1) |
| %3 = bitcast i8 %mask to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %a0 |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fmadd123ph_maskz(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fmadd123ph_maskz: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovb (%rdi), %k1 |
| ; CHECK-NEXT: vfmadd213ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 {%k1} {z} # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) |
| %3 = load i8, ptr %mask |
| %4 = bitcast i8 %3 to <8 x i1> |
| %5 = select <8 x i1> %4, <8 x half> %2, <8 x half> zeroinitializer |
| ret <8 x half> %5 |
| } |
| |
| define <8 x half> @stack_fold_fmadd213ph_maskz(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fmadd213ph_maskz: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovb (%rdi), %k1 |
| ; CHECK-NEXT: vfmadd213ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 {%k1} {z} # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a1, <8 x half> %a0, <8 x half> %a2) |
| %3 = load i8, ptr %mask |
| %4 = bitcast i8 %3 to <8 x i1> |
| %5 = select <8 x i1> %4, <8 x half> %2, <8 x half> zeroinitializer |
| ret <8 x half> %5 |
| } |
| |
| define <8 x half> @stack_fold_fmadd231ph_maskz(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fmadd231ph_maskz: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovb (%rdi), %k1 |
| ; CHECK-NEXT: vfmadd231ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 {%k1} {z} # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a1, <8 x half> %a2, <8 x half> %a0) |
| %3 = load i8, ptr %mask |
| %4 = bitcast i8 %3 to <8 x i1> |
| %5 = select <8 x i1> %4, <8 x half> %2, <8 x half> zeroinitializer |
| ret <8 x half> %5 |
| } |
| |
| define <8 x half> @stack_fold_fmadd321ph_maskz(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fmadd321ph_maskz: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovb (%rdi), %k1 |
| ; CHECK-NEXT: vfmadd231ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 {%k1} {z} # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a2, <8 x half> %a1, <8 x half> %a0) |
| %3 = load i8, ptr %mask |
| %4 = bitcast i8 %3 to <8 x i1> |
| %5 = select <8 x i1> %4, <8 x half> %2, <8 x half> zeroinitializer |
| ret <8 x half> %5 |
| } |
| |
| define <8 x half> @stack_fold_fmadd132ph_maskz(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fmadd132ph_maskz: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovb (%rdi), %k1 |
| ; CHECK-NEXT: vfmadd132ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 {%k1} {z} # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a0, <8 x half> %a2, <8 x half> %a1) |
| %3 = load i8, ptr %mask |
| %4 = bitcast i8 %3 to <8 x i1> |
| %5 = select <8 x i1> %4, <8 x half> %2, <8 x half> zeroinitializer |
| ret <8 x half> %5 |
| } |
| |
| define <8 x half> @stack_fold_fmadd312ph_maskz(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fmadd312ph_maskz: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovb (%rdi), %k1 |
| ; CHECK-NEXT: vfmadd132ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 {%k1} {z} # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a2, <8 x half> %a0, <8 x half> %a1) |
| %3 = load i8, ptr %mask |
| %4 = bitcast i8 %3 to <8 x i1> |
| %5 = select <8 x i1> %4, <8 x half> %2, <8 x half> zeroinitializer |
| ret <8 x half> %5 |
| } |
| |
| define <8 x half> @stack_fold_fmsub123ph(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fmsub123ph: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfmsub213ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <8 x half> %a2 |
| %3 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a0, <8 x half> %a1, <8 x half> %2) |
| ret <8 x half> %3 |
| } |
| |
| define <8 x half> @stack_fold_fmsub213ph(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fmsub213ph: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfmsub213ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <8 x half> %a2 |
| %3 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a1, <8 x half> %a0, <8 x half> %2) |
| ret <8 x half> %3 |
| } |
| |
| define <8 x half> @stack_fold_fmsub231ph(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fmsub231ph: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfmsub231ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <8 x half> %a0 |
| %3 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a1, <8 x half> %a2, <8 x half> %2) |
| ret <8 x half> %3 |
| } |
| |
| define <8 x half> @stack_fold_fmsub321ph(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fmsub321ph: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfmsub231ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <8 x half> %a0 |
| %3 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a2, <8 x half> %a1, <8 x half> %2) |
| ret <8 x half> %3 |
| } |
| |
| define <8 x half> @stack_fold_fmsub132ph(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fmsub132ph: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfmsub132ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <8 x half> %a1 |
| %3 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a0, <8 x half> %a2, <8 x half> %2) |
| ret <8 x half> %3 |
| } |
| |
| define <8 x half> @stack_fold_fmsub312ph(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fmsub312ph: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfmsub132ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <8 x half> %a1 |
| %3 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a2, <8 x half> %a0, <8 x half> %2) |
| ret <8 x half> %3 |
| } |
| |
| define <8 x half> @stack_fold_fmsub123ph_mask(ptr %p, <8 x half> %a1, <8 x half> %a2, i8 %mask) { |
| ; CHECK-LABEL: stack_fold_fmsub123ph_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %xmm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmsub213ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <8 x half>, ptr %p |
| %neg = fneg <8 x half> %a2 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a0, <8 x half> %a1, <8 x half> %neg) |
| %3 = bitcast i8 %mask to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %a0 |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fmsub213ph_mask(ptr %p, <8 x half> %a1, <8 x half> %a2, i8 %mask) { |
| ; CHECK-LABEL: stack_fold_fmsub213ph_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %xmm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmsub213ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <8 x half>, ptr %p |
| %neg = fneg <8 x half> %a2 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a1, <8 x half> %a0, <8 x half> %neg) |
| %3 = bitcast i8 %mask to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %a0 |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fmsub231ph_mask(ptr %p, <8 x half> %a1, <8 x half> %a2, i8 %mask) { |
| ; CHECK-LABEL: stack_fold_fmsub231ph_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %xmm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmsub231ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <8 x half>, ptr %p |
| %neg = fneg <8 x half> %a0 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a1, <8 x half> %a2, <8 x half> %neg) |
| %3 = bitcast i8 %mask to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %a0 |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fmsub321ph_mask(ptr %p, <8 x half> %a1, <8 x half> %a2, i8 %mask) { |
| ; CHECK-LABEL: stack_fold_fmsub321ph_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %xmm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmsub231ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <8 x half>, ptr %p |
| %neg = fneg <8 x half> %a0 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a2, <8 x half> %a1, <8 x half> %neg) |
| %3 = bitcast i8 %mask to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %a0 |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fmsub132ph_mask(ptr %p, <8 x half> %a1, <8 x half> %a2, i8 %mask) { |
| ; CHECK-LABEL: stack_fold_fmsub132ph_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %xmm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmsub132ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <8 x half>, ptr %p |
| %neg = fneg <8 x half> %a1 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a0, <8 x half> %a2, <8 x half> %neg) |
| %3 = bitcast i8 %mask to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %a0 |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fmsub312ph_mask(ptr %p, <8 x half> %a1, <8 x half> %a2, i8 %mask) { |
| ; CHECK-LABEL: stack_fold_fmsub312ph_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %xmm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmsub132ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <8 x half>, ptr %p |
| %neg = fneg <8 x half> %a1 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a2, <8 x half> %a0, <8 x half> %neg) |
| %3 = bitcast i8 %mask to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %a0 |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fmsub123ph_maskz(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fmsub123ph_maskz: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovb (%rdi), %k1 |
| ; CHECK-NEXT: vfmsub213ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 {%k1} {z} # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <8 x half> %a2 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a0, <8 x half> %a1, <8 x half> %neg) |
| %3 = load i8, ptr %mask |
| %4 = bitcast i8 %3 to <8 x i1> |
| %5 = select <8 x i1> %4, <8 x half> %2, <8 x half> zeroinitializer |
| ret <8 x half> %5 |
| } |
| |
| define <8 x half> @stack_fold_fmsub213ph_maskz(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fmsub213ph_maskz: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovb (%rdi), %k1 |
| ; CHECK-NEXT: vfmsub213ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 {%k1} {z} # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <8 x half> %a2 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a1, <8 x half> %a0, <8 x half> %neg) |
| %3 = load i8, ptr %mask |
| %4 = bitcast i8 %3 to <8 x i1> |
| %5 = select <8 x i1> %4, <8 x half> %2, <8 x half> zeroinitializer |
| ret <8 x half> %5 |
| } |
| |
| define <8 x half> @stack_fold_fmsub231ph_maskz(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fmsub231ph_maskz: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovb (%rdi), %k1 |
| ; CHECK-NEXT: vfmsub231ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 {%k1} {z} # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <8 x half> %a0 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a1, <8 x half> %a2, <8 x half> %neg) |
| %3 = load i8, ptr %mask |
| %4 = bitcast i8 %3 to <8 x i1> |
| %5 = select <8 x i1> %4, <8 x half> %2, <8 x half> zeroinitializer |
| ret <8 x half> %5 |
| } |
| |
| define <8 x half> @stack_fold_fmsub321ph_maskz(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fmsub321ph_maskz: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovb (%rdi), %k1 |
| ; CHECK-NEXT: vfmsub231ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 {%k1} {z} # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <8 x half> %a0 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a2, <8 x half> %a1, <8 x half> %neg) |
| %3 = load i8, ptr %mask |
| %4 = bitcast i8 %3 to <8 x i1> |
| %5 = select <8 x i1> %4, <8 x half> %2, <8 x half> zeroinitializer |
| ret <8 x half> %5 |
| } |
| |
| define <8 x half> @stack_fold_fmsub132ph_maskz(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fmsub132ph_maskz: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovb (%rdi), %k1 |
| ; CHECK-NEXT: vfmsub132ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 {%k1} {z} # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <8 x half> %a1 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a0, <8 x half> %a2, <8 x half> %neg) |
| %3 = load i8, ptr %mask |
| %4 = bitcast i8 %3 to <8 x i1> |
| %5 = select <8 x i1> %4, <8 x half> %2, <8 x half> zeroinitializer |
| ret <8 x half> %5 |
| } |
| |
| define <8 x half> @stack_fold_fmsub312ph_maskz(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fmsub312ph_maskz: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovb (%rdi), %k1 |
| ; CHECK-NEXT: vfmsub132ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 {%k1} {z} # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <8 x half> %a1 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a2, <8 x half> %a0, <8 x half> %neg) |
| %3 = load i8, ptr %mask |
| %4 = bitcast i8 %3 to <8 x i1> |
| %5 = select <8 x i1> %4, <8 x half> %2, <8 x half> zeroinitializer |
| ret <8 x half> %5 |
| } |
| |
| define <8 x half> @stack_fold_fnmadd123ph(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fnmadd123ph: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfnmadd213ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <8 x half> %a0 |
| %3 = call <8 x half> @llvm.fma.v8f16(<8 x half> %2, <8 x half> %a1, <8 x half> %a2) |
| ret <8 x half> %3 |
| } |
| |
| define <8 x half> @stack_fold_fnmadd213ph(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fnmadd213ph: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfnmadd213ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <8 x half> %a1 |
| %3 = call <8 x half> @llvm.fma.v8f16(<8 x half> %2, <8 x half> %a0, <8 x half> %a2) |
| ret <8 x half> %3 |
| } |
| |
| define <8 x half> @stack_fold_fnmadd231ph(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fnmadd231ph: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfnmadd231ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <8 x half> %a1 |
| %3 = call <8 x half> @llvm.fma.v8f16(<8 x half> %2, <8 x half> %a2, <8 x half> %a0) |
| ret <8 x half> %3 |
| } |
| |
| define <8 x half> @stack_fold_fnmadd321ph(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fnmadd321ph: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfnmadd231ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <8 x half> %a2 |
| %3 = call <8 x half> @llvm.fma.v8f16(<8 x half> %2, <8 x half> %a1, <8 x half> %a0) |
| ret <8 x half> %3 |
| } |
| |
| define <8 x half> @stack_fold_fnmadd132ph(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fnmadd132ph: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfnmadd132ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <8 x half> %a0 |
| %3 = call <8 x half> @llvm.fma.v8f16(<8 x half> %2, <8 x half> %a2, <8 x half> %a1) |
| ret <8 x half> %3 |
| } |
| |
| define <8 x half> @stack_fold_fnmadd312ph(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fnmadd312ph: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfnmadd132ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <8 x half> %a2 |
| %3 = call <8 x half> @llvm.fma.v8f16(<8 x half> %2, <8 x half> %a0, <8 x half> %a1) |
| ret <8 x half> %3 |
| } |
| |
| define <8 x half> @stack_fold_fnmadd123ph_mask(ptr %p, <8 x half> %a1, <8 x half> %a2, i8 %mask) { |
| ; CHECK-LABEL: stack_fold_fnmadd123ph_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %xmm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfnmadd213ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <8 x half>, ptr %p |
| %neg = fneg <8 x half> %a0 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %neg, <8 x half> %a1, <8 x half> %a2) |
| %3 = bitcast i8 %mask to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %a0 |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fnmadd213ph_mask(ptr %p, <8 x half> %a1, <8 x half> %a2, i8 %mask) { |
| ; CHECK-LABEL: stack_fold_fnmadd213ph_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %xmm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfnmadd213ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <8 x half>, ptr %p |
| %neg = fneg <8 x half> %a1 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %neg, <8 x half> %a0, <8 x half> %a2) |
| %3 = bitcast i8 %mask to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %a0 |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fnmadd231ph_mask(ptr %p, <8 x half> %a1, <8 x half> %a2, i8 %mask) { |
| ; CHECK-LABEL: stack_fold_fnmadd231ph_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %xmm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfnmadd231ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <8 x half>, ptr %p |
| %neg = fneg <8 x half> %a1 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %neg, <8 x half> %a2, <8 x half> %a0) |
| %3 = bitcast i8 %mask to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %a0 |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fnmadd321ph_mask(ptr %p, <8 x half> %a1, <8 x half> %a2, i8 %mask) { |
| ; CHECK-LABEL: stack_fold_fnmadd321ph_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %xmm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfnmadd231ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <8 x half>, ptr %p |
| %neg = fneg <8 x half> %a2 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %neg, <8 x half> %a1, <8 x half> %a0) |
| %3 = bitcast i8 %mask to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %a0 |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fnmadd132ph_mask(ptr %p, <8 x half> %a1, <8 x half> %a2, i8 %mask) { |
| ; CHECK-LABEL: stack_fold_fnmadd132ph_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %xmm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfnmadd132ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <8 x half>, ptr %p |
| %neg = fneg <8 x half> %a0 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %neg, <8 x half> %a2, <8 x half> %a1) |
| %3 = bitcast i8 %mask to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %a0 |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fnmadd312ph_mask(ptr %p, <8 x half> %a1, <8 x half> %a2, i8 %mask) { |
| ; CHECK-LABEL: stack_fold_fnmadd312ph_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %xmm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfnmadd132ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <8 x half>, ptr %p |
| %neg = fneg <8 x half> %a2 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %neg, <8 x half> %a0, <8 x half> %a1) |
| %3 = bitcast i8 %mask to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %a0 |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fnmadd123ph_maskz(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fnmadd123ph_maskz: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovb (%rdi), %k1 |
| ; CHECK-NEXT: vfnmadd213ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 {%k1} {z} # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <8 x half> %a0 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %neg, <8 x half> %a1, <8 x half> %a2) |
| %3 = load i8, ptr %mask |
| %4 = bitcast i8 %3 to <8 x i1> |
| %5 = select <8 x i1> %4, <8 x half> %2, <8 x half> zeroinitializer |
| ret <8 x half> %5 |
| } |
| |
| define <8 x half> @stack_fold_fnmadd213ph_maskz(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fnmadd213ph_maskz: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovb (%rdi), %k1 |
| ; CHECK-NEXT: vfnmadd213ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 {%k1} {z} # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <8 x half> %a1 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %neg, <8 x half> %a0, <8 x half> %a2) |
| %3 = load i8, ptr %mask |
| %4 = bitcast i8 %3 to <8 x i1> |
| %5 = select <8 x i1> %4, <8 x half> %2, <8 x half> zeroinitializer |
| ret <8 x half> %5 |
| } |
| |
| define <8 x half> @stack_fold_fnmadd231ph_maskz(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fnmadd231ph_maskz: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovb (%rdi), %k1 |
| ; CHECK-NEXT: vfnmadd231ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 {%k1} {z} # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <8 x half> %a1 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %neg, <8 x half> %a2, <8 x half> %a0) |
| %3 = load i8, ptr %mask |
| %4 = bitcast i8 %3 to <8 x i1> |
| %5 = select <8 x i1> %4, <8 x half> %2, <8 x half> zeroinitializer |
| ret <8 x half> %5 |
| } |
| |
| define <8 x half> @stack_fold_fnmadd321ph_maskz(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fnmadd321ph_maskz: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovb (%rdi), %k1 |
| ; CHECK-NEXT: vfnmadd231ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 {%k1} {z} # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <8 x half> %a2 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %neg, <8 x half> %a1, <8 x half> %a0) |
| %3 = load i8, ptr %mask |
| %4 = bitcast i8 %3 to <8 x i1> |
| %5 = select <8 x i1> %4, <8 x half> %2, <8 x half> zeroinitializer |
| ret <8 x half> %5 |
| } |
| |
| define <8 x half> @stack_fold_fnmadd132ph_maskz(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fnmadd132ph_maskz: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovb (%rdi), %k1 |
| ; CHECK-NEXT: vfnmadd132ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 {%k1} {z} # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <8 x half> %a0 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %neg, <8 x half> %a2, <8 x half> %a1) |
| %3 = load i8, ptr %mask |
| %4 = bitcast i8 %3 to <8 x i1> |
| %5 = select <8 x i1> %4, <8 x half> %2, <8 x half> zeroinitializer |
| ret <8 x half> %5 |
| } |
| |
| define <8 x half> @stack_fold_fnmadd312ph_maskz(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fnmadd312ph_maskz: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovb (%rdi), %k1 |
| ; CHECK-NEXT: vfnmadd132ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 {%k1} {z} # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <8 x half> %a2 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %neg, <8 x half> %a0, <8 x half> %a1) |
| %3 = load i8, ptr %mask |
| %4 = bitcast i8 %3 to <8 x i1> |
| %5 = select <8 x i1> %4, <8 x half> %2, <8 x half> zeroinitializer |
| ret <8 x half> %5 |
| } |
| |
| define <8 x half> @stack_fold_fnmsub123ph(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fnmsub123ph: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfnmsub213ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <8 x half> %a0 |
| %3 = fneg <8 x half> %a2 |
| %4 = call <8 x half> @llvm.fma.v8f16(<8 x half> %2, <8 x half> %a1, <8 x half> %3) |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fnmsub213ph(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fnmsub213ph: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfnmsub213ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <8 x half> %a1 |
| %3 = fneg <8 x half> %a2 |
| %4 = call <8 x half> @llvm.fma.v8f16(<8 x half> %2, <8 x half> %a0, <8 x half> %3) |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fnmsub231ph(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fnmsub231ph: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfnmsub231ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <8 x half> %a1 |
| %3 = fneg <8 x half> %a0 |
| %4 = call <8 x half> @llvm.fma.v8f16(<8 x half> %2, <8 x half> %a2, <8 x half> %3) |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fnmsub321ph(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fnmsub321ph: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfnmsub231ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <8 x half> %a2 |
| %3 = fneg <8 x half> %a0 |
| %4 = call <8 x half> @llvm.fma.v8f16(<8 x half> %2, <8 x half> %a1, <8 x half> %3) |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fnmsub132ph(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fnmsub132ph: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfnmsub132ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <8 x half> %a0 |
| %3 = fneg <8 x half> %a1 |
| %4 = call <8 x half> @llvm.fma.v8f16(<8 x half> %2, <8 x half> %a2, <8 x half> %3) |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fnmsub312ph(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fnmsub312ph: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfnmsub132ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <8 x half> %a2 |
| %3 = fneg <8 x half> %a1 |
| %4 = call <8 x half> @llvm.fma.v8f16(<8 x half> %2, <8 x half> %a0, <8 x half> %3) |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fnmsub123ph_mask(ptr %p, <8 x half> %a1, <8 x half> %a2, i8 %mask) { |
| ; CHECK-LABEL: stack_fold_fnmsub123ph_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %xmm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfnmsub213ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <8 x half>, ptr %p |
| %neg = fneg <8 x half> %a2 |
| %neg1 = fneg <8 x half> %a0 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %neg1, <8 x half> %a1, <8 x half> %neg) |
| %3 = bitcast i8 %mask to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %a0 |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fnmsub213ph_mask(ptr %p, <8 x half> %a1, <8 x half> %a2, i8 %mask) { |
| ; CHECK-LABEL: stack_fold_fnmsub213ph_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %xmm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfnmsub213ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <8 x half>, ptr %p |
| %neg = fneg <8 x half> %a2 |
| %neg1 = fneg <8 x half> %a1 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %neg1, <8 x half> %a0, <8 x half> %neg) |
| %3 = bitcast i8 %mask to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %a0 |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fnmsub231ph_mask(ptr %p, <8 x half> %a1, <8 x half> %a2, i8 %mask) { |
| ; CHECK-LABEL: stack_fold_fnmsub231ph_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %xmm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfnmsub231ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <8 x half>, ptr %p |
| %neg = fneg <8 x half> %a0 |
| %neg1 = fneg <8 x half> %a1 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %neg1, <8 x half> %a2, <8 x half> %neg) |
| %3 = bitcast i8 %mask to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %a0 |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fnmsub321ph_mask(ptr %p, <8 x half> %a1, <8 x half> %a2, i8 %mask) { |
| ; CHECK-LABEL: stack_fold_fnmsub321ph_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %xmm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfnmsub231ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <8 x half>, ptr %p |
| %neg = fneg <8 x half> %a0 |
| %neg1 = fneg <8 x half> %a2 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %neg1, <8 x half> %a1, <8 x half> %neg) |
| %3 = bitcast i8 %mask to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %a0 |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fnmsub132ph_mask(ptr %p, <8 x half> %a1, <8 x half> %a2, i8 %mask) { |
| ; CHECK-LABEL: stack_fold_fnmsub132ph_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %xmm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfnmsub132ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <8 x half>, ptr %p |
| %neg = fneg <8 x half> %a1 |
| %neg1 = fneg <8 x half> %a0 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %neg1, <8 x half> %a2, <8 x half> %neg) |
| %3 = bitcast i8 %mask to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %a0 |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fnmsub312ph_mask(ptr %p, <8 x half> %a1, <8 x half> %a2, i8 %mask) { |
| ; CHECK-LABEL: stack_fold_fnmsub312ph_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %xmm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfnmsub132ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <8 x half>, ptr %p |
| %neg = fneg <8 x half> %a1 |
| %neg1 = fneg <8 x half> %a2 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %neg1, <8 x half> %a0, <8 x half> %neg) |
| %3 = bitcast i8 %mask to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %a0 |
| ret <8 x half> %4 |
| } |
| |
| define <8 x half> @stack_fold_fnmsub123ph_maskz(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fnmsub123ph_maskz: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovb (%rdi), %k1 |
| ; CHECK-NEXT: vfnmsub213ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 {%k1} {z} # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <8 x half> %a2 |
| %neg1 = fneg <8 x half> %a0 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %neg1, <8 x half> %a1, <8 x half> %neg) |
| %3 = load i8, ptr %mask |
| %4 = bitcast i8 %3 to <8 x i1> |
| %5 = select <8 x i1> %4, <8 x half> %2, <8 x half> zeroinitializer |
| ret <8 x half> %5 |
| } |
| |
| define <8 x half> @stack_fold_fnmsub213ph_maskz(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fnmsub213ph_maskz: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovb (%rdi), %k1 |
| ; CHECK-NEXT: vfnmsub213ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 {%k1} {z} # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <8 x half> %a2 |
| %neg1 = fneg <8 x half> %a1 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %neg1, <8 x half> %a0, <8 x half> %neg) |
| %3 = load i8, ptr %mask |
| %4 = bitcast i8 %3 to <8 x i1> |
| %5 = select <8 x i1> %4, <8 x half> %2, <8 x half> zeroinitializer |
| ret <8 x half> %5 |
| } |
| |
| define <8 x half> @stack_fold_fnmsub231ph_maskz(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fnmsub231ph_maskz: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovb (%rdi), %k1 |
| ; CHECK-NEXT: vfnmsub231ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 {%k1} {z} # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <8 x half> %a0 |
| %neg1 = fneg <8 x half> %a1 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %neg1, <8 x half> %a2, <8 x half> %neg) |
| %3 = load i8, ptr %mask |
| %4 = bitcast i8 %3 to <8 x i1> |
| %5 = select <8 x i1> %4, <8 x half> %2, <8 x half> zeroinitializer |
| ret <8 x half> %5 |
| } |
| |
| define <8 x half> @stack_fold_fnmsub321ph_maskz(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fnmsub321ph_maskz: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovb (%rdi), %k1 |
| ; CHECK-NEXT: vfnmsub231ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 {%k1} {z} # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <8 x half> %a0 |
| %neg1 = fneg <8 x half> %a2 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %neg1, <8 x half> %a1, <8 x half> %neg) |
| %3 = load i8, ptr %mask |
| %4 = bitcast i8 %3 to <8 x i1> |
| %5 = select <8 x i1> %4, <8 x half> %2, <8 x half> zeroinitializer |
| ret <8 x half> %5 |
| } |
| |
| define <8 x half> @stack_fold_fnmsub132ph_maskz(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fnmsub132ph_maskz: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovb (%rdi), %k1 |
| ; CHECK-NEXT: vfnmsub132ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 {%k1} {z} # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <8 x half> %a1 |
| %neg1 = fneg <8 x half> %a0 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %neg1, <8 x half> %a2, <8 x half> %neg) |
| %3 = load i8, ptr %mask |
| %4 = bitcast i8 %3 to <8 x i1> |
| %5 = select <8 x i1> %4, <8 x half> %2, <8 x half> zeroinitializer |
| ret <8 x half> %5 |
| } |
| |
| define <8 x half> @stack_fold_fnmsub312ph_maskz(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fnmsub312ph_maskz: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovb (%rdi), %k1 |
| ; CHECK-NEXT: vfnmsub132ph {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 {%k1} {z} # 16-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <8 x half> %a1 |
| %neg1 = fneg <8 x half> %a2 |
| %2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %neg1, <8 x half> %a0, <8 x half> %neg) |
| %3 = load i8, ptr %mask |
| %4 = bitcast i8 %3 to <8 x i1> |
| %5 = select <8 x i1> %4, <8 x half> %2, <8 x half> zeroinitializer |
| ret <8 x half> %5 |
| } |
| |
| define <16 x half> @stack_fold_fmadd123ph_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fmadd123ph_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfmadd213ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) |
| ret <16 x half> %2 |
| } |
| declare <16 x half> @llvm.fma.v16f16(<16 x half>, <16 x half>, <16 x half>) |
| |
| define <16 x half> @stack_fold_fmadd213ph_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fmadd213ph_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfmadd213ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a1, <16 x half> %a0, <16 x half> %a2) |
| ret <16 x half> %2 |
| } |
| |
| define <16 x half> @stack_fold_fmadd231ph_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fmadd231ph_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfmadd231ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a1, <16 x half> %a2, <16 x half> %a0) |
| ret <16 x half> %2 |
| } |
| |
| define <16 x half> @stack_fold_fmadd321ph_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fmadd321ph_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfmadd231ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a2, <16 x half> %a1, <16 x half> %a0) |
| ret <16 x half> %2 |
| } |
| |
| define <16 x half> @stack_fold_fmadd132ph_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fmadd132ph_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfmadd132ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a0, <16 x half> %a2, <16 x half> %a1) |
| ret <16 x half> %2 |
| } |
| |
| define <16 x half> @stack_fold_fmadd312ph_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fmadd312ph_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfmadd132ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a2, <16 x half> %a0, <16 x half> %a1) |
| ret <16 x half> %2 |
| } |
| |
| define <16 x half> @stack_fold_fmadd123ph_mask_ymm(ptr %p, <16 x half> %a1, <16 x half> %a2, i16 %mask) { |
| ; CHECK-LABEL: stack_fold_fmadd123ph_mask_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %ymm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd213ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <16 x half>, ptr %p |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) |
| %3 = bitcast i16 %mask to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %a0 |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fmadd213ph_mask_ymm(ptr %p, <16 x half> %a1, <16 x half> %a2, i16 %mask) { |
| ; CHECK-LABEL: stack_fold_fmadd213ph_mask_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %ymm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd213ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <16 x half>, ptr %p |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a1, <16 x half> %a0, <16 x half> %a2) |
| %3 = bitcast i16 %mask to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %a0 |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fmadd231ph_mask_ymm(ptr %p, <16 x half> %a1, <16 x half> %a2, i16 %mask) { |
| ; CHECK-LABEL: stack_fold_fmadd231ph_mask_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %ymm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd231ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <16 x half>, ptr %p |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a1, <16 x half> %a2, <16 x half> %a0) |
| %3 = bitcast i16 %mask to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %a0 |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fmadd321ph_mask_ymm(ptr %p, <16 x half> %a1, <16 x half> %a2, i16 %mask) { |
| ; CHECK-LABEL: stack_fold_fmadd321ph_mask_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %ymm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd231ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <16 x half>, ptr %p |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a2, <16 x half> %a1, <16 x half> %a0) |
| %3 = bitcast i16 %mask to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %a0 |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fmadd132ph_mask_ymm(ptr %p, <16 x half> %a1, <16 x half> %a2, i16 %mask) { |
| ; CHECK-LABEL: stack_fold_fmadd132ph_mask_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %ymm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd132ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <16 x half>, ptr %p |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a0, <16 x half> %a2, <16 x half> %a1) |
| %3 = bitcast i16 %mask to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %a0 |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fmadd312ph_mask_ymm(ptr %p, <16 x half> %a1, <16 x half> %a2, i16 %mask) { |
| ; CHECK-LABEL: stack_fold_fmadd312ph_mask_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %ymm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd132ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <16 x half>, ptr %p |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a2, <16 x half> %a0, <16 x half> %a1) |
| %3 = bitcast i16 %mask to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %a0 |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fmadd123ph_maskz_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fmadd123ph_maskz_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovw (%rdi), %k1 |
| ; CHECK-NEXT: vfmadd213ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 {%k1} {z} # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) |
| %3 = load i16, ptr %mask |
| %4 = bitcast i16 %3 to <16 x i1> |
| %5 = select <16 x i1> %4, <16 x half> %2, <16 x half> zeroinitializer |
| ret <16 x half> %5 |
| } |
| |
| define <16 x half> @stack_fold_fmadd213ph_maskz_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fmadd213ph_maskz_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovw (%rdi), %k1 |
| ; CHECK-NEXT: vfmadd213ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 {%k1} {z} # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a1, <16 x half> %a0, <16 x half> %a2) |
| %3 = load i16, ptr %mask |
| %4 = bitcast i16 %3 to <16 x i1> |
| %5 = select <16 x i1> %4, <16 x half> %2, <16 x half> zeroinitializer |
| ret <16 x half> %5 |
| } |
| |
| define <16 x half> @stack_fold_fmadd231ph_maskz_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fmadd231ph_maskz_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovw (%rdi), %k1 |
| ; CHECK-NEXT: vfmadd231ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 {%k1} {z} # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a1, <16 x half> %a2, <16 x half> %a0) |
| %3 = load i16, ptr %mask |
| %4 = bitcast i16 %3 to <16 x i1> |
| %5 = select <16 x i1> %4, <16 x half> %2, <16 x half> zeroinitializer |
| ret <16 x half> %5 |
| } |
| |
| define <16 x half> @stack_fold_fmadd321ph_maskz_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fmadd321ph_maskz_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovw (%rdi), %k1 |
| ; CHECK-NEXT: vfmadd231ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 {%k1} {z} # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a2, <16 x half> %a1, <16 x half> %a0) |
| %3 = load i16, ptr %mask |
| %4 = bitcast i16 %3 to <16 x i1> |
| %5 = select <16 x i1> %4, <16 x half> %2, <16 x half> zeroinitializer |
| ret <16 x half> %5 |
| } |
| |
| define <16 x half> @stack_fold_fmadd132ph_maskz_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fmadd132ph_maskz_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovw (%rdi), %k1 |
| ; CHECK-NEXT: vfmadd132ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 {%k1} {z} # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a0, <16 x half> %a2, <16 x half> %a1) |
| %3 = load i16, ptr %mask |
| %4 = bitcast i16 %3 to <16 x i1> |
| %5 = select <16 x i1> %4, <16 x half> %2, <16 x half> zeroinitializer |
| ret <16 x half> %5 |
| } |
| |
| define <16 x half> @stack_fold_fmadd312ph_maskz_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fmadd312ph_maskz_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovw (%rdi), %k1 |
| ; CHECK-NEXT: vfmadd132ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 {%k1} {z} # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a2, <16 x half> %a0, <16 x half> %a1) |
| %3 = load i16, ptr %mask |
| %4 = bitcast i16 %3 to <16 x i1> |
| %5 = select <16 x i1> %4, <16 x half> %2, <16 x half> zeroinitializer |
| ret <16 x half> %5 |
| } |
| |
| define <16 x half> @stack_fold_fmsub123ph_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fmsub123ph_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfmsub213ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <16 x half> %a2 |
| %3 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a0, <16 x half> %a1, <16 x half> %2) |
| ret <16 x half> %3 |
| } |
| |
| define <16 x half> @stack_fold_fmsub213ph_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fmsub213ph_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfmsub213ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <16 x half> %a2 |
| %3 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a1, <16 x half> %a0, <16 x half> %2) |
| ret <16 x half> %3 |
| } |
| |
| define <16 x half> @stack_fold_fmsub231ph_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fmsub231ph_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfmsub231ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <16 x half> %a0 |
| %3 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a1, <16 x half> %a2, <16 x half> %2) |
| ret <16 x half> %3 |
| } |
| |
| define <16 x half> @stack_fold_fmsub321ph_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fmsub321ph_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfmsub231ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <16 x half> %a0 |
| %3 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a2, <16 x half> %a1, <16 x half> %2) |
| ret <16 x half> %3 |
| } |
| |
| define <16 x half> @stack_fold_fmsub132ph_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fmsub132ph_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfmsub132ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <16 x half> %a1 |
| %3 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a0, <16 x half> %a2, <16 x half> %2) |
| ret <16 x half> %3 |
| } |
| |
| define <16 x half> @stack_fold_fmsub312ph_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fmsub312ph_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfmsub132ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <16 x half> %a1 |
| %3 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a2, <16 x half> %a0, <16 x half> %2) |
| ret <16 x half> %3 |
| } |
| |
| define <16 x half> @stack_fold_fmsub123ph_mask_ymm(ptr %p, <16 x half> %a1, <16 x half> %a2, i16 %mask) { |
| ; CHECK-LABEL: stack_fold_fmsub123ph_mask_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %ymm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmsub213ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <16 x half>, ptr %p |
| %neg = fneg <16 x half> %a2 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a0, <16 x half> %a1, <16 x half> %neg) |
| %3 = bitcast i16 %mask to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %a0 |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fmsub213ph_mask_ymm(ptr %p, <16 x half> %a1, <16 x half> %a2, i16 %mask) { |
| ; CHECK-LABEL: stack_fold_fmsub213ph_mask_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %ymm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmsub213ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <16 x half>, ptr %p |
| %neg = fneg <16 x half> %a2 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a1, <16 x half> %a0, <16 x half> %neg) |
| %3 = bitcast i16 %mask to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %a0 |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fmsub231ph_mask_ymm(ptr %p, <16 x half> %a1, <16 x half> %a2, i16 %mask) { |
| ; CHECK-LABEL: stack_fold_fmsub231ph_mask_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %ymm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmsub231ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <16 x half>, ptr %p |
| %neg = fneg <16 x half> %a0 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a1, <16 x half> %a2, <16 x half> %neg) |
| %3 = bitcast i16 %mask to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %a0 |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fmsub321ph_mask_ymm(ptr %p, <16 x half> %a1, <16 x half> %a2, i16 %mask) { |
| ; CHECK-LABEL: stack_fold_fmsub321ph_mask_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %ymm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmsub231ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <16 x half>, ptr %p |
| %neg = fneg <16 x half> %a0 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a2, <16 x half> %a1, <16 x half> %neg) |
| %3 = bitcast i16 %mask to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %a0 |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fmsub132ph_mask_ymm(ptr %p, <16 x half> %a1, <16 x half> %a2, i16 %mask) { |
| ; CHECK-LABEL: stack_fold_fmsub132ph_mask_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %ymm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmsub132ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <16 x half>, ptr %p |
| %neg = fneg <16 x half> %a1 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a0, <16 x half> %a2, <16 x half> %neg) |
| %3 = bitcast i16 %mask to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %a0 |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fmsub312ph_mask_ymm(ptr %p, <16 x half> %a1, <16 x half> %a2, i16 %mask) { |
| ; CHECK-LABEL: stack_fold_fmsub312ph_mask_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %ymm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmsub132ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <16 x half>, ptr %p |
| %neg = fneg <16 x half> %a1 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a2, <16 x half> %a0, <16 x half> %neg) |
| %3 = bitcast i16 %mask to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %a0 |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fmsub123ph_maskz_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fmsub123ph_maskz_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovw (%rdi), %k1 |
| ; CHECK-NEXT: vfmsub213ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 {%k1} {z} # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <16 x half> %a2 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a0, <16 x half> %a1, <16 x half> %neg) |
| %3 = load i16, ptr %mask |
| %4 = bitcast i16 %3 to <16 x i1> |
| %5 = select <16 x i1> %4, <16 x half> %2, <16 x half> zeroinitializer |
| ret <16 x half> %5 |
| } |
| |
| define <16 x half> @stack_fold_fmsub213ph_maskz_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fmsub213ph_maskz_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovw (%rdi), %k1 |
| ; CHECK-NEXT: vfmsub213ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 {%k1} {z} # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <16 x half> %a2 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a1, <16 x half> %a0, <16 x half> %neg) |
| %3 = load i16, ptr %mask |
| %4 = bitcast i16 %3 to <16 x i1> |
| %5 = select <16 x i1> %4, <16 x half> %2, <16 x half> zeroinitializer |
| ret <16 x half> %5 |
| } |
| |
| define <16 x half> @stack_fold_fmsub231ph_maskz_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fmsub231ph_maskz_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovw (%rdi), %k1 |
| ; CHECK-NEXT: vfmsub231ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 {%k1} {z} # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <16 x half> %a0 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a1, <16 x half> %a2, <16 x half> %neg) |
| %3 = load i16, ptr %mask |
| %4 = bitcast i16 %3 to <16 x i1> |
| %5 = select <16 x i1> %4, <16 x half> %2, <16 x half> zeroinitializer |
| ret <16 x half> %5 |
| } |
| |
| define <16 x half> @stack_fold_fmsub321ph_maskz_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fmsub321ph_maskz_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovw (%rdi), %k1 |
| ; CHECK-NEXT: vfmsub231ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 {%k1} {z} # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <16 x half> %a0 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a2, <16 x half> %a1, <16 x half> %neg) |
| %3 = load i16, ptr %mask |
| %4 = bitcast i16 %3 to <16 x i1> |
| %5 = select <16 x i1> %4, <16 x half> %2, <16 x half> zeroinitializer |
| ret <16 x half> %5 |
| } |
| |
| define <16 x half> @stack_fold_fmsub132ph_maskz_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fmsub132ph_maskz_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovw (%rdi), %k1 |
| ; CHECK-NEXT: vfmsub132ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 {%k1} {z} # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <16 x half> %a1 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a0, <16 x half> %a2, <16 x half> %neg) |
| %3 = load i16, ptr %mask |
| %4 = bitcast i16 %3 to <16 x i1> |
| %5 = select <16 x i1> %4, <16 x half> %2, <16 x half> zeroinitializer |
| ret <16 x half> %5 |
| } |
| |
| define <16 x half> @stack_fold_fmsub312ph_maskz_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fmsub312ph_maskz_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovw (%rdi), %k1 |
| ; CHECK-NEXT: vfmsub132ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 {%k1} {z} # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <16 x half> %a1 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a2, <16 x half> %a0, <16 x half> %neg) |
| %3 = load i16, ptr %mask |
| %4 = bitcast i16 %3 to <16 x i1> |
| %5 = select <16 x i1> %4, <16 x half> %2, <16 x half> zeroinitializer |
| ret <16 x half> %5 |
| } |
| |
| define <16 x half> @stack_fold_fnmadd123ph_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fnmadd123ph_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfnmadd213ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <16 x half> %a0 |
| %3 = call <16 x half> @llvm.fma.v16f16(<16 x half> %2, <16 x half> %a1, <16 x half> %a2) |
| ret <16 x half> %3 |
| } |
| |
| define <16 x half> @stack_fold_fnmadd213ph_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fnmadd213ph_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfnmadd213ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <16 x half> %a1 |
| %3 = call <16 x half> @llvm.fma.v16f16(<16 x half> %2, <16 x half> %a0, <16 x half> %a2) |
| ret <16 x half> %3 |
| } |
| |
| define <16 x half> @stack_fold_fnmadd231ph_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fnmadd231ph_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfnmadd231ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <16 x half> %a1 |
| %3 = call <16 x half> @llvm.fma.v16f16(<16 x half> %2, <16 x half> %a2, <16 x half> %a0) |
| ret <16 x half> %3 |
| } |
| |
| define <16 x half> @stack_fold_fnmadd321ph_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fnmadd321ph_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfnmadd231ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <16 x half> %a2 |
| %3 = call <16 x half> @llvm.fma.v16f16(<16 x half> %2, <16 x half> %a1, <16 x half> %a0) |
| ret <16 x half> %3 |
| } |
| |
| define <16 x half> @stack_fold_fnmadd132ph_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fnmadd132ph_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfnmadd132ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <16 x half> %a0 |
| %3 = call <16 x half> @llvm.fma.v16f16(<16 x half> %2, <16 x half> %a2, <16 x half> %a1) |
| ret <16 x half> %3 |
| } |
| |
| define <16 x half> @stack_fold_fnmadd312ph_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fnmadd312ph_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfnmadd132ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <16 x half> %a2 |
| %3 = call <16 x half> @llvm.fma.v16f16(<16 x half> %2, <16 x half> %a0, <16 x half> %a1) |
| ret <16 x half> %3 |
| } |
| |
| define <16 x half> @stack_fold_fnmadd123ph_mask_ymm(ptr %p, <16 x half> %a1, <16 x half> %a2, i16 %mask) { |
| ; CHECK-LABEL: stack_fold_fnmadd123ph_mask_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %ymm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfnmadd213ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <16 x half>, ptr %p |
| %neg = fneg <16 x half> %a0 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %neg, <16 x half> %a1, <16 x half> %a2) |
| %3 = bitcast i16 %mask to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %a0 |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fnmadd213ph_mask_ymm(ptr %p, <16 x half> %a1, <16 x half> %a2, i16 %mask) { |
| ; CHECK-LABEL: stack_fold_fnmadd213ph_mask_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %ymm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfnmadd213ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <16 x half>, ptr %p |
| %neg = fneg <16 x half> %a1 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %neg, <16 x half> %a0, <16 x half> %a2) |
| %3 = bitcast i16 %mask to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %a0 |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fnmadd231ph_mask_ymm(ptr %p, <16 x half> %a1, <16 x half> %a2, i16 %mask) { |
| ; CHECK-LABEL: stack_fold_fnmadd231ph_mask_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %ymm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfnmadd231ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <16 x half>, ptr %p |
| %neg = fneg <16 x half> %a1 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %neg, <16 x half> %a2, <16 x half> %a0) |
| %3 = bitcast i16 %mask to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %a0 |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fnmadd321ph_mask_ymm(ptr %p, <16 x half> %a1, <16 x half> %a2, i16 %mask) { |
| ; CHECK-LABEL: stack_fold_fnmadd321ph_mask_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %ymm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfnmadd231ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <16 x half>, ptr %p |
| %neg = fneg <16 x half> %a2 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %neg, <16 x half> %a1, <16 x half> %a0) |
| %3 = bitcast i16 %mask to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %a0 |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fnmadd132ph_mask_ymm(ptr %p, <16 x half> %a1, <16 x half> %a2, i16 %mask) { |
| ; CHECK-LABEL: stack_fold_fnmadd132ph_mask_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %ymm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfnmadd132ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <16 x half>, ptr %p |
| %neg = fneg <16 x half> %a0 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %neg, <16 x half> %a2, <16 x half> %a1) |
| %3 = bitcast i16 %mask to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %a0 |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fnmadd312ph_mask_ymm(ptr %p, <16 x half> %a1, <16 x half> %a2, i16 %mask) { |
| ; CHECK-LABEL: stack_fold_fnmadd312ph_mask_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %ymm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfnmadd132ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <16 x half>, ptr %p |
| %neg = fneg <16 x half> %a2 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %neg, <16 x half> %a0, <16 x half> %a1) |
| %3 = bitcast i16 %mask to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %a0 |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fnmadd123ph_maskz_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fnmadd123ph_maskz_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovw (%rdi), %k1 |
| ; CHECK-NEXT: vfnmadd213ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 {%k1} {z} # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <16 x half> %a0 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %neg, <16 x half> %a1, <16 x half> %a2) |
| %3 = load i16, ptr %mask |
| %4 = bitcast i16 %3 to <16 x i1> |
| %5 = select <16 x i1> %4, <16 x half> %2, <16 x half> zeroinitializer |
| ret <16 x half> %5 |
| } |
| |
| define <16 x half> @stack_fold_fnmadd213ph_maskz_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fnmadd213ph_maskz_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovw (%rdi), %k1 |
| ; CHECK-NEXT: vfnmadd213ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 {%k1} {z} # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <16 x half> %a1 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %neg, <16 x half> %a0, <16 x half> %a2) |
| %3 = load i16, ptr %mask |
| %4 = bitcast i16 %3 to <16 x i1> |
| %5 = select <16 x i1> %4, <16 x half> %2, <16 x half> zeroinitializer |
| ret <16 x half> %5 |
| } |
| |
| define <16 x half> @stack_fold_fnmadd231ph_maskz_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fnmadd231ph_maskz_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovw (%rdi), %k1 |
| ; CHECK-NEXT: vfnmadd231ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 {%k1} {z} # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <16 x half> %a1 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %neg, <16 x half> %a2, <16 x half> %a0) |
| %3 = load i16, ptr %mask |
| %4 = bitcast i16 %3 to <16 x i1> |
| %5 = select <16 x i1> %4, <16 x half> %2, <16 x half> zeroinitializer |
| ret <16 x half> %5 |
| } |
| |
| define <16 x half> @stack_fold_fnmadd321ph_maskz_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fnmadd321ph_maskz_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovw (%rdi), %k1 |
| ; CHECK-NEXT: vfnmadd231ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 {%k1} {z} # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <16 x half> %a2 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %neg, <16 x half> %a1, <16 x half> %a0) |
| %3 = load i16, ptr %mask |
| %4 = bitcast i16 %3 to <16 x i1> |
| %5 = select <16 x i1> %4, <16 x half> %2, <16 x half> zeroinitializer |
| ret <16 x half> %5 |
| } |
| |
| define <16 x half> @stack_fold_fnmadd132ph_maskz_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fnmadd132ph_maskz_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovw (%rdi), %k1 |
| ; CHECK-NEXT: vfnmadd132ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 {%k1} {z} # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <16 x half> %a0 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %neg, <16 x half> %a2, <16 x half> %a1) |
| %3 = load i16, ptr %mask |
| %4 = bitcast i16 %3 to <16 x i1> |
| %5 = select <16 x i1> %4, <16 x half> %2, <16 x half> zeroinitializer |
| ret <16 x half> %5 |
| } |
| |
| define <16 x half> @stack_fold_fnmadd312ph_maskz_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fnmadd312ph_maskz_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovw (%rdi), %k1 |
| ; CHECK-NEXT: vfnmadd132ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 {%k1} {z} # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <16 x half> %a2 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %neg, <16 x half> %a0, <16 x half> %a1) |
| %3 = load i16, ptr %mask |
| %4 = bitcast i16 %3 to <16 x i1> |
| %5 = select <16 x i1> %4, <16 x half> %2, <16 x half> zeroinitializer |
| ret <16 x half> %5 |
| } |
| |
| define <16 x half> @stack_fold_fnmsub123ph_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fnmsub123ph_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfnmsub213ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <16 x half> %a0 |
| %3 = fneg <16 x half> %a2 |
| %4 = call <16 x half> @llvm.fma.v16f16(<16 x half> %2, <16 x half> %a1, <16 x half> %3) |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fnmsub213ph_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fnmsub213ph_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfnmsub213ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <16 x half> %a1 |
| %3 = fneg <16 x half> %a2 |
| %4 = call <16 x half> @llvm.fma.v16f16(<16 x half> %2, <16 x half> %a0, <16 x half> %3) |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fnmsub231ph_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fnmsub231ph_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfnmsub231ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <16 x half> %a1 |
| %3 = fneg <16 x half> %a0 |
| %4 = call <16 x half> @llvm.fma.v16f16(<16 x half> %2, <16 x half> %a2, <16 x half> %3) |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fnmsub321ph_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fnmsub321ph_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfnmsub231ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <16 x half> %a2 |
| %3 = fneg <16 x half> %a0 |
| %4 = call <16 x half> @llvm.fma.v16f16(<16 x half> %2, <16 x half> %a1, <16 x half> %3) |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fnmsub132ph_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fnmsub132ph_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfnmsub132ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <16 x half> %a0 |
| %3 = fneg <16 x half> %a1 |
| %4 = call <16 x half> @llvm.fma.v16f16(<16 x half> %2, <16 x half> %a2, <16 x half> %3) |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fnmsub312ph_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) { |
| ; CHECK-LABEL: stack_fold_fnmsub312ph_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vfnmsub132ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %2 = fneg <16 x half> %a2 |
| %3 = fneg <16 x half> %a1 |
| %4 = call <16 x half> @llvm.fma.v16f16(<16 x half> %2, <16 x half> %a0, <16 x half> %3) |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fnmsub123ph_mask_ymm(ptr %p, <16 x half> %a1, <16 x half> %a2, i16 %mask) { |
| ; CHECK-LABEL: stack_fold_fnmsub123ph_mask_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %ymm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfnmsub213ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <16 x half>, ptr %p |
| %neg = fneg <16 x half> %a2 |
| %neg1 = fneg <16 x half> %a0 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %neg1, <16 x half> %a1, <16 x half> %neg) |
| %3 = bitcast i16 %mask to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %a0 |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fnmsub213ph_mask_ymm(ptr %p, <16 x half> %a1, <16 x half> %a2, i16 %mask) { |
| ; CHECK-LABEL: stack_fold_fnmsub213ph_mask_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %ymm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfnmsub213ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <16 x half>, ptr %p |
| %neg = fneg <16 x half> %a2 |
| %neg1 = fneg <16 x half> %a1 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %neg1, <16 x half> %a0, <16 x half> %neg) |
| %3 = bitcast i16 %mask to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %a0 |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fnmsub231ph_mask_ymm(ptr %p, <16 x half> %a1, <16 x half> %a2, i16 %mask) { |
| ; CHECK-LABEL: stack_fold_fnmsub231ph_mask_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %ymm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfnmsub231ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <16 x half>, ptr %p |
| %neg = fneg <16 x half> %a0 |
| %neg1 = fneg <16 x half> %a1 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %neg1, <16 x half> %a2, <16 x half> %neg) |
| %3 = bitcast i16 %mask to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %a0 |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fnmsub321ph_mask_ymm(ptr %p, <16 x half> %a1, <16 x half> %a2, i16 %mask) { |
| ; CHECK-LABEL: stack_fold_fnmsub321ph_mask_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %ymm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfnmsub231ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <16 x half>, ptr %p |
| %neg = fneg <16 x half> %a0 |
| %neg1 = fneg <16 x half> %a2 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %neg1, <16 x half> %a1, <16 x half> %neg) |
| %3 = bitcast i16 %mask to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %a0 |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fnmsub132ph_mask_ymm(ptr %p, <16 x half> %a1, <16 x half> %a2, i16 %mask) { |
| ; CHECK-LABEL: stack_fold_fnmsub132ph_mask_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %ymm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfnmsub132ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <16 x half>, ptr %p |
| %neg = fneg <16 x half> %a1 |
| %neg1 = fneg <16 x half> %a0 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %neg1, <16 x half> %a2, <16 x half> %neg) |
| %3 = bitcast i16 %mask to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %a0 |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fnmsub312ph_mask_ymm(ptr %p, <16 x half> %a1, <16 x half> %a2, i16 %mask) { |
| ; CHECK-LABEL: stack_fold_fnmsub312ph_mask_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vmovaps (%rdi), %ymm2 |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfnmsub132ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %a0 = load <16 x half>, ptr %p |
| %neg = fneg <16 x half> %a1 |
| %neg1 = fneg <16 x half> %a2 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %neg1, <16 x half> %a0, <16 x half> %neg) |
| %3 = bitcast i16 %mask to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %a0 |
| ret <16 x half> %4 |
| } |
| |
| define <16 x half> @stack_fold_fnmsub123ph_maskz_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fnmsub123ph_maskz_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovw (%rdi), %k1 |
| ; CHECK-NEXT: vfnmsub213ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 {%k1} {z} # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <16 x half> %a2 |
| %neg1 = fneg <16 x half> %a0 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %neg1, <16 x half> %a1, <16 x half> %neg) |
| %3 = load i16, ptr %mask |
| %4 = bitcast i16 %3 to <16 x i1> |
| %5 = select <16 x i1> %4, <16 x half> %2, <16 x half> zeroinitializer |
| ret <16 x half> %5 |
| } |
| |
| define <16 x half> @stack_fold_fnmsub213ph_maskz_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fnmsub213ph_maskz_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovw (%rdi), %k1 |
| ; CHECK-NEXT: vfnmsub213ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 {%k1} {z} # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <16 x half> %a2 |
| %neg1 = fneg <16 x half> %a1 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %neg1, <16 x half> %a0, <16 x half> %neg) |
| %3 = load i16, ptr %mask |
| %4 = bitcast i16 %3 to <16 x i1> |
| %5 = select <16 x i1> %4, <16 x half> %2, <16 x half> zeroinitializer |
| ret <16 x half> %5 |
| } |
| |
| define <16 x half> @stack_fold_fnmsub231ph_maskz_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fnmsub231ph_maskz_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovw (%rdi), %k1 |
| ; CHECK-NEXT: vfnmsub231ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 {%k1} {z} # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <16 x half> %a0 |
| %neg1 = fneg <16 x half> %a1 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %neg1, <16 x half> %a2, <16 x half> %neg) |
| %3 = load i16, ptr %mask |
| %4 = bitcast i16 %3 to <16 x i1> |
| %5 = select <16 x i1> %4, <16 x half> %2, <16 x half> zeroinitializer |
| ret <16 x half> %5 |
| } |
| |
| define <16 x half> @stack_fold_fnmsub321ph_maskz_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fnmsub321ph_maskz_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovw (%rdi), %k1 |
| ; CHECK-NEXT: vfnmsub231ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 {%k1} {z} # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <16 x half> %a0 |
| %neg1 = fneg <16 x half> %a2 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %neg1, <16 x half> %a1, <16 x half> %neg) |
| %3 = load i16, ptr %mask |
| %4 = bitcast i16 %3 to <16 x i1> |
| %5 = select <16 x i1> %4, <16 x half> %2, <16 x half> zeroinitializer |
| ret <16 x half> %5 |
| } |
| |
| define <16 x half> @stack_fold_fnmsub132ph_maskz_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fnmsub132ph_maskz_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovw (%rdi), %k1 |
| ; CHECK-NEXT: vfnmsub132ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 {%k1} {z} # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <16 x half> %a1 |
| %neg1 = fneg <16 x half> %a0 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %neg1, <16 x half> %a2, <16 x half> %neg) |
| %3 = load i16, ptr %mask |
| %4 = bitcast i16 %3 to <16 x i1> |
| %5 = select <16 x i1> %4, <16 x half> %2, <16 x half> zeroinitializer |
| ret <16 x half> %5 |
| } |
| |
| define <16 x half> @stack_fold_fnmsub312ph_maskz_ymm(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2, ptr %mask) { |
| ; CHECK-LABEL: stack_fold_fnmsub312ph_maskz_ymm: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: kmovw (%rdi), %k1 |
| ; CHECK-NEXT: vfnmsub132ph {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 {%k1} {z} # 32-byte Folded Reload |
| ; CHECK-NEXT: retq |
| %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() |
| %neg = fneg <16 x half> %a1 |
| %neg1 = fneg <16 x half> %a2 |
| %2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %neg1, <16 x half> %a0, <16 x half> %neg) |
| %3 = load i16, ptr %mask |
| %4 = bitcast i16 %3 to <16 x i1> |
| %5 = select <16 x i1> %4, <16 x half> %2, <16 x half> zeroinitializer |
| ret <16 x half> %5 |
| } |