| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s --mtriple=x86_64-unknown-unknown -mattr=avx10.2-512 | FileCheck %s |
| |
| ; Operand order 123: computes fma(%x, %y, %z) = x * y + z on <8 x bfloat> register operands. |
| ; The assertions expect a single vfmadd213bf16 with no extra moves. |
| define <8 x bfloat> @fma_123_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) { |
| ; CHECK-LABEL: fma_123_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd213bf16 %xmm2, %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) |
| ret <8 x bfloat> %a |
| } |
| |
| ; Operand order 213: computes fma(%y, %x, %z) = y * x + z on <8 x bfloat> register operands. |
| ; The assertions expect a single vfmadd213bf16 with no extra moves. |
| define <8 x bfloat> @fma_213_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) { |
| ; CHECK-LABEL: fma_213_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd213bf16 %xmm2, %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %x, <8 x bfloat> %z) |
| ret <8 x bfloat> %a |
| } |
| |
| ; Operand order 231: computes fma(%y, %z, %x) = y * z + x on <8 x bfloat> register operands. |
| ; The assertions expect a single vfmadd231bf16 with no extra moves. |
| define <8 x bfloat> @fma_231_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) { |
| ; CHECK-LABEL: fma_231_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd231bf16 %xmm1, %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %z, <8 x bfloat> %x) |
| ret <8 x bfloat> %a |
| } |
| |
| ; Operand order 321: computes fma(%z, %y, %x) = z * y + x on <8 x bfloat> register operands. |
| ; The assertions expect a single vfmadd231bf16 with no extra moves. |
| define <8 x bfloat> @fma_321_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) { |
| ; CHECK-LABEL: fma_321_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd231bf16 %xmm1, %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %y, <8 x bfloat> %x) |
| ret <8 x bfloat> %a |
| } |
| |
| ; Operand order 132: computes fma(%x, %z, %y) = x * z + y on <8 x bfloat> register operands. |
| ; The assertions expect a single vfmadd213bf16 with no extra moves. |
| define <8 x bfloat> @fma_132_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) { |
| ; CHECK-LABEL: fma_132_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd213bf16 %xmm1, %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %z, <8 x bfloat> %y) |
| ret <8 x bfloat> %a |
| } |
| |
| ; Operand order 312: computes fma(%z, %x, %y) = z * x + y on <8 x bfloat> register operands. |
| ; The assertions expect a single vfmadd213bf16 with no extra moves. |
| define <8 x bfloat> @fma_312_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) { |
| ; CHECK-LABEL: fma_312_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd213bf16 %xmm1, %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %x, <8 x bfloat> %y) |
| ret <8 x bfloat> %a |
| } |
| |
| ; Operand order 123 with %z loaded from %zp: computes fma(%x, %y, %z) = x * y + z on <8 x bfloat>. |
| ; The assertions expect the load folded into the memory operand of a single vfmadd213bf16. |
| define <8 x bfloat> @fma_load_123_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp) { |
| ; CHECK-LABEL: fma_load_123_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd213bf16 (%rdi), %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %z = load <8 x bfloat>, ptr %zp |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) |
| ret <8 x bfloat> %a |
| } |
| |
| ; Operand order 213 with %z loaded from %zp: computes fma(%y, %x, %z) = y * x + z on <8 x bfloat>. |
| ; The assertions expect the load folded into the memory operand of a single vfmadd213bf16. |
| define <8 x bfloat> @fma_load_213_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp) { |
| ; CHECK-LABEL: fma_load_213_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd213bf16 (%rdi), %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %z = load <8 x bfloat>, ptr %zp |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %x, <8 x bfloat> %z) |
| ret <8 x bfloat> %a |
| } |
| |
| ; Operand order 231 with %z loaded from %zp: computes fma(%y, %z, %x) = y * z + x on <8 x bfloat>. |
| ; The assertions expect the load folded into the memory operand of a single vfmadd231bf16. |
| define <8 x bfloat> @fma_load_231_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp) { |
| ; CHECK-LABEL: fma_load_231_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd231bf16 (%rdi), %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %z = load <8 x bfloat>, ptr %zp |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %z, <8 x bfloat> %x) |
| ret <8 x bfloat> %a |
| } |
| |
| ; Operand order 321 with %z loaded from %zp: computes fma(%z, %y, %x) = z * y + x on <8 x bfloat>. |
| ; The assertions expect the load folded into the memory operand of a single vfmadd231bf16. |
| define <8 x bfloat> @fma_load_321_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp) { |
| ; CHECK-LABEL: fma_load_321_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd231bf16 (%rdi), %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %z = load <8 x bfloat>, ptr %zp |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %y, <8 x bfloat> %x) |
| ret <8 x bfloat> %a |
| } |
| |
| ; Operand order 132 with %z loaded from %zp: computes fma(%x, %z, %y) = x * z + y on <8 x bfloat>. |
| ; The assertions expect the load folded into the memory operand of a single vfmadd132bf16. |
| define <8 x bfloat> @fma_load_132_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp) { |
| ; CHECK-LABEL: fma_load_132_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd132bf16 (%rdi), %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %z = load <8 x bfloat>, ptr %zp |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %z, <8 x bfloat> %y) |
| ret <8 x bfloat> %a |
| } |
| |
| ; Operand order 312 with %z loaded from %zp: computes fma(%z, %x, %y) = z * x + y on <8 x bfloat>. |
| ; The assertions expect the load folded into the memory operand of a single vfmadd132bf16. |
| define <8 x bfloat> @fma_load_312_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp) { |
| ; CHECK-LABEL: fma_load_312_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd132bf16 (%rdi), %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %z = load <8 x bfloat>, ptr %zp |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %x, <8 x bfloat> %y) |
| ret <8 x bfloat> %a |
| } |
| |
| ; Operand order 123, merge-masked: fma(%x, %y, %z) = x * y + z where a %mask bit is set, %x elsewhere. |
| ; The assertions expect kmovd into %k1 followed by a single masked vfmadd132bf16. |
| define <8 x bfloat> @fma_mask_123_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) { |
| ; CHECK-LABEL: fma_mask_123_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd132bf16 %xmm1, %xmm2, %xmm0 {%k1} |
| ; CHECK-NEXT: retq |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) |
| %b = bitcast i8 %mask to <8 x i1> |
| %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x |
| ret <8 x bfloat> %c |
| } |
| |
| ; Operand order 213, merge-masked: fma(%y, %x, %z) = y * x + z where a %mask bit is set, %x elsewhere. |
| ; The assertions expect kmovd into %k1 followed by a single masked vfmadd213bf16. |
| define <8 x bfloat> @fma_mask_213_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) { |
| ; CHECK-LABEL: fma_mask_213_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 %xmm2, %xmm1, %xmm0 {%k1} |
| ; CHECK-NEXT: retq |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %x, <8 x bfloat> %z) |
| %b = bitcast i8 %mask to <8 x i1> |
| %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x |
| ret <8 x bfloat> %c |
| } |
| |
| ; Operand order 231, merge-masked: fma(%y, %z, %x) = y * z + x where a %mask bit is set, %x elsewhere. |
| ; The assertions expect kmovd into %k1 followed by a single masked vfmadd231bf16. |
| define <8 x bfloat> @fma_mask_231_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) { |
| ; CHECK-LABEL: fma_mask_231_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd231bf16 %xmm2, %xmm1, %xmm0 {%k1} |
| ; CHECK-NEXT: retq |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %z, <8 x bfloat> %x) |
| %b = bitcast i8 %mask to <8 x i1> |
| %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x |
| ret <8 x bfloat> %c |
| } |
| |
| ; Operand order 321, merge-masked: fma(%z, %y, %x) = z * y + x where a %mask bit is set, %x elsewhere. |
| ; The assertions expect kmovd into %k1 followed by a single masked vfmadd231bf16. |
| define <8 x bfloat> @fma_mask_321_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) { |
| ; CHECK-LABEL: fma_mask_321_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd231bf16 %xmm1, %xmm2, %xmm0 {%k1} |
| ; CHECK-NEXT: retq |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %y, <8 x bfloat> %x) |
| %b = bitcast i8 %mask to <8 x i1> |
| %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x |
| ret <8 x bfloat> %c |
| } |
| |
| ; Operand order 132, merge-masked: fma(%x, %z, %y) = x * z + y where a %mask bit is set, %x elsewhere. |
| ; The assertions expect kmovd into %k1 followed by a single masked vfmadd132bf16. |
| define <8 x bfloat> @fma_mask_132_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) { |
| ; CHECK-LABEL: fma_mask_132_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd132bf16 %xmm2, %xmm1, %xmm0 {%k1} |
| ; CHECK-NEXT: retq |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %z, <8 x bfloat> %y) |
| %b = bitcast i8 %mask to <8 x i1> |
| %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x |
| ret <8 x bfloat> %c |
| } |
| |
| ; Operand order 312, merge-masked: fma(%z, %x, %y) = z * x + y where a %mask bit is set, %x elsewhere. |
| ; The assertions expect kmovd into %k1 followed by a single masked vfmadd213bf16. |
| define <8 x bfloat> @fma_mask_312_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) { |
| ; CHECK-LABEL: fma_mask_312_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 %xmm1, %xmm2, %xmm0 {%k1} |
| ; CHECK-NEXT: retq |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %x, <8 x bfloat> %y) |
| %b = bitcast i8 %mask to <8 x i1> |
| %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x |
| ret <8 x bfloat> %c |
| } |
| |
| ; Operand order 123, zero-masked: fma(%x, %y, %z) = x * y + z where a %mask bit is set, zero elsewhere. |
| ; The assertions expect kmovd into %k1 followed by a single vfmadd213bf16 with {z}. |
| define <8 x bfloat> @fma_maskz_123_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) { |
| ; CHECK-LABEL: fma_maskz_123_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 %xmm2, %xmm1, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) |
| %b = bitcast i8 %mask to <8 x i1> |
| %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer |
| ret <8 x bfloat> %c |
| } |
| |
| ; Operand order 213, zero-masked: fma(%y, %x, %z) = y * x + z where a %mask bit is set, zero elsewhere. |
| ; The assertions expect kmovd into %k1 followed by a single vfmadd213bf16 with {z}. |
| define <8 x bfloat> @fma_maskz_213_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) { |
| ; CHECK-LABEL: fma_maskz_213_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 %xmm2, %xmm1, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %x, <8 x bfloat> %z) |
| %b = bitcast i8 %mask to <8 x i1> |
| %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer |
| ret <8 x bfloat> %c |
| } |
| |
| ; Operand order 231, zero-masked: fma(%y, %z, %x) = y * z + x where a %mask bit is set, zero elsewhere. |
| ; The assertions expect kmovd into %k1 followed by a single vfmadd231bf16 with {z}. |
| define <8 x bfloat> @fma_maskz_231_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) { |
| ; CHECK-LABEL: fma_maskz_231_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd231bf16 %xmm1, %xmm2, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %z, <8 x bfloat> %x) |
| %b = bitcast i8 %mask to <8 x i1> |
| %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer |
| ret <8 x bfloat> %c |
| } |
| |
| ; Operand order 321, zero-masked: fma(%z, %y, %x) = z * y + x where a %mask bit is set, zero elsewhere. |
| ; The assertions expect kmovd into %k1 followed by a single vfmadd231bf16 with {z}. |
| define <8 x bfloat> @fma_maskz_321_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) { |
| ; CHECK-LABEL: fma_maskz_321_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd231bf16 %xmm1, %xmm2, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %y, <8 x bfloat> %x) |
| %b = bitcast i8 %mask to <8 x i1> |
| %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer |
| ret <8 x bfloat> %c |
| } |
| |
| ; Operand order 132, zero-masked: fma(%x, %z, %y) = x * z + y where a %mask bit is set, zero elsewhere. |
| ; The assertions expect kmovd into %k1 followed by a single vfmadd213bf16 with {z}. |
| define <8 x bfloat> @fma_maskz_132_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) { |
| ; CHECK-LABEL: fma_maskz_132_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 %xmm1, %xmm2, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %z, <8 x bfloat> %y) |
| %b = bitcast i8 %mask to <8 x i1> |
| %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer |
| ret <8 x bfloat> %c |
| } |
| |
| ; Operand order 312, zero-masked: fma(%z, %x, %y) = z * x + y where a %mask bit is set, zero elsewhere. |
| ; The assertions expect kmovd into %k1 followed by a single vfmadd213bf16 with {z}. |
| define <8 x bfloat> @fma_maskz_312_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) { |
| ; CHECK-LABEL: fma_maskz_312_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 %xmm1, %xmm2, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %x, <8 x bfloat> %y) |
| %b = bitcast i8 %mask to <8 x i1> |
| %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer |
| ret <8 x bfloat> %c |
| } |
| |
| ; Operand order 123, merge-masked, %z loaded from %zp: fma(%x, %y, %z) = x * y + z where a %mask bit is set, %x elsewhere. |
| ; The assertions expect kmovd into %k1 and the load folded into a single masked vfmadd213bf16. |
| define <8 x bfloat> @fma_mask_load_123_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) { |
| ; CHECK-LABEL: fma_mask_load_123_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 (%rdi), %xmm1, %xmm0 {%k1} |
| ; CHECK-NEXT: retq |
| %z = load <8 x bfloat>, ptr %zp |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) |
| %b = bitcast i8 %mask to <8 x i1> |
| %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x |
| ret <8 x bfloat> %c |
| } |
| |
| ; Operand order 213, merge-masked, %z loaded from %zp: fma(%y, %x, %z) = y * x + z where a %mask bit is set, %x elsewhere. |
| ; The assertions expect kmovd into %k1 and the load folded into a single masked vfmadd213bf16. |
| define <8 x bfloat> @fma_mask_load_213_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) { |
| ; CHECK-LABEL: fma_mask_load_213_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 (%rdi), %xmm1, %xmm0 {%k1} |
| ; CHECK-NEXT: retq |
| %z = load <8 x bfloat>, ptr %zp |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %x, <8 x bfloat> %z) |
| %b = bitcast i8 %mask to <8 x i1> |
| %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x |
| ret <8 x bfloat> %c |
| } |
| |
| ; Operand order 231, merge-masked, %z loaded from %zp: fma(%y, %z, %x) = y * z + x where a %mask bit is set, %x elsewhere. |
| ; The assertions expect kmovd into %k1 and the load folded into a single masked vfmadd231bf16. |
| define <8 x bfloat> @fma_mask_load_231_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) { |
| ; CHECK-LABEL: fma_mask_load_231_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd231bf16 (%rdi), %xmm1, %xmm0 {%k1} |
| ; CHECK-NEXT: retq |
| %z = load <8 x bfloat>, ptr %zp |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %z, <8 x bfloat> %x) |
| %b = bitcast i8 %mask to <8 x i1> |
| %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x |
| ret <8 x bfloat> %c |
| } |
| |
| ; Operand order 321, merge-masked, %z loaded from %zp: fma(%z, %y, %x) = z * y + x where a %mask bit is set, %x elsewhere. |
| ; The assertions expect kmovd into %k1 and the load folded into a single masked vfmadd231bf16. |
| define <8 x bfloat> @fma_mask_load_321_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) { |
| ; CHECK-LABEL: fma_mask_load_321_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd231bf16 (%rdi), %xmm1, %xmm0 {%k1} |
| ; CHECK-NEXT: retq |
| %z = load <8 x bfloat>, ptr %zp |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %y, <8 x bfloat> %x) |
| %b = bitcast i8 %mask to <8 x i1> |
| %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x |
| ret <8 x bfloat> %c |
| } |
| |
| ; Operand order 132, merge-masked, %z loaded from %zp: fma(%x, %z, %y) = x * z + y where a %mask bit is set, %x elsewhere. |
| ; The assertions expect kmovd into %k1 and the load folded into a single masked vfmadd132bf16. |
| define <8 x bfloat> @fma_mask_load_132_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) { |
| ; CHECK-LABEL: fma_mask_load_132_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd132bf16 (%rdi), %xmm1, %xmm0 {%k1} |
| ; CHECK-NEXT: retq |
| %z = load <8 x bfloat>, ptr %zp |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %z, <8 x bfloat> %y) |
| %b = bitcast i8 %mask to <8 x i1> |
| %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x |
| ret <8 x bfloat> %c |
| } |
| |
| ; Operand order 312, merge-masked, %z loaded from %zp: fma(%z, %x, %y) = z * x + y where a %mask bit is set, %x elsewhere. |
| ; The assertions expect kmovd into %k1 and the load folded into a single masked vfmadd132bf16. |
| define <8 x bfloat> @fma_mask_load_312_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) { |
| ; CHECK-LABEL: fma_mask_load_312_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd132bf16 (%rdi), %xmm1, %xmm0 {%k1} |
| ; CHECK-NEXT: retq |
| %z = load <8 x bfloat>, ptr %zp |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %x, <8 x bfloat> %y) |
| %b = bitcast i8 %mask to <8 x i1> |
| %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x |
| ret <8 x bfloat> %c |
| } |
| |
| ; Operand order 123, zero-masked, %z loaded from %zp: fma(%x, %y, %z) = x * y + z where a %mask bit is set, zero elsewhere. |
| ; The assertions expect kmovd into %k1 and the load folded into a single vfmadd213bf16 with {z}. |
| define <8 x bfloat> @fma_maskz_load_123_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) { |
| ; CHECK-LABEL: fma_maskz_load_123_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 (%rdi), %xmm1, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %z = load <8 x bfloat>, ptr %zp |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) |
| %b = bitcast i8 %mask to <8 x i1> |
| %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer |
| ret <8 x bfloat> %c |
| } |
| |
| ; Operand order 213, zero-masked, %z loaded from %zp: fma(%y, %x, %z) = y * x + z where a %mask bit is set, zero elsewhere. |
| ; The assertions expect kmovd into %k1 and the load folded into a single vfmadd213bf16 with {z}. |
| define <8 x bfloat> @fma_maskz_load_213_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) { |
| ; CHECK-LABEL: fma_maskz_load_213_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 (%rdi), %xmm1, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %z = load <8 x bfloat>, ptr %zp |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %x, <8 x bfloat> %z) |
| %b = bitcast i8 %mask to <8 x i1> |
| %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer |
| ret <8 x bfloat> %c |
| } |
| |
| ; Operand order 231, zero-masked, %z loaded from %zp: fma(%y, %z, %x) = y * z + x where a %mask bit is set, zero elsewhere. |
| ; The assertions expect kmovd into %k1 and the load folded into a single vfmadd231bf16 with {z}. |
| define <8 x bfloat> @fma_maskz_load_231_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) { |
| ; CHECK-LABEL: fma_maskz_load_231_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd231bf16 (%rdi), %xmm1, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %z = load <8 x bfloat>, ptr %zp |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %z, <8 x bfloat> %x) |
| %b = bitcast i8 %mask to <8 x i1> |
| %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer |
| ret <8 x bfloat> %c |
| } |
| |
| ; Operand order 321, zero-masked, %z loaded from %zp: fma(%z, %y, %x) = z * y + x where a %mask bit is set, zero elsewhere. |
| ; The assertions expect kmovd into %k1 and the load folded into a single vfmadd231bf16 with {z}. |
| define <8 x bfloat> @fma_maskz_load_321_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) { |
| ; CHECK-LABEL: fma_maskz_load_321_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd231bf16 (%rdi), %xmm1, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %z = load <8 x bfloat>, ptr %zp |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %y, <8 x bfloat> %x) |
| %b = bitcast i8 %mask to <8 x i1> |
| %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer |
| ret <8 x bfloat> %c |
| } |
| |
| ; Operand order 132, zero-masked, %z loaded from %zp: fma(%x, %z, %y) = x * z + y where a %mask bit is set, zero elsewhere. |
| ; The assertions expect kmovd into %k1 and the load folded into a single vfmadd132bf16 with {z}. |
| define <8 x bfloat> @fma_maskz_load_132_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) { |
| ; CHECK-LABEL: fma_maskz_load_132_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd132bf16 (%rdi), %xmm1, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %z = load <8 x bfloat>, ptr %zp |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %z, <8 x bfloat> %y) |
| %b = bitcast i8 %mask to <8 x i1> |
| %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer |
| ret <8 x bfloat> %c |
| } |
| |
| ; Operand order 312, zero-masked, %z loaded from %zp: fma(%z, %x, %y) = z * x + y where a %mask bit is set, zero elsewhere. |
| ; The assertions expect kmovd into %k1 and the load folded into a single vfmadd132bf16 with {z}. |
| define <8 x bfloat> @fma_maskz_load_312_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) { |
| ; CHECK-LABEL: fma_maskz_load_312_v8bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd132bf16 (%rdi), %xmm1, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %z = load <8 x bfloat>, ptr %zp |
| %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %x, <8 x bfloat> %y) |
| %b = bitcast i8 %mask to <8 x i1> |
| %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer |
| ret <8 x bfloat> %c |
| } |
| |
| ; Operand order 123: computes fma(%x, %y, %z) = x * y + z on <16 x bfloat> register operands. |
| ; The assertions expect a single ymm vfmadd213bf16 with no extra moves. |
| define <16 x bfloat> @fma_123_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) { |
| ; CHECK-LABEL: fma_123_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd213bf16 %ymm2, %ymm1, %ymm0 |
| ; CHECK-NEXT: retq |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) |
| ret <16 x bfloat> %a |
| } |
| |
| ; Operand order 213: computes fma(%y, %x, %z) = y * x + z on <16 x bfloat> register operands. |
| ; The assertions expect a single ymm vfmadd213bf16 with no extra moves. |
| define <16 x bfloat> @fma_213_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) { |
| ; CHECK-LABEL: fma_213_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd213bf16 %ymm2, %ymm1, %ymm0 |
| ; CHECK-NEXT: retq |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %x, <16 x bfloat> %z) |
| ret <16 x bfloat> %a |
| } |
| |
| ; Operand order 231: computes fma(%y, %z, %x) = y * z + x on <16 x bfloat> register operands. |
| ; The assertions expect a single ymm vfmadd231bf16 with no extra moves. |
| define <16 x bfloat> @fma_231_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) { |
| ; CHECK-LABEL: fma_231_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd231bf16 %ymm1, %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %z, <16 x bfloat> %x) |
| ret <16 x bfloat> %a |
| } |
| |
| ; Operand order 321: computes fma(%z, %y, %x) = z * y + x on <16 x bfloat> register operands. |
| ; The assertions expect a single ymm vfmadd231bf16 with no extra moves. |
| define <16 x bfloat> @fma_321_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) { |
| ; CHECK-LABEL: fma_321_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd231bf16 %ymm1, %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %y, <16 x bfloat> %x) |
| ret <16 x bfloat> %a |
| } |
| |
| ; Operand order 132: computes fma(%x, %z, %y) = x * z + y on <16 x bfloat> register operands. |
| ; The assertions expect a single ymm vfmadd213bf16 with no extra moves. |
| define <16 x bfloat> @fma_132_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) { |
| ; CHECK-LABEL: fma_132_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd213bf16 %ymm1, %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %z, <16 x bfloat> %y) |
| ret <16 x bfloat> %a |
| } |
| |
| ; Operand order 312: computes fma(%z, %x, %y) = z * x + y on <16 x bfloat> register operands. |
| ; The assertions expect a single ymm vfmadd213bf16 with no extra moves. |
| define <16 x bfloat> @fma_312_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) { |
| ; CHECK-LABEL: fma_312_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd213bf16 %ymm1, %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %x, <16 x bfloat> %y) |
| ret <16 x bfloat> %a |
| } |
| |
| ; Operand order 123 with %z loaded from %zp: computes fma(%x, %y, %z) = x * y + z on <16 x bfloat>. |
| ; The assertions expect the load folded into the memory operand of a single ymm vfmadd213bf16. |
| define <16 x bfloat> @fma_load_123_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp) { |
| ; CHECK-LABEL: fma_load_123_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd213bf16 (%rdi), %ymm1, %ymm0 |
| ; CHECK-NEXT: retq |
| %z = load <16 x bfloat>, ptr %zp |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) |
| ret <16 x bfloat> %a |
| } |
| |
| ; Operand order 213 with %z loaded from %zp: computes fma(%y, %x, %z) = y * x + z on <16 x bfloat>. |
| ; The assertions expect the load folded into the memory operand of a single ymm vfmadd213bf16. |
| define <16 x bfloat> @fma_load_213_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp) { |
| ; CHECK-LABEL: fma_load_213_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd213bf16 (%rdi), %ymm1, %ymm0 |
| ; CHECK-NEXT: retq |
| %z = load <16 x bfloat>, ptr %zp |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %x, <16 x bfloat> %z) |
| ret <16 x bfloat> %a |
| } |
| |
| ; Operand order 231 with %z loaded from %zp: computes fma(%y, %z, %x) = y * z + x on <16 x bfloat>. |
| ; The assertions expect the load folded into the memory operand of a single ymm vfmadd231bf16. |
| define <16 x bfloat> @fma_load_231_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp) { |
| ; CHECK-LABEL: fma_load_231_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd231bf16 (%rdi), %ymm1, %ymm0 |
| ; CHECK-NEXT: retq |
| %z = load <16 x bfloat>, ptr %zp |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %z, <16 x bfloat> %x) |
| ret <16 x bfloat> %a |
| } |
| |
| ; Operand order 321 with %z loaded from %zp: computes fma(%z, %y, %x) = z * y + x on <16 x bfloat>. |
| ; The assertions expect the load folded into the memory operand of a single ymm vfmadd231bf16. |
| define <16 x bfloat> @fma_load_321_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp) { |
| ; CHECK-LABEL: fma_load_321_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd231bf16 (%rdi), %ymm1, %ymm0 |
| ; CHECK-NEXT: retq |
| %z = load <16 x bfloat>, ptr %zp |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %y, <16 x bfloat> %x) |
| ret <16 x bfloat> %a |
| } |
| |
| ; Operand order 132 with %z loaded from %zp: computes fma(%x, %z, %y) = x * z + y on <16 x bfloat>. |
| ; The assertions expect the load folded into the memory operand of a single ymm vfmadd132bf16. |
| define <16 x bfloat> @fma_load_132_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp) { |
| ; CHECK-LABEL: fma_load_132_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd132bf16 (%rdi), %ymm1, %ymm0 |
| ; CHECK-NEXT: retq |
| %z = load <16 x bfloat>, ptr %zp |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %z, <16 x bfloat> %y) |
| ret <16 x bfloat> %a |
| } |
| |
| ; Operand order 312 with %z loaded from %zp: computes fma(%z, %x, %y) = z * x + y on <16 x bfloat>. |
| ; The assertions expect the load folded into the memory operand of a single ymm vfmadd132bf16. |
| define <16 x bfloat> @fma_load_312_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp) { |
| ; CHECK-LABEL: fma_load_312_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd132bf16 (%rdi), %ymm1, %ymm0 |
| ; CHECK-NEXT: retq |
| %z = load <16 x bfloat>, ptr %zp |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %x, <16 x bfloat> %y) |
| ret <16 x bfloat> %a |
| } |
| |
| ; Operand order 123, merge-masked: fma(%x, %y, %z) = x * y + z on <16 x bfloat> where a %mask bit is set, %x elsewhere. |
| ; The assertions expect kmovd into %k1 followed by a single masked ymm vfmadd132bf16. |
| define <16 x bfloat> @fma_mask_123_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) { |
| ; CHECK-LABEL: fma_mask_123_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd132bf16 %ymm1, %ymm2, %ymm0 {%k1} |
| ; CHECK-NEXT: retq |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) |
| %b = bitcast i16 %mask to <16 x i1> |
| %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x |
| ret <16 x bfloat> %c |
| } |
| |
| ; Operand order 213, merge-masked: fma(%y, %x, %z) = y * x + z on <16 x bfloat> where a %mask bit is set, %x elsewhere. |
| ; The assertions expect kmovd into %k1 followed by a single masked ymm vfmadd213bf16. |
| define <16 x bfloat> @fma_mask_213_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) { |
| ; CHECK-LABEL: fma_mask_213_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 %ymm2, %ymm1, %ymm0 {%k1} |
| ; CHECK-NEXT: retq |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %x, <16 x bfloat> %z) |
| %b = bitcast i16 %mask to <16 x i1> |
| %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x |
| ret <16 x bfloat> %c |
| } |
| |
| ; Operand order 231, merge-masked: fma(%y, %z, %x) = y * z + x on <16 x bfloat> where a %mask bit is set, %x elsewhere. |
| ; The assertions expect kmovd into %k1 followed by a single masked ymm vfmadd231bf16. |
| define <16 x bfloat> @fma_mask_231_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) { |
| ; CHECK-LABEL: fma_mask_231_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd231bf16 %ymm2, %ymm1, %ymm0 {%k1} |
| ; CHECK-NEXT: retq |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %z, <16 x bfloat> %x) |
| %b = bitcast i16 %mask to <16 x i1> |
| %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x |
| ret <16 x bfloat> %c |
| } |
| |
| ; Operand order 321, merge-masked: fma(%z, %y, %x) = z * y + x on <16 x bfloat> where a %mask bit is set, %x elsewhere. |
| ; The assertions expect kmovd into %k1 followed by a single masked ymm vfmadd231bf16. |
| define <16 x bfloat> @fma_mask_321_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) { |
| ; CHECK-LABEL: fma_mask_321_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd231bf16 %ymm1, %ymm2, %ymm0 {%k1} |
| ; CHECK-NEXT: retq |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %y, <16 x bfloat> %x) |
| %b = bitcast i16 %mask to <16 x i1> |
| %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x |
| ret <16 x bfloat> %c |
| } |
| |
| ; Operand order 132, merge-masked: fma(%x, %z, %y) = x * z + y on <16 x bfloat> where a %mask bit is set, %x elsewhere. |
| ; The assertions expect kmovd into %k1 followed by a single masked ymm vfmadd132bf16. |
| define <16 x bfloat> @fma_mask_132_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) { |
| ; CHECK-LABEL: fma_mask_132_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd132bf16 %ymm2, %ymm1, %ymm0 {%k1} |
| ; CHECK-NEXT: retq |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %z, <16 x bfloat> %y) |
| %b = bitcast i16 %mask to <16 x i1> |
| %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x |
| ret <16 x bfloat> %c |
| } |
| |
| ; Operand order 312, merge-masked: fma(%z, %x, %y) = z * x + y on <16 x bfloat> where a %mask bit is set, %x elsewhere. |
| ; The assertions expect kmovd into %k1 followed by a single masked ymm vfmadd213bf16. |
| define <16 x bfloat> @fma_mask_312_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) { |
| ; CHECK-LABEL: fma_mask_312_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 %ymm1, %ymm2, %ymm0 {%k1} |
| ; CHECK-NEXT: retq |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %x, <16 x bfloat> %y) |
| %b = bitcast i16 %mask to <16 x i1> |
| %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x |
| ret <16 x bfloat> %c |
| } |
| |
| ; Operand order 123, zero-masked: fma(%x, %y, %z) = x * y + z on <16 x bfloat> where a %mask bit is set, zero elsewhere. |
| ; The assertions expect kmovd into %k1 followed by a single ymm vfmadd213bf16 with {z}. |
| define <16 x bfloat> @fma_maskz_123_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) { |
| ; CHECK-LABEL: fma_maskz_123_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 %ymm2, %ymm1, %ymm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) |
| %b = bitcast i16 %mask to <16 x i1> |
| %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer |
| ret <16 x bfloat> %c |
| } |
| |
| ; Operand order 213, zero-masked: fma(%y, %x, %z) = y * x + z on <16 x bfloat> where a %mask bit is set, zero elsewhere. |
| ; The assertions expect kmovd into %k1 followed by a single ymm vfmadd213bf16 with {z}. |
| define <16 x bfloat> @fma_maskz_213_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) { |
| ; CHECK-LABEL: fma_maskz_213_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 %ymm2, %ymm1, %ymm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %x, <16 x bfloat> %z) |
| %b = bitcast i16 %mask to <16 x i1> |
| %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer |
| ret <16 x bfloat> %c |
| } |
| |
| ; Operand order 231, zero-masked: fma(%y, %z, %x) = y * z + x on <16 x bfloat> where a %mask bit is set, zero elsewhere. |
| ; The assertions expect kmovd into %k1 followed by a single ymm vfmadd231bf16 with {z}. |
| define <16 x bfloat> @fma_maskz_231_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) { |
| ; CHECK-LABEL: fma_maskz_231_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd231bf16 %ymm1, %ymm2, %ymm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %z, <16 x bfloat> %x) |
| %b = bitcast i16 %mask to <16 x i1> |
| %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer |
| ret <16 x bfloat> %c |
| } |
| |
| ; Operand order 321, zero-masked: fma(%z, %y, %x) = z * y + x on <16 x bfloat> where a %mask bit is set, zero elsewhere. |
| ; The assertions expect kmovd into %k1 followed by a single ymm vfmadd231bf16 with {z}. |
| define <16 x bfloat> @fma_maskz_321_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) { |
| ; CHECK-LABEL: fma_maskz_321_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd231bf16 %ymm1, %ymm2, %ymm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %y, <16 x bfloat> %x) |
| %b = bitcast i16 %mask to <16 x i1> |
| %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer |
| ret <16 x bfloat> %c |
| } |
| |
| ; Operand order 132, zero-masked: fma(%x, %z, %y) = x * z + y on <16 x bfloat> where a %mask bit is set, zero elsewhere. |
| ; The assertions expect kmovd into %k1 followed by a single ymm vfmadd213bf16 with {z}. |
| define <16 x bfloat> @fma_maskz_132_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) { |
| ; CHECK-LABEL: fma_maskz_132_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 %ymm1, %ymm2, %ymm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %z, <16 x bfloat> %y) |
| %b = bitcast i16 %mask to <16 x i1> |
| %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer |
| ret <16 x bfloat> %c |
| } |
| |
| ; Operand order 312, zero-masked: fma(%z, %x, %y) = z * x + y on <16 x bfloat> where a %mask bit is set, zero elsewhere. |
| ; The assertions expect kmovd into %k1 followed by a single ymm vfmadd213bf16 with {z}. |
| define <16 x bfloat> @fma_maskz_312_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) { |
| ; CHECK-LABEL: fma_maskz_312_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 %ymm1, %ymm2, %ymm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %x, <16 x bfloat> %y) |
| %b = bitcast i16 %mask to <16 x i1> |
| %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer |
| ret <16 x bfloat> %c |
| } |
| |
| ; Operand order 123, merge-masked, %z loaded from %zp: fma(%x, %y, %z) = x * y + z on <16 x bfloat> where a %mask bit is set, %x elsewhere. |
| ; The assertions expect kmovd into %k1 and the load folded into a single masked ymm vfmadd213bf16. |
| define <16 x bfloat> @fma_mask_load_123_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) { |
| ; CHECK-LABEL: fma_mask_load_123_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 (%rdi), %ymm1, %ymm0 {%k1} |
| ; CHECK-NEXT: retq |
| %z = load <16 x bfloat>, ptr %zp |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) |
| %b = bitcast i16 %mask to <16 x i1> |
| %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x |
| ret <16 x bfloat> %c |
| } |
| |
| ; Operand order 213, merge-masked, %z loaded from %zp: fma(%y, %x, %z) = y * x + z on <16 x bfloat> where a %mask bit is set, %x elsewhere. |
| ; The assertions expect kmovd into %k1 and the load folded into a single masked ymm vfmadd213bf16. |
| define <16 x bfloat> @fma_mask_load_213_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) { |
| ; CHECK-LABEL: fma_mask_load_213_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 (%rdi), %ymm1, %ymm0 {%k1} |
| ; CHECK-NEXT: retq |
| %z = load <16 x bfloat>, ptr %zp |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %x, <16 x bfloat> %z) |
| %b = bitcast i16 %mask to <16 x i1> |
| %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x |
| ret <16 x bfloat> %c |
| } |
| |
| ; Operand order 231, merge-masked, %z loaded from %zp: fma(%y, %z, %x) = y * z + x on <16 x bfloat> where a %mask bit is set, %x elsewhere. |
| ; The assertions expect kmovd into %k1 and the load folded into a single masked ymm vfmadd231bf16. |
| define <16 x bfloat> @fma_mask_load_231_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) { |
| ; CHECK-LABEL: fma_mask_load_231_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd231bf16 (%rdi), %ymm1, %ymm0 {%k1} |
| ; CHECK-NEXT: retq |
| %z = load <16 x bfloat>, ptr %zp |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %z, <16 x bfloat> %x) |
| %b = bitcast i16 %mask to <16 x i1> |
| %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x |
| ret <16 x bfloat> %c |
| } |
| |
| ; Operand order 321, merge-masked, %z loaded from %zp: fma(%z, %y, %x) = z * y + x on <16 x bfloat> where a %mask bit is set, %x elsewhere. |
| ; The assertions expect kmovd into %k1 and the load folded into a single masked ymm vfmadd231bf16. |
| define <16 x bfloat> @fma_mask_load_321_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) { |
| ; CHECK-LABEL: fma_mask_load_321_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd231bf16 (%rdi), %ymm1, %ymm0 {%k1} |
| ; CHECK-NEXT: retq |
| %z = load <16 x bfloat>, ptr %zp |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %y, <16 x bfloat> %x) |
| %b = bitcast i16 %mask to <16 x i1> |
| %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x |
| ret <16 x bfloat> %c |
| } |
| |
| ; Operand order 132, merge-masked, %z loaded from %zp: fma(%x, %z, %y) = x * z + y on <16 x bfloat> where a %mask bit is set, %x elsewhere. |
| ; The assertions expect kmovd into %k1 and the load folded into a single masked ymm vfmadd132bf16. |
| define <16 x bfloat> @fma_mask_load_132_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) { |
| ; CHECK-LABEL: fma_mask_load_132_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd132bf16 (%rdi), %ymm1, %ymm0 {%k1} |
| ; CHECK-NEXT: retq |
| %z = load <16 x bfloat>, ptr %zp |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %z, <16 x bfloat> %y) |
| %b = bitcast i16 %mask to <16 x i1> |
| %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x |
| ret <16 x bfloat> %c |
| } |
| |
| ; Operand order 312, merge-masked, %z loaded from %zp: fma(%z, %x, %y) = z * x + y on <16 x bfloat> where a %mask bit is set, %x elsewhere. |
| ; The assertions expect kmovd into %k1 and the load folded into a single masked ymm vfmadd132bf16. |
| define <16 x bfloat> @fma_mask_load_312_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) { |
| ; CHECK-LABEL: fma_mask_load_312_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd132bf16 (%rdi), %ymm1, %ymm0 {%k1} |
| ; CHECK-NEXT: retq |
| %z = load <16 x bfloat>, ptr %zp |
| %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %x, <16 x bfloat> %y) |
| %b = bitcast i16 %mask to <16 x i1> |
| %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x |
| ret <16 x bfloat> %c |
| } |
| |
| ; Zero-masked FMA computing %x * %y + %z with %z loaded from %zp; lanes not selected by %mask become zero. |
| define <16 x bfloat> @fma_maskz_load_123_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) { |
| ; CHECK-LABEL: fma_maskz_load_123_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 (%rdi), %ymm1, %ymm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %z = load <16 x bfloat>, ptr %zp |
| %fma = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) |
| %lanes = bitcast i16 %mask to <16 x i1> |
| %res = select <16 x i1> %lanes, <16 x bfloat> %fma, <16 x bfloat> zeroinitializer |
| ret <16 x bfloat> %res |
| } |
| |
| ; Zero-masked FMA computing %y * %x + %z with %z loaded from %zp; lanes not selected by %mask become zero. |
| define <16 x bfloat> @fma_maskz_load_213_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) { |
| ; CHECK-LABEL: fma_maskz_load_213_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 (%rdi), %ymm1, %ymm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %z = load <16 x bfloat>, ptr %zp |
| %fma = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %x, <16 x bfloat> %z) |
| %lanes = bitcast i16 %mask to <16 x i1> |
| %res = select <16 x i1> %lanes, <16 x bfloat> %fma, <16 x bfloat> zeroinitializer |
| ret <16 x bfloat> %res |
| } |
| |
| ; Zero-masked FMA computing %y * %z + %x with %z loaded from %zp; lanes not selected by %mask become zero. |
| define <16 x bfloat> @fma_maskz_load_231_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) { |
| ; CHECK-LABEL: fma_maskz_load_231_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd231bf16 (%rdi), %ymm1, %ymm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %z = load <16 x bfloat>, ptr %zp |
| %fma = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %z, <16 x bfloat> %x) |
| %lanes = bitcast i16 %mask to <16 x i1> |
| %res = select <16 x i1> %lanes, <16 x bfloat> %fma, <16 x bfloat> zeroinitializer |
| ret <16 x bfloat> %res |
| } |
| |
| ; Zero-masked FMA computing %z * %y + %x with %z loaded from %zp; lanes not selected by %mask become zero. |
| define <16 x bfloat> @fma_maskz_load_321_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) { |
| ; CHECK-LABEL: fma_maskz_load_321_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd231bf16 (%rdi), %ymm1, %ymm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %z = load <16 x bfloat>, ptr %zp |
| %fma = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %y, <16 x bfloat> %x) |
| %lanes = bitcast i16 %mask to <16 x i1> |
| %res = select <16 x i1> %lanes, <16 x bfloat> %fma, <16 x bfloat> zeroinitializer |
| ret <16 x bfloat> %res |
| } |
| |
| ; Zero-masked FMA computing %x * %z + %y with %z loaded from %zp; lanes not selected by %mask become zero. |
| define <16 x bfloat> @fma_maskz_load_132_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) { |
| ; CHECK-LABEL: fma_maskz_load_132_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd132bf16 (%rdi), %ymm1, %ymm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %z = load <16 x bfloat>, ptr %zp |
| %fma = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %z, <16 x bfloat> %y) |
| %lanes = bitcast i16 %mask to <16 x i1> |
| %res = select <16 x i1> %lanes, <16 x bfloat> %fma, <16 x bfloat> zeroinitializer |
| ret <16 x bfloat> %res |
| } |
| |
| ; Zero-masked FMA computing %z * %x + %y with %z loaded from %zp; lanes not selected by %mask become zero. |
| define <16 x bfloat> @fma_maskz_load_312_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) { |
| ; CHECK-LABEL: fma_maskz_load_312_v16bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd132bf16 (%rdi), %ymm1, %ymm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %z = load <16 x bfloat>, ptr %zp |
| %fma = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %x, <16 x bfloat> %y) |
| %lanes = bitcast i16 %mask to <16 x i1> |
| %res = select <16 x i1> %lanes, <16 x bfloat> %fma, <16 x bfloat> zeroinitializer |
| ret <16 x bfloat> %res |
| } |
| |
| ; Unmasked 512-bit FMA on register operands computing %x * %y + %z. |
| define <32 x bfloat> @fma_123_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) { |
| ; CHECK-LABEL: fma_123_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd213bf16 %zmm2, %zmm1, %zmm0 |
| ; CHECK-NEXT: retq |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) |
| ret <32 x bfloat> %fma |
| } |
| |
| ; Unmasked 512-bit FMA on register operands computing %y * %x + %z. |
| define <32 x bfloat> @fma_213_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) { |
| ; CHECK-LABEL: fma_213_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd213bf16 %zmm2, %zmm1, %zmm0 |
| ; CHECK-NEXT: retq |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %x, <32 x bfloat> %z) |
| ret <32 x bfloat> %fma |
| } |
| |
| ; Unmasked 512-bit FMA on register operands computing %y * %z + %x. |
| define <32 x bfloat> @fma_231_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) { |
| ; CHECK-LABEL: fma_231_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd231bf16 %zmm1, %zmm2, %zmm0 |
| ; CHECK-NEXT: retq |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %z, <32 x bfloat> %x) |
| ret <32 x bfloat> %fma |
| } |
| |
| ; Unmasked 512-bit FMA on register operands computing %z * %y + %x. |
| define <32 x bfloat> @fma_321_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) { |
| ; CHECK-LABEL: fma_321_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd231bf16 %zmm1, %zmm2, %zmm0 |
| ; CHECK-NEXT: retq |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %y, <32 x bfloat> %x) |
| ret <32 x bfloat> %fma |
| } |
| |
| ; Unmasked 512-bit FMA on register operands computing %x * %z + %y. |
| define <32 x bfloat> @fma_132_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) { |
| ; CHECK-LABEL: fma_132_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd213bf16 %zmm1, %zmm2, %zmm0 |
| ; CHECK-NEXT: retq |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %z, <32 x bfloat> %y) |
| ret <32 x bfloat> %fma |
| } |
| |
| ; Unmasked 512-bit FMA on register operands computing %z * %x + %y. |
| define <32 x bfloat> @fma_312_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) { |
| ; CHECK-LABEL: fma_312_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd213bf16 %zmm1, %zmm2, %zmm0 |
| ; CHECK-NEXT: retq |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %x, <32 x bfloat> %y) |
| ret <32 x bfloat> %fma |
| } |
| |
| ; Unmasked 512-bit FMA computing %x * %y + %z, where %z is loaded from %zp. |
| define <32 x bfloat> @fma_load_123_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp) { |
| ; CHECK-LABEL: fma_load_123_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd213bf16 (%rdi), %zmm1, %zmm0 |
| ; CHECK-NEXT: retq |
| %z = load <32 x bfloat>, ptr %zp |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) |
| ret <32 x bfloat> %fma |
| } |
| |
| ; Unmasked 512-bit FMA computing %y * %x + %z, where %z is loaded from %zp. |
| define <32 x bfloat> @fma_load_213_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp) { |
| ; CHECK-LABEL: fma_load_213_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd213bf16 (%rdi), %zmm1, %zmm0 |
| ; CHECK-NEXT: retq |
| %z = load <32 x bfloat>, ptr %zp |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %x, <32 x bfloat> %z) |
| ret <32 x bfloat> %fma |
| } |
| |
| ; Unmasked 512-bit FMA computing %y * %z + %x, where %z is loaded from %zp. |
| define <32 x bfloat> @fma_load_231_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp) { |
| ; CHECK-LABEL: fma_load_231_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd231bf16 (%rdi), %zmm1, %zmm0 |
| ; CHECK-NEXT: retq |
| %z = load <32 x bfloat>, ptr %zp |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %z, <32 x bfloat> %x) |
| ret <32 x bfloat> %fma |
| } |
| |
| ; Unmasked 512-bit FMA computing %z * %y + %x, where %z is loaded from %zp. |
| define <32 x bfloat> @fma_load_321_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp) { |
| ; CHECK-LABEL: fma_load_321_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd231bf16 (%rdi), %zmm1, %zmm0 |
| ; CHECK-NEXT: retq |
| %z = load <32 x bfloat>, ptr %zp |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %y, <32 x bfloat> %x) |
| ret <32 x bfloat> %fma |
| } |
| |
| ; Unmasked 512-bit FMA computing %x * %z + %y, where %z is loaded from %zp. |
| define <32 x bfloat> @fma_load_132_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp) { |
| ; CHECK-LABEL: fma_load_132_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd132bf16 (%rdi), %zmm1, %zmm0 |
| ; CHECK-NEXT: retq |
| %z = load <32 x bfloat>, ptr %zp |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %z, <32 x bfloat> %y) |
| ret <32 x bfloat> %fma |
| } |
| |
| ; Unmasked 512-bit FMA computing %z * %x + %y, where %z is loaded from %zp. |
| define <32 x bfloat> @fma_load_312_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp) { |
| ; CHECK-LABEL: fma_load_312_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfmadd132bf16 (%rdi), %zmm1, %zmm0 |
| ; CHECK-NEXT: retq |
| %z = load <32 x bfloat>, ptr %zp |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %x, <32 x bfloat> %y) |
| ret <32 x bfloat> %fma |
| } |
| |
| ; Merge-masked 512-bit FMA computing %x * %y + %z; lanes not selected by %mask keep %x. |
| define <32 x bfloat> @fma_mask_123_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) { |
| ; CHECK-LABEL: fma_mask_123_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd132bf16 %zmm1, %zmm2, %zmm0 {%k1} |
| ; CHECK-NEXT: retq |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %res = select <32 x i1> %lanes, <32 x bfloat> %fma, <32 x bfloat> %x |
| ret <32 x bfloat> %res |
| } |
| |
| ; Merge-masked 512-bit FMA computing %y * %x + %z; lanes not selected by %mask keep %x. |
| define <32 x bfloat> @fma_mask_213_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) { |
| ; CHECK-LABEL: fma_mask_213_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 %zmm2, %zmm1, %zmm0 {%k1} |
| ; CHECK-NEXT: retq |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %x, <32 x bfloat> %z) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %res = select <32 x i1> %lanes, <32 x bfloat> %fma, <32 x bfloat> %x |
| ret <32 x bfloat> %res |
| } |
| |
| ; Merge-masked 512-bit FMA computing %y * %z + %x; lanes not selected by %mask keep %x. |
| define <32 x bfloat> @fma_mask_231_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) { |
| ; CHECK-LABEL: fma_mask_231_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd231bf16 %zmm2, %zmm1, %zmm0 {%k1} |
| ; CHECK-NEXT: retq |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %z, <32 x bfloat> %x) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %res = select <32 x i1> %lanes, <32 x bfloat> %fma, <32 x bfloat> %x |
| ret <32 x bfloat> %res |
| } |
| |
| ; Merge-masked 512-bit FMA computing %z * %y + %x; lanes not selected by %mask keep %x. |
| define <32 x bfloat> @fma_mask_321_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) { |
| ; CHECK-LABEL: fma_mask_321_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd231bf16 %zmm1, %zmm2, %zmm0 {%k1} |
| ; CHECK-NEXT: retq |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %y, <32 x bfloat> %x) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %res = select <32 x i1> %lanes, <32 x bfloat> %fma, <32 x bfloat> %x |
| ret <32 x bfloat> %res |
| } |
| |
| ; Merge-masked 512-bit FMA computing %x * %z + %y; lanes not selected by %mask keep %x. |
| define <32 x bfloat> @fma_mask_132_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) { |
| ; CHECK-LABEL: fma_mask_132_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd132bf16 %zmm2, %zmm1, %zmm0 {%k1} |
| ; CHECK-NEXT: retq |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %z, <32 x bfloat> %y) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %res = select <32 x i1> %lanes, <32 x bfloat> %fma, <32 x bfloat> %x |
| ret <32 x bfloat> %res |
| } |
| |
| ; Merge-masked 512-bit FMA computing %z * %x + %y; lanes not selected by %mask keep %x. |
| define <32 x bfloat> @fma_mask_312_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) { |
| ; CHECK-LABEL: fma_mask_312_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 %zmm1, %zmm2, %zmm0 {%k1} |
| ; CHECK-NEXT: retq |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %x, <32 x bfloat> %y) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %res = select <32 x i1> %lanes, <32 x bfloat> %fma, <32 x bfloat> %x |
| ret <32 x bfloat> %res |
| } |
| |
| ; Zero-masked 512-bit FMA computing %x * %y + %z; lanes not selected by %mask become zero. |
| define <32 x bfloat> @fma_maskz_123_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) { |
| ; CHECK-LABEL: fma_maskz_123_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 %zmm2, %zmm1, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %res = select <32 x i1> %lanes, <32 x bfloat> %fma, <32 x bfloat> zeroinitializer |
| ret <32 x bfloat> %res |
| } |
| |
| ; Zero-masked 512-bit FMA computing %y * %x + %z; lanes not selected by %mask become zero. |
| define <32 x bfloat> @fma_maskz_213_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) { |
| ; CHECK-LABEL: fma_maskz_213_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 %zmm2, %zmm1, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %x, <32 x bfloat> %z) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %res = select <32 x i1> %lanes, <32 x bfloat> %fma, <32 x bfloat> zeroinitializer |
| ret <32 x bfloat> %res |
| } |
| |
| ; Zero-masked 512-bit FMA computing %y * %z + %x; lanes not selected by %mask become zero. |
| define <32 x bfloat> @fma_maskz_231_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) { |
| ; CHECK-LABEL: fma_maskz_231_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd231bf16 %zmm1, %zmm2, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %z, <32 x bfloat> %x) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %res = select <32 x i1> %lanes, <32 x bfloat> %fma, <32 x bfloat> zeroinitializer |
| ret <32 x bfloat> %res |
| } |
| |
| ; Zero-masked 512-bit FMA computing %z * %y + %x; lanes not selected by %mask become zero. |
| define <32 x bfloat> @fma_maskz_321_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) { |
| ; CHECK-LABEL: fma_maskz_321_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd231bf16 %zmm1, %zmm2, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %y, <32 x bfloat> %x) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %res = select <32 x i1> %lanes, <32 x bfloat> %fma, <32 x bfloat> zeroinitializer |
| ret <32 x bfloat> %res |
| } |
| |
| ; Zero-masked 512-bit FMA computing %x * %z + %y; lanes not selected by %mask become zero. |
| define <32 x bfloat> @fma_maskz_132_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) { |
| ; CHECK-LABEL: fma_maskz_132_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 %zmm1, %zmm2, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %z, <32 x bfloat> %y) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %res = select <32 x i1> %lanes, <32 x bfloat> %fma, <32 x bfloat> zeroinitializer |
| ret <32 x bfloat> %res |
| } |
| |
| ; Zero-masked 512-bit FMA computing %z * %x + %y; lanes not selected by %mask become zero. |
| define <32 x bfloat> @fma_maskz_312_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) { |
| ; CHECK-LABEL: fma_maskz_312_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 %zmm1, %zmm2, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %x, <32 x bfloat> %y) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %res = select <32 x i1> %lanes, <32 x bfloat> %fma, <32 x bfloat> zeroinitializer |
| ret <32 x bfloat> %res |
| } |
| |
| ; Merge-masked 512-bit FMA computing %x * %y + %z with %z loaded from %zp; lanes not selected by %mask keep %x. |
| define <32 x bfloat> @fma_mask_load_123_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) { |
| ; CHECK-LABEL: fma_mask_load_123_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 (%rdi), %zmm1, %zmm0 {%k1} |
| ; CHECK-NEXT: retq |
| %z = load <32 x bfloat>, ptr %zp |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %res = select <32 x i1> %lanes, <32 x bfloat> %fma, <32 x bfloat> %x |
| ret <32 x bfloat> %res |
| } |
| |
| ; Merge-masked 512-bit FMA computing %y * %x + %z with %z loaded from %zp; lanes not selected by %mask keep %x. |
| define <32 x bfloat> @fma_mask_load_213_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) { |
| ; CHECK-LABEL: fma_mask_load_213_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 (%rdi), %zmm1, %zmm0 {%k1} |
| ; CHECK-NEXT: retq |
| %z = load <32 x bfloat>, ptr %zp |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %x, <32 x bfloat> %z) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %res = select <32 x i1> %lanes, <32 x bfloat> %fma, <32 x bfloat> %x |
| ret <32 x bfloat> %res |
| } |
| |
| ; Merge-masked 512-bit FMA computing %y * %z + %x with %z loaded from %zp; lanes not selected by %mask keep %x. |
| define <32 x bfloat> @fma_mask_load_231_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) { |
| ; CHECK-LABEL: fma_mask_load_231_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd231bf16 (%rdi), %zmm1, %zmm0 {%k1} |
| ; CHECK-NEXT: retq |
| %z = load <32 x bfloat>, ptr %zp |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %z, <32 x bfloat> %x) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %res = select <32 x i1> %lanes, <32 x bfloat> %fma, <32 x bfloat> %x |
| ret <32 x bfloat> %res |
| } |
| |
| ; Merge-masked 512-bit FMA computing %z * %y + %x with %z loaded from %zp; lanes not selected by %mask keep %x. |
| define <32 x bfloat> @fma_mask_load_321_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) { |
| ; CHECK-LABEL: fma_mask_load_321_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd231bf16 (%rdi), %zmm1, %zmm0 {%k1} |
| ; CHECK-NEXT: retq |
| %z = load <32 x bfloat>, ptr %zp |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %y, <32 x bfloat> %x) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %res = select <32 x i1> %lanes, <32 x bfloat> %fma, <32 x bfloat> %x |
| ret <32 x bfloat> %res |
| } |
| |
| ; Merge-masked 512-bit FMA computing %x * %z + %y with %z loaded from %zp; lanes not selected by %mask keep %x. |
| define <32 x bfloat> @fma_mask_load_132_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) { |
| ; CHECK-LABEL: fma_mask_load_132_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd132bf16 (%rdi), %zmm1, %zmm0 {%k1} |
| ; CHECK-NEXT: retq |
| %z = load <32 x bfloat>, ptr %zp |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %z, <32 x bfloat> %y) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %res = select <32 x i1> %lanes, <32 x bfloat> %fma, <32 x bfloat> %x |
| ret <32 x bfloat> %res |
| } |
| |
| ; Merge-masked 512-bit FMA computing %z * %x + %y with %z loaded from %zp; lanes not selected by %mask keep %x. |
| define <32 x bfloat> @fma_mask_load_312_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) { |
| ; CHECK-LABEL: fma_mask_load_312_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd132bf16 (%rdi), %zmm1, %zmm0 {%k1} |
| ; CHECK-NEXT: retq |
| %z = load <32 x bfloat>, ptr %zp |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %x, <32 x bfloat> %y) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %res = select <32 x i1> %lanes, <32 x bfloat> %fma, <32 x bfloat> %x |
| ret <32 x bfloat> %res |
| } |
| |
| ; Zero-masked 512-bit FMA computing %x * %y + %z with %z loaded from %zp; lanes not selected by %mask become zero. |
| define <32 x bfloat> @fma_maskz_load_123_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) { |
| ; CHECK-LABEL: fma_maskz_load_123_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 (%rdi), %zmm1, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %z = load <32 x bfloat>, ptr %zp |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %res = select <32 x i1> %lanes, <32 x bfloat> %fma, <32 x bfloat> zeroinitializer |
| ret <32 x bfloat> %res |
| } |
| |
| ; Zero-masked 512-bit FMA computing %y * %x + %z with %z loaded from %zp; lanes not selected by %mask become zero. |
| define <32 x bfloat> @fma_maskz_load_213_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) { |
| ; CHECK-LABEL: fma_maskz_load_213_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd213bf16 (%rdi), %zmm1, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %z = load <32 x bfloat>, ptr %zp |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %x, <32 x bfloat> %z) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %res = select <32 x i1> %lanes, <32 x bfloat> %fma, <32 x bfloat> zeroinitializer |
| ret <32 x bfloat> %res |
| } |
| |
| ; Zero-masked 512-bit FMA computing %y * %z + %x with %z loaded from %zp; lanes not selected by %mask become zero. |
| define <32 x bfloat> @fma_maskz_load_231_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) { |
| ; CHECK-LABEL: fma_maskz_load_231_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd231bf16 (%rdi), %zmm1, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %z = load <32 x bfloat>, ptr %zp |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %z, <32 x bfloat> %x) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %res = select <32 x i1> %lanes, <32 x bfloat> %fma, <32 x bfloat> zeroinitializer |
| ret <32 x bfloat> %res |
| } |
| |
| ; Zero-masked 512-bit FMA computing %z * %y + %x with %z loaded from %zp; lanes not selected by %mask become zero. |
| define <32 x bfloat> @fma_maskz_load_321_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) { |
| ; CHECK-LABEL: fma_maskz_load_321_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd231bf16 (%rdi), %zmm1, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %z = load <32 x bfloat>, ptr %zp |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %y, <32 x bfloat> %x) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %res = select <32 x i1> %lanes, <32 x bfloat> %fma, <32 x bfloat> zeroinitializer |
| ret <32 x bfloat> %res |
| } |
| |
| ; Zero-masked 512-bit FMA computing %x * %z + %y with %z loaded from %zp; lanes not selected by %mask become zero. |
| define <32 x bfloat> @fma_maskz_load_132_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) { |
| ; CHECK-LABEL: fma_maskz_load_132_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd132bf16 (%rdi), %zmm1, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %z = load <32 x bfloat>, ptr %zp |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %z, <32 x bfloat> %y) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %res = select <32 x i1> %lanes, <32 x bfloat> %fma, <32 x bfloat> zeroinitializer |
| ret <32 x bfloat> %res |
| } |
| |
| ; Zero-masked 512-bit FMA computing %z * %x + %y with %z loaded from %zp; lanes not selected by %mask become zero. |
| define <32 x bfloat> @fma_maskz_load_312_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) { |
| ; CHECK-LABEL: fma_maskz_load_312_v32bf16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vfmadd132bf16 (%rdi), %zmm1, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %z = load <32 x bfloat>, ptr %zp |
| %fma = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %x, <32 x bfloat> %y) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %res = select <32 x i1> %lanes, <32 x bfloat> %fma, <32 x bfloat> zeroinitializer |
| ret <32 x bfloat> %res |
| } |