| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 |
| ; RUN: llc -force-streaming -verify-machineinstrs < %s | FileCheck %s |
| |
| ; The llc invocation above passes no -mtriple, so the target comes from this module-level triple. |
| target triple = "aarch64-linux" |
| |
| ; Widening |
| ; Calls the smop4a.wide.2x2 intrinsic (nxv16i8 operand pairs) with tile index 0. |
| ; The assertions expect a single `smop4a za0.s` on { z0.b, z1.b } and { z24.b, z25.b }, |
| ; with the second pair first copied from z2/z3 into z24/z25. |
| define void @mop4a_za32_s8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) #0 { |
| ; CHECK-LABEL: mop4a_za32_s8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: smop4a za0.s, { z0.b, z1.b }, { z24.b, z25.b } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.smop4a.wide.2x2.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) |
| ret void |
| } |
| |
| ; Calls the smop4s.wide.2x2 intrinsic (nxv16i8 operand pairs) with tile index 0. |
| ; The assertions expect a single `smop4s za0.s` on { z0.b, z1.b } and { z24.b, z25.b }. |
| define void @mop4s_za32_s8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) #0 { |
| ; CHECK-LABEL: mop4s_za32_s8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: smop4s za0.s, { z0.b, z1.b }, { z24.b, z25.b } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.smop4s.wide.2x2.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) |
| ret void |
| } |
| |
| ; Calls the umop4a.wide.2x2 intrinsic (nxv16i8 operand pairs) with tile index 0. |
| ; The assertions expect a single `umop4a za0.s` on { z0.b, z1.b } and { z24.b, z25.b }. |
| define void @mop4a_za32_u8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) #0 { |
| ; CHECK-LABEL: mop4a_za32_u8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: umop4a za0.s, { z0.b, z1.b }, { z24.b, z25.b } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.umop4a.wide.2x2.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) |
| ret void |
| } |
| |
| ; Calls the umop4s.wide.2x2 intrinsic (nxv16i8 operand pairs) with tile index 0. |
| ; The assertions expect a single `umop4s za0.s` on { z0.b, z1.b } and { z24.b, z25.b }. |
| define void @mop4s_za32_u8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) #0 { |
| ; CHECK-LABEL: mop4s_za32_u8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: umop4s za0.s, { z0.b, z1.b }, { z24.b, z25.b } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.umop4s.wide.2x2.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) |
| ret void |
| } |
| |
| ; Calls the sumop4a.wide.2x2 intrinsic (nxv16i8 operand pairs) with tile index 0. |
| ; The assertions expect a single `sumop4a za0.s` on { z0.b, z1.b } and { z24.b, z25.b }. |
| define void @mop4a_za32_s8_u8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) #0 { |
| ; CHECK-LABEL: mop4a_za32_s8_u8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: sumop4a za0.s, { z0.b, z1.b }, { z24.b, z25.b } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.sumop4a.wide.2x2.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) |
| ret void |
| } |
| |
| ; Calls the sumop4s.wide.2x2 intrinsic (nxv16i8 operand pairs) with tile index 0. |
| ; The assertions expect a single `sumop4s za0.s` on { z0.b, z1.b } and { z24.b, z25.b }. |
| define void @mop4s_za32_s8_u8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) #0 { |
| ; CHECK-LABEL: mop4s_za32_s8_u8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: sumop4s za0.s, { z0.b, z1.b }, { z24.b, z25.b } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.sumop4s.wide.2x2.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) |
| ret void |
| } |
| |
| ; Calls the usmop4a.wide.2x2 intrinsic (nxv16i8 operand pairs) with tile index 0. |
| ; The assertions expect a single `usmop4a za0.s` on { z0.b, z1.b } and { z24.b, z25.b }. |
| define void @mop4a_za32_u8_s8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) #0 { |
| ; CHECK-LABEL: mop4a_za32_u8_s8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: usmop4a za0.s, { z0.b, z1.b }, { z24.b, z25.b } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.usmop4a.wide.2x2.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) |
| ret void |
| } |
| |
| ; Calls the usmop4s.wide.2x2 intrinsic (nxv16i8 operand pairs) with tile index 0. |
| ; The assertions expect a single `usmop4s za0.s` on { z0.b, z1.b } and { z24.b, z25.b }. |
| define void @mop4s_za32_u8_s8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) #0 { |
| ; CHECK-LABEL: mop4s_za32_u8_s8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: usmop4s za0.s, { z0.b, z1.b }, { z24.b, z25.b } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.usmop4s.wide.2x2.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) |
| ret void |
| } |
| |
| |
| ; Calls the smop4a.wide.2x2 intrinsic (nxv8i16 operand pairs) with tile index 0. |
| ; The assertions expect a single `smop4a za0.s` on { z0.h, z1.h } and { z24.h, z25.h }. |
| define void @mop4a_za32_s16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) #0 { |
| ; CHECK-LABEL: mop4a_za32_s16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: smop4a za0.s, { z0.h, z1.h }, { z24.h, z25.h } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.smop4a.wide.2x2.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) |
| ret void |
| } |
| |
| ; Calls the smop4s.wide.2x2 intrinsic (nxv8i16 operand pairs) with tile index 0. |
| ; The assertions expect a single `smop4s za0.s` on { z0.h, z1.h } and { z24.h, z25.h }. |
| define void @mop4s_za32_s16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) #0 { |
| ; CHECK-LABEL: mop4s_za32_s16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: smop4s za0.s, { z0.h, z1.h }, { z24.h, z25.h } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.smop4s.wide.2x2.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) |
| ret void |
| } |
| |
| ; Calls the umop4a.wide.2x2 intrinsic (nxv8i16 operand pairs) with tile index 0. |
| ; The assertions expect a single `umop4a za0.s` on { z0.h, z1.h } and { z24.h, z25.h }. |
| define void @mop4a_za32_u16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) #0 { |
| ; CHECK-LABEL: mop4a_za32_u16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: umop4a za0.s, { z0.h, z1.h }, { z24.h, z25.h } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.umop4a.wide.2x2.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) |
| ret void |
| } |
| |
| ; Calls the umop4s.wide.2x2 intrinsic (nxv8i16 operand pairs) with tile index 0. |
| ; The assertions expect a single `umop4s za0.s` on { z0.h, z1.h } and { z24.h, z25.h }. |
| define void @mop4s_za32_u16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) #0 { |
| ; CHECK-LABEL: mop4s_za32_u16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: umop4s za0.s, { z0.h, z1.h }, { z24.h, z25.h } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.umop4s.wide.2x2.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) |
| ret void |
| } |
| |
| ; Calls the mop4a.wide.2x2 intrinsic (nxv8f16 operand pairs) with tile index 0. |
| ; The assertions expect a single `fmop4a za0.s` on { z0.h, z1.h } and { z24.h, z25.h }. |
| define void @mop4a_za32_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) #0 { |
| ; CHECK-LABEL: mop4a_za32_f16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: fmop4a za0.s, { z0.h, z1.h }, { z24.h, z25.h } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.mop4a.wide.2x2.nxv8f16(i32 0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) |
| ret void |
| } |
| |
| ; Calls the mop4s.wide.2x2 intrinsic (nxv8f16 operand pairs) with tile index 0. |
| ; The assertions expect a single `fmop4s za0.s` on { z0.h, z1.h } and { z24.h, z25.h }. |
| define void @mop4s_za32_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) #0 { |
| ; CHECK-LABEL: mop4s_za32_f16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: fmop4s za0.s, { z0.h, z1.h }, { z24.h, z25.h } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.mop4s.wide.2x2.nxv8f16(i32 0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) |
| ret void |
| } |
| |
| ; Calls the mop4a.wide.2x2 intrinsic (nxv8bf16 operand pairs) with tile index 0. |
| ; The assertions expect a single `bfmop4a za0.s` on { z0.h, z1.h } and { z24.h, z25.h }. |
| define void @mop4a_za32_bf16(<vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) #0 { |
| ; CHECK-LABEL: mop4a_za32_bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: bfmop4a za0.s, { z0.h, z1.h }, { z24.h, z25.h } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.mop4a.wide.2x2.nxv8bf16(i32 0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) |
| ret void |
| } |
| |
| ; Calls the mop4s.wide.2x2 intrinsic (nxv8bf16 operand pairs) with tile index 0. |
| ; The assertions expect a single `bfmop4s za0.s` on { z0.h, z1.h } and { z24.h, z25.h }. |
| define void @mop4s_za32_bf16(<vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) #0 { |
| ; CHECK-LABEL: mop4s_za32_bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: bfmop4s za0.s, { z0.h, z1.h }, { z24.h, z25.h } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.mop4s.wide.2x2.nxv8bf16(i32 0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) |
| ret void |
| } |
| |
| ; Calls the smop4a.za64.wide.2x2 intrinsic (nxv8i16 operand pairs) with tile index 0. |
| ; The assertions expect a single `smop4a za0.d` on { z0.h, z1.h } and { z24.h, z25.h }. |
| define void @mop4a_za64_s16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) #0 { |
| ; CHECK-LABEL: mop4a_za64_s16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: smop4a za0.d, { z0.h, z1.h }, { z24.h, z25.h } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.smop4a.za64.wide.2x2.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) |
| ret void |
| } |
| |
| ; Calls the smop4s.za64.wide.2x2 intrinsic (nxv8i16 operand pairs) with tile index 0. |
| ; The assertions expect a single `smop4s za0.d` on { z0.h, z1.h } and { z24.h, z25.h }. |
| define void @mop4s_za64_s16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) #0 { |
| ; CHECK-LABEL: mop4s_za64_s16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: smop4s za0.d, { z0.h, z1.h }, { z24.h, z25.h } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.smop4s.za64.wide.2x2.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) |
| ret void |
| } |
| |
| ; Calls the umop4a.za64.wide.2x2 intrinsic (nxv8i16 operand pairs) with tile index 0. |
| ; The assertions expect a single `umop4a za0.d` on { z0.h, z1.h } and { z24.h, z25.h }. |
| define void @mop4a_za64_u16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) #0 { |
| ; CHECK-LABEL: mop4a_za64_u16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: umop4a za0.d, { z0.h, z1.h }, { z24.h, z25.h } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.umop4a.za64.wide.2x2.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) |
| ret void |
| } |
| |
| ; Calls the umop4s.za64.wide.2x2 intrinsic (nxv8i16 operand pairs) with tile index 0. |
| ; The assertions expect a single `umop4s za0.d` on { z0.h, z1.h } and { z24.h, z25.h }. |
| define void @mop4s_za64_u16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) #0 { |
| ; CHECK-LABEL: mop4s_za64_u16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: umop4s za0.d, { z0.h, z1.h }, { z24.h, z25.h } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.umop4s.za64.wide.2x2.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) |
| ret void |
| } |
| |
| ; Calls the sumop4a.za64.wide.2x2 intrinsic (nxv8i16 operand pairs) with tile index 0. |
| ; The assertions expect a single `sumop4a za0.d` on { z0.h, z1.h } and { z24.h, z25.h }. |
| define void @mop4a_za64_s16_u16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) #0 { |
| ; CHECK-LABEL: mop4a_za64_s16_u16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: sumop4a za0.d, { z0.h, z1.h }, { z24.h, z25.h } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.sumop4a.za64.wide.2x2.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) |
| ret void |
| } |
| |
| ; Calls the sumop4s.za64.wide.2x2 intrinsic (nxv8i16 operand pairs) with tile index 0. |
| ; The assertions expect a single `sumop4s za0.d` on { z0.h, z1.h } and { z24.h, z25.h }. |
| define void @mop4s_za64_s16_u16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) #0 { |
| ; CHECK-LABEL: mop4s_za64_s16_u16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: sumop4s za0.d, { z0.h, z1.h }, { z24.h, z25.h } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.sumop4s.za64.wide.2x2.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) |
| ret void |
| } |
| |
| ; Calls the usmop4a.za64.wide.2x2 intrinsic (nxv8i16 operand pairs) with tile index 0. |
| ; The assertions expect a single `usmop4a za0.d` on { z0.h, z1.h } and { z24.h, z25.h }. |
| define void @mop4a_za64_u16_s16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) #0 { |
| ; CHECK-LABEL: mop4a_za64_u16_s16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: usmop4a za0.d, { z0.h, z1.h }, { z24.h, z25.h } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.usmop4a.za64.wide.2x2.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) |
| ret void |
| } |
| |
| ; Calls the usmop4s.za64.wide.2x2 intrinsic (nxv8i16 operand pairs) with tile index 0. |
| ; The assertions expect a single `usmop4s za0.d` on { z0.h, z1.h } and { z24.h, z25.h }. |
| define void @mop4s_za64_u16_s16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) #0 { |
| ; CHECK-LABEL: mop4s_za64_u16_s16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: usmop4s za0.d, { z0.h, z1.h }, { z24.h, z25.h } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.usmop4s.za64.wide.2x2.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) |
| ret void |
| } |
| |
| ; Non-widening |
| ; Calls the mop4a.2x2 intrinsic (nxv8f16 operand pairs) with tile index 0. |
| ; The assertions expect a single `fmop4a za0.h` on { z0.h, z1.h } and { z24.h, z25.h }, |
| ; i.e. the accumulator and the operands use the same .h element size. |
| define void @mop4a_za16_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) #0 { |
| ; CHECK-LABEL: mop4a_za16_f16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: fmop4a za0.h, { z0.h, z1.h }, { z24.h, z25.h } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.mop4a.2x2.nxv8f16(i32 0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) |
| ret void |
| } |
| |
| ; Calls the mop4s.2x2 intrinsic (nxv8f16 operand pairs) with tile index 0. |
| ; The assertions expect a single `fmop4s za0.h` on { z0.h, z1.h } and { z24.h, z25.h }. |
| define void @mop4s_za16_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) #0 { |
| ; CHECK-LABEL: mop4s_za16_f16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: fmop4s za0.h, { z0.h, z1.h }, { z24.h, z25.h } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.mop4s.2x2.nxv8f16(i32 0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) |
| ret void |
| } |
| |
| ; Calls the mop4a.2x2 intrinsic (nxv4f32 operand pairs) with tile index 0. |
| ; The assertions expect a single `fmop4a za0.s` on { z0.s, z1.s } and { z24.s, z25.s }. |
| define void @mop4a_za32_f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2) #0 { |
| ; CHECK-LABEL: mop4a_za32_f32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: fmop4a za0.s, { z0.s, z1.s }, { z24.s, z25.s } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.mop4a.2x2.nxv4f32(i32 0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2) |
| ret void |
| } |
| |
| ; Calls the mop4s.2x2 intrinsic (nxv4f32 operand pairs) with tile index 0. |
| ; The assertions expect a single `fmop4s za0.s` on { z0.s, z1.s } and { z24.s, z25.s }. |
| define void @mop4s_za32_f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2) #0 { |
| ; CHECK-LABEL: mop4s_za32_f32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: fmop4s za0.s, { z0.s, z1.s }, { z24.s, z25.s } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.mop4s.2x2.nxv4f32(i32 0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2) |
| ret void |
| } |
| |
| ; Calls the mop4a.2x2 intrinsic (nxv2f64 operand pairs) with tile index 0. |
| ; The assertions expect a single `fmop4a za0.d` on { z0.d, z1.d } and { z24.d, z25.d }. |
| define void @mop4a_za64_f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2) #0 { |
| ; CHECK-LABEL: mop4a_za64_f64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: fmop4a za0.d, { z0.d, z1.d }, { z24.d, z25.d } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.mop4a.2x2.nxv2f64(i32 0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2) |
| ret void |
| } |
| |
| ; Calls the mop4s.2x2 intrinsic (nxv2f64 operand pairs) with tile index 0. |
| ; The assertions expect a single `fmop4s za0.d` on { z0.d, z1.d } and { z24.d, z25.d }. |
| define void @mop4s_za64_f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2) #0 { |
| ; CHECK-LABEL: mop4s_za64_f64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: fmop4s za0.d, { z0.d, z1.d }, { z24.d, z25.d } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.mop4s.2x2.nxv2f64(i32 0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2) |
| ret void |
| } |
| |
| ; Calls the mop4a.2x2 intrinsic (nxv8bf16 operand pairs) with tile index 0. |
| ; The assertions expect a single `bfmop4a za0.h` on { z0.h, z1.h } and { z24.h, z25.h }. |
| define void @mop4a_za16_bf16(<vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) #0 { |
| ; CHECK-LABEL: mop4a_za16_bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: bfmop4a za0.h, { z0.h, z1.h }, { z24.h, z25.h } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.mop4a.2x2.nxv8bf16(i32 0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) |
| ret void |
| } |
| |
| ; Calls the mop4s.2x2 intrinsic (nxv8bf16 operand pairs) with tile index 0. |
| ; The assertions expect a single `bfmop4s za0.h` on { z0.h, z1.h } and { z24.h, z25.h }. |
| define void @mop4s_za16_bf16(<vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) #0 { |
| ; CHECK-LABEL: mop4s_za16_bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: bfmop4s za0.h, { z0.h, z1.h }, { z24.h, z25.h } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.mop4s.2x2.nxv8bf16(i32 0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) |
| ret void |
| } |
| |
| ; Tile limits |
| |
| ; Tile-limit case: calls the smop4s.wide.2x2 intrinsic (nxv16i8 operand pairs) with tile index 3. |
| ; The assertions expect the immediate to appear as `za3.s` in the selected `smop4s`. |
| ; NOTE(review): 3 is presumably the highest .s tile index - confirm against the SME tile layout. |
| define void @mop4s_za32_s8_limit(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) #0 { |
| ; CHECK-LABEL: mop4s_za32_s8_limit: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: smop4s za3.s, { z0.b, z1.b }, { z24.b, z25.b } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.smop4s.wide.2x2.nxv16i8(i32 3, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) |
| ret void |
| } |
| |
| ; Tile-limit case: calls the smop4s.wide.2x2 intrinsic (nxv8i16 operand pairs) with tile index 3. |
| ; The assertions expect the immediate to appear as `za3.s` in the selected `smop4s`. |
| define void @mop4s_za32_s16_limit(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) #0 { |
| ; CHECK-LABEL: mop4s_za32_s16_limit: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: smop4s za3.s, { z0.h, z1.h }, { z24.h, z25.h } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.smop4s.wide.2x2.nxv8i16(i32 3, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) |
| ret void |
| } |
| |
| ; Tile-limit case: calls the mop4s.wide.2x2 intrinsic (nxv8f16 operand pairs) with tile index 3. |
| ; The assertions expect the immediate to appear as `za3.s` in the selected `fmop4s`. |
| define void @mop4s_za32_f16_limit(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) #0 { |
| ; CHECK-LABEL: mop4s_za32_f16_limit: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: fmop4s za3.s, { z0.h, z1.h }, { z24.h, z25.h } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.mop4s.wide.2x2.nxv8f16(i32 3, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) |
| ret void |
| } |
| |
| ; Tile-limit case: calls the mop4s.wide.2x2 intrinsic (nxv8bf16 operand pairs) with tile index 3. |
| ; The assertions expect the immediate to appear as `za3.s` in the selected `bfmop4s`. |
| define void @mop4s_za32_bf16_limit(<vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) #0 { |
| ; CHECK-LABEL: mop4s_za32_bf16_limit: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: bfmop4s za3.s, { z0.h, z1.h }, { z24.h, z25.h } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.mop4s.wide.2x2.nxv8bf16(i32 3, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) |
| ret void |
| } |
| |
| ; Tile-limit case: calls the smop4s.za64.wide.2x2 intrinsic (nxv8i16 operand pairs) with tile index 7. |
| ; The assertions expect the immediate to appear as `za7.d` in the selected `smop4s`. |
| ; NOTE(review): 7 is presumably the highest .d tile index - confirm against the SME tile layout. |
| define void @mop4s_za64_s16_limit(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) #0 { |
| ; CHECK-LABEL: mop4s_za64_s16_limit: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: smop4s za7.d, { z0.h, z1.h }, { z24.h, z25.h } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.smop4s.za64.wide.2x2.nxv8i16(i32 7, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) |
| ret void |
| } |
| |
| ; Tile-limit case: calls the mop4s.2x2 intrinsic (nxv2f64 operand pairs) with tile index 7. |
| ; The assertions expect the immediate to appear as `za7.d` in the selected `fmop4s`. |
| define void @mop4s_za64_f64_limit(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2) #0 { |
| ; CHECK-LABEL: mop4s_za64_f64_limit: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: fmop4s za7.d, { z0.d, z1.d }, { z24.d, z25.d } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.mop4s.2x2.nxv2f64(i32 7, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2) |
| ret void |
| } |
| |
| ; Tile-limit case: calls the mop4s.2x2 intrinsic (nxv4f32 operand pairs) with tile index 3. |
| ; The assertions expect the immediate to appear as `za3.s` in the selected `fmop4s`. |
| define void @mop4s_za32_f32_limit(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2) #0 { |
| ; CHECK-LABEL: mop4s_za32_f32_limit: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: fmop4s za3.s, { z0.s, z1.s }, { z24.s, z25.s } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.mop4s.2x2.nxv4f32(i32 3, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2) |
| ret void |
| } |
| |
| ; Tile-limit case: calls the mop4s.2x2 intrinsic (nxv8f16 operand pairs) with tile index 1. |
| ; The assertions expect the immediate to appear as `za1.h` in the selected `fmop4s`. |
| ; NOTE(review): 1 is presumably the highest .h tile index - confirm against the SME tile layout. |
| define void @mop4s_za16_f16_limit(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) #0 { |
| ; CHECK-LABEL: mop4s_za16_f16_limit: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: fmop4s za1.h, { z0.h, z1.h }, { z24.h, z25.h } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.mop4s.2x2.nxv8f16(i32 1, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) |
| ret void |
| } |
| |
| ; Tile-limit case: calls the mop4s.2x2 intrinsic (nxv8bf16 operand pairs) with tile index 1. |
| ; The assertions expect the immediate to appear as `za1.h` in the selected `bfmop4s`. |
| define void @mop4s_za16_bf16_limit(<vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) #0 { |
| ; CHECK-LABEL: mop4s_za16_bf16_limit: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z25.d, z3.d |
| ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: mov z24.d, z2.d |
| ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 |
| ; CHECK-NEXT: bfmop4s za1.h, { z0.h, z1.h }, { z24.h, z25.h } |
| ; CHECK-NEXT: ret |
| call void @llvm.aarch64.sme.mop4s.2x2.nxv8bf16(i32 1, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) |
| ret void |
| } |
| |
| ; Attribute group #0, referenced by every function definition in this file: nounwind plus |
| ; the target-feature string the functions are compiled with. |
| attributes #0 = {nounwind "target-features" = "+sme-i16i64,+sme-f64f64,+sme-b16b16,+sme2p1,+bf16,+sme-f16f16,+sme-mop4" } |