// blob: edd24b4a849b547db593a518eb6fa98321fdaeee [file] [log] [blame]
//=-- SMEInstrFormats.td - AArch64 SME Instruction classes -*- tablegen -*--=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// AArch64 Scalable Matrix Extension (SME) Instruction Class Definitions.
//
//===----------------------------------------------------------------------===//
// Single-operand i32 ComplexPatterns, one per tile element width, each bound
// to the C++ selector ImmToTile<Base>.
// NOTE(review): Base is presumably the first tile register of that width and
// the matched immediate an offset from it - confirm against the selector.
def imm_to_tile8   : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAB0>", []>;
def imm_to_tile16  : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAH0>", []>;
def imm_to_tile32  : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAS0>", []>;
def imm_to_tile64  : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAD0>", []>;
def imm_to_tile128 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAQ0>", []>;
// Tile-slice selectors. Each matches an i32 and produces two operands; the
// patterns in this file consume them as (index register, immediate offset).
// NOTE(review): the two SelectSMETileSlice template arguments look like
// <maximum offset, scale/stride> - confirm against the C++ implementation.

// Single-slice forms, by element width.
def tileslice8   : ComplexPattern<i32, 2, "SelectSMETileSlice<15, 1>", []>;
def tileslice16  : ComplexPattern<i32, 2, "SelectSMETileSlice<7, 1>", []>;
def tileslice32  : ComplexPattern<i32, 2, "SelectSMETileSlice<3, 1>", []>;
def tileslice64  : ComplexPattern<i32, 2, "SelectSMETileSlice<1, 1>", []>;
def tileslice128 : ComplexPattern<i32, 2, "SelectSMETileSlice<0, 1>", []>; // nop

// Slice-range forms with second argument 2.
def tileslicerange3s2 : ComplexPattern<i32, 2, "SelectSMETileSlice<14, 2>", []>;
def tileslicerange2s2 : ComplexPattern<i32, 2, "SelectSMETileSlice<6, 2>", []>;
def tileslicerange1s2 : ComplexPattern<i32, 2, "SelectSMETileSlice<2, 2>", []>;
def tileslicerange0s2 : ComplexPattern<i32, 2, "SelectSMETileSlice<0, 2>", []>;

// Slice-range forms with second argument 4.
def tileslicerange2s4 : ComplexPattern<i32, 2, "SelectSMETileSlice<12, 4>", []>;
def tileslicerange1s4 : ComplexPattern<i32, 2, "SelectSMETileSlice<4, 4>", []>;
def tileslicerange0s4 : ComplexPattern<i32, 2, "SelectSMETileSlice<0, 4>", []>;
// Two-operand pointer-typed addressing-mode pattern handled by
// SelectAddrModeIndexedSVE<0,15>; SDNPWantRoot asks for the root node to be
// passed to the selector.
def am_sme_indexed_b4
    : ComplexPattern<iPTR, 2, "SelectAddrModeIndexedSVE<0,15>", [],
                     [SDNPWantRoot]>;
//===----------------------------------------------------------------------===//
// SME Pseudo Classes
//===----------------------------------------------------------------------===//
// Instruction mapping over all records deriving from SMEPseudo2Instr.
// Rows are grouped by PseudoName; the IsInstr column distinguishes the two
// members of a row, mapping the entry with IsInstr = 0 (key) to the entry
// with IsInstr = 1 (value).
def getSMEPseudoMap : InstrMapping {
  let FilterClass = "SMEPseudo2Instr";
  let RowFields   = ["PseudoName"];
  let ColFields   = ["IsInstr"];
  let KeyCol      = ["0"];
  let ValueCols   = [["1"]];
}
// Mix-in tagging a record for getSMEPseudoMap.
//   name  - shared row key; a pseudo and its real instruction use the same one.
//   instr - 1 on the real instruction, 0 on the pseudo.
class SMEPseudo2Instr<string name, bit instr> {
  string PseudoName = name;
  bit    IsInstr    = instr;
}
// Output-less pseudo for outer products: takes the tile as a plain i32
// immediate, two predicates and two vectors of class zpr_ty.
class sme_outer_product_pseudo<ZPRRegOp zpr_ty, SMEMatrixTypeEnum za_flag>
    : Pseudo<(outs),
             (ins i32imm:$tile, PPR3bAny:$pn, PPR3bAny:$pm,
                  zpr_ty:$zn, zpr_ty:$zm),
             []>,
      Sched<[]> {
  // Expanded to the real instruction by the custom inserter in
  // AArch64ISelLowering.cpp.
  let usesCustomInserter = 1;
  let SMEMatrixType = za_flag;
}
// Pseudo (IsInstr = 0 under row key `name`) taking a slice index register,
// an immediate offset, a multi-vector operand and a single vector operand.
class sme2_za_array_2op_multi_single_pseudo<string name, Operand index_ty,
                                            RegisterOperand multi_vector_ty,
                                            ZPRRegOp zpr_ty,
                                            SMEMatrixTypeEnum za_flag>
    : SMEPseudo2Instr<name, 0>,
      Pseudo<(outs),
             (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3,
                  multi_vector_ty:$Zn, zpr_ty:$Zm),
             []> {
  let usesCustomInserter = 1;
  let SMEMatrixType = za_flag;
}
// Pseudo (IsInstr = 0 under row key `name`) taking a slice index register,
// an immediate offset and two multi-vector operands of the same class.
class sme2_za_array_2op_multi_multi_pseudo<string name, Operand index_ty,
                                           RegisterOperand multi_vector_ty,
                                           SMEMatrixTypeEnum za_flag>
    : SMEPseudo2Instr<name, 0>,
      Pseudo<(outs),
             (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3,
                  multi_vector_ty:$Zn, multi_vector_ty:$Zm),
             []> {
  let usesCustomInserter = 1;
  let SMEMatrixType = za_flag;
}
// Pseudo (IsInstr = 0 under row key `name`) for the indexed forms: slice index
// register, immediate offset, multi-vector operand, single vector operand and
// a trailing immediate $i of type imm_ty.
class sme2_za_array_2op_multi_index_pseudo<string name, Operand index_ty,
                                           RegisterOperand multi_vector_ty,
                                           ZPRRegOp zpr_ty, Operand imm_ty,
                                           SMEMatrixTypeEnum za_flag>
    : SMEPseudo2Instr<name, 0>,
      Pseudo<(outs),
             (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3,
                  multi_vector_ty:$Zn, zpr_ty:$Zm, imm_ty:$i),
             []> {
  let usesCustomInserter = 1;
  let SMEMatrixType = za_flag;
}
// Pseudo (IsInstr = 0 under row key `name`) taking a slice index register
// (MatrixIndexGPR32Op8_11), an immediate and a multi-vector source.
class sme2_move_to_za_pseudo<string name, Operand imm_ty,
                             RegisterOperand multi_vector_ty,
                             SMEMatrixTypeEnum za_flag>
    : SMEPseudo2Instr<name, 0>,
      Pseudo<(outs),
             (ins MatrixIndexGPR32Op8_11:$Rs, imm_ty:$imm,
                  multi_vector_ty:$Zn),
             []> {
  let usesCustomInserter = 1;
  let SMEMatrixType = za_flag;
}
// Pseudo (IsInstr = 0 under row key `name`) taking a tile immediate, a slice
// index register (MatrixIndexGPR32Op12_15), an immediate and a multi-vector
// source.
class sme2_move_to_tile_pseudo<string name, Operand tile_imm, Operand imm_ty,
                               RegisterOperand multi_vector_ty,
                               SMEMatrixTypeEnum za_flag>
    : SMEPseudo2Instr<name, 0>,
      Pseudo<(outs),
             (ins tile_imm:$tile, MatrixIndexGPR32Op12_15:$Rs, imm_ty:$imm,
                  multi_vector_ty:$Zn),
             []> {
  let usesCustomInserter = 1;
  let SMEMatrixType = za_flag;
}
//===----------------------------------------------------------------------===//
// SME pattern match helpers.
//===----------------------------------------------------------------------===//
// Selects `intrinsic(slice, Zn, Zm)` to the `name`_PSEUDO instruction, passing
// the decomposed slice (base register, offset) followed by both vectors.
class SME2_ZA_TwoOp_Multi_Single_Pat<string name, SDPatternOperator intrinsic,
                                     Operand index_ty, ZPRRegOp zpr_ty,
                                     ValueType vt, ComplexPattern tileslice>
    : Pat<(intrinsic
              (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
              vt:$Zn, vt:$Zm),
          (!cast<Instruction>(name # _PSEUDO)
              $base, $offset, vt:$Zn, zpr_ty:$Zm)>;
// Two-vector-group variant: Zn1/Zn2 are packed into a ZPR2 REG_SEQUENCE
// (note: ZPR2 here, not the ZPR2Mul2 class used by the multi-multi patterns)
// and passed with the single vector Zm to `name`_PSEUDO.
class SME2_ZA_TwoOp_VG2_Multi_Single_Pat<string name,
                                         SDPatternOperator intrinsic,
                                         Operand index_ty, ZPRRegOp zpr_ty,
                                         ValueType vt,
                                         ComplexPattern tileslice>
    : Pat<(intrinsic
              (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
              vt:$Zn1, vt:$Zn2, vt:$Zm),
          (!cast<Instruction>(name # _PSEUDO)
              $base, $offset,
              (REG_SEQUENCE ZPR2, vt:$Zn1, zsub0, vt:$Zn2, zsub1),
              zpr_ty:$Zm)>;
// Four-vector-group variant: Zn1..Zn4 are packed into a ZPR4 REG_SEQUENCE and
// passed with the single vector Zm to `name`_PSEUDO.
class SME2_ZA_TwoOp_VG4_Multi_Single_Pat<string name,
                                         SDPatternOperator intrinsic,
                                         Operand index_ty, ZPRRegOp zpr_ty,
                                         ValueType vt,
                                         ComplexPattern tileslice>
    : Pat<(intrinsic
              (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
              vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm),
          (!cast<Instruction>(name # _PSEUDO)
              $base, $offset,
              (REG_SEQUENCE ZPR4, vt:$Zn1, zsub0, vt:$Zn2, zsub1,
                                  vt:$Zn3, zsub2, vt:$Zn4, zsub3),
              zpr_ty:$Zm)>;
// Multi x multi, two vectors per group: both the Zn pair and the Zm pair are
// packed into ZPR2Mul2 REG_SEQUENCEs for `name`_PSEUDO.
class SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<string name,
                                        SDPatternOperator intrinsic,
                                        Operand index_ty, ValueType vt,
                                        ComplexPattern tileslice>
    : Pat<(intrinsic
              (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
              vt:$Zn1, vt:$Zn2, vt:$Zm1, vt:$Zm2),
          (!cast<Instruction>(name # _PSEUDO)
              $base, $offset,
              (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1),
              (REG_SEQUENCE ZPR2Mul2, vt:$Zm1, zsub0, vt:$Zm2, zsub1))>;
// Multi x multi, four vectors per group: both the Zn quad and the Zm quad are
// packed into ZPR4Mul4 REG_SEQUENCEs for `name`_PSEUDO.
class SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<string name,
                                        SDPatternOperator intrinsic,
                                        Operand index_ty, ValueType vt,
                                        ComplexPattern tileslice>
    : Pat<(intrinsic
              (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
              vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4,
              vt:$Zm1, vt:$Zm2, vt:$Zm3, vt:$Zm4),
          (!cast<Instruction>(name # _PSEUDO)
              $base, $offset,
              (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1,
                                      vt:$Zn3, zsub2, vt:$Zn4, zsub3),
              (REG_SEQUENCE ZPR4Mul4, vt:$Zm1, zsub0, vt:$Zm2, zsub1,
                                      vt:$Zm3, zsub2, vt:$Zm4, zsub3))>;
// Indexed form with a single Zn: forwards slice (base, offset), Zn, Zm and the
// i32 immediate index $i to `name`_PSEUDO.
class SME2_ZA_TwoOp_Multi_Index_Pat<string name, SDPatternOperator intrinsic,
                                    Operand index_ty, ZPRRegOp zpr_ty,
                                    ValueType vt, Operand imm_ty,
                                    ComplexPattern tileslice>
    : Pat<(intrinsic
              (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
              vt:$Zn, vt:$Zm, (i32 imm_ty:$i)),
          (!cast<Instruction>(name # _PSEUDO)
              $base, $offset, vt:$Zn, zpr_ty:$Zm, (i32 imm_ty:$i))>;
// Indexed form, two-vector group: Zn1/Zn2 are packed into a ZPR2Mul2
// REG_SEQUENCE; Zm and the immediate index $i follow.
class SME2_ZA_TwoOp_VG2_Multi_Index_Pat<string name,
                                        SDPatternOperator intrinsic,
                                        Operand index_ty, ZPRRegOp zpr_ty,
                                        ValueType vt, Operand imm_ty,
                                        ComplexPattern tileslice>
    : Pat<(intrinsic
              (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
              vt:$Zn1, vt:$Zn2, vt:$Zm, (i32 imm_ty:$i)),
          (!cast<Instruction>(name # _PSEUDO)
              $base, $offset,
              (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1),
              zpr_ty:$Zm, imm_ty:$i)>;
// Indexed form, four-vector group: Zn1..Zn4 are packed into a ZPR4Mul4
// REG_SEQUENCE; Zm and the immediate index $i follow.
class SME2_ZA_TwoOp_VG4_Multi_Index_Pat<string name,
                                        SDPatternOperator intrinsic,
                                        Operand index_ty, ZPRRegOp zpr_ty,
                                        ValueType vt, Operand imm_ty,
                                        ComplexPattern tileslice>
    : Pat<(intrinsic
              (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
              vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm, (i32 imm_ty:$i)),
          (!cast<Instruction>(name # _PSEUDO)
              $base, $offset,
              (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1,
                                      vt:$Zn3, zsub2, vt:$Zn4, zsub3),
              zpr_ty:$Zm, imm_ty:$i)>;
// Selects `intrinsic(Zn1, Zn2, imm)` producing out_vt directly to the real
// instruction `name` (no _PSEUDO suffix), with the inputs packed as ZPR2Mul2.
class SME2_Sat_Shift_VG2_Pat<string name, SDPatternOperator intrinsic,
                             ValueType out_vt, ValueType in_vt,
                             Operand imm_ty>
    : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, (i32 imm_ty:$i))),
          (!cast<Instruction>(name)
              (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1),
              imm_ty:$i)>;
// Four-input counterpart of SME2_Sat_Shift_VG2_Pat: inputs are packed as
// ZPR4Mul4 and passed with the immediate to the real instruction `name`.
class SME2_Sat_Shift_VG4_Pat<string name, SDPatternOperator intrinsic,
                             ValueType out_vt, ValueType in_vt,
                             Operand imm_ty>
    : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4,
                             (i32 imm_ty:$i))),
          (!cast<Instruction>(name)
              (REG_SEQUENCE ZPR4Mul4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1,
                                      in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3),
              imm_ty:$i)>;
// Selects a four-input intrinsic producing out_vt to the real instruction
// `name`, with the inputs packed into one ZPR4Mul4 REG_SEQUENCE.
class SME2_Cvt_VG4_Pat<string name, SDPatternOperator intrinsic,
                       ValueType out_vt, ValueType in_vt>
    : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4)),
          (!cast<Instruction>(name)
              (REG_SEQUENCE ZPR4Mul4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1,
                                      in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3))>;
// Selects `intrinsic(slice, Zn1, Zn2)` to `name`_PSEUDO with the slice split
// into (base, offset) and the two vectors packed as ZPR2Mul2.
class SME2_ZA_VG1x2_Multi_Pat<string name, SDPatternOperator intrinsic,
                              ValueType vt, Operand index_ty,
                              ComplexPattern tileslice>
    : Pat<(intrinsic
              (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
              vt:$Zn1, vt:$Zn2),
          (!cast<Instruction>(name # _PSEUDO)
              $base, $offset,
              (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1))>;
// Selects `intrinsic(slice, Zn1..Zn4)` to `name`_PSEUDO with the slice split
// into (base, offset) and the four vectors packed as ZPR4Mul4.
class SME2_ZA_VG1x4_Multi_Pat<string name, SDPatternOperator intrinsic,
                              ValueType vt, Operand index_ty,
                              ComplexPattern tileslice>
    : Pat<(intrinsic
              (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
              vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
          (!cast<Instruction>(name # _PSEUDO)
              $base, $offset,
              (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1,
                                      vt:$Zn3, zsub2, vt:$Zn4, zsub3))>;
// Tile-addressed variant: the intrinsic's leading tile immediate is forwarded
// unchanged, the slice uses a MatrixIndexGPR32Op12_15 base, and the two
// vectors are packed as ZPR2Mul2.
class SME2_Tile_VG2_Multi_Pat<string name, SDPatternOperator intrinsic,
                              Operand tile_imm, ValueType vt,
                              Operand index_ty, ComplexPattern tileslice>
    : Pat<(intrinsic
              tile_imm:$tile,
              (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)),
              vt:$Zn1, vt:$Zn2),
          (!cast<Instruction>(name # _PSEUDO)
              $tile, $base, $offset,
              (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1))>;
// Tile-addressed variant with four vectors: tile immediate forwarded
// unchanged, slice base in MatrixIndexGPR32Op12_15, vectors packed as
// ZPR4Mul4.
class SME2_Tile_VG4_Multi_Pat<string name, SDPatternOperator intrinsic,
                              Operand tile_imm, ValueType vt,
                              Operand index_ty, ComplexPattern tileslice>
    : Pat<(intrinsic
              tile_imm:$tile,
              (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)),
              vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
          (!cast<Instruction>(name # _PSEUDO)
              $tile, $base, $offset,
              (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1,
                                      vt:$Zn3, zsub2, vt:$Zn4, zsub3))>;
//===----------------------------------------------------------------------===//
// SME pattern match helpers.
//===----------------------------------------------------------------------===//
// Selects `intrinsic(tile, Pn, Pm, Zn, Zm)` to `name`_PSEUDO, forwarding all
// five operands in the same order. Both predicates have type pg_ty and both
// vectors have type vt.
class SME_ZA_Tile_TwoPred_TwoVec_Pat<string name, SDPatternOperator intrinsic,
                                     Operand imm_ty, ValueType pg_ty,
                                     ValueType vt>
    : Pat<(intrinsic imm_ty:$tile,
                     (pg_ty PPR3bAny:$Pn), (pg_ty PPR3bAny:$Pm),
                     vt:$Zn, vt:$Zm),
          (!cast<Instruction>(name # _PSEUDO) $tile, $Pn, $Pm, $Zn, $Zm)>;
//===----------------------------------------------------------------------===//
// SME smstart/smstop
//===----------------------------------------------------------------------===//
// SME defines three pstate fields to set or clear PSTATE.SM, PSTATE.ZA, or
// both fields:
//
// MSR SVCRSM, #<imm1>
// MSR SVCRZA, #<imm1>
// MSR SVCRSMZA, #<imm1>
//
// It's tricky to use the existing pstate operand defined in
// AArch64SystemOperands.td since it only encodes 5 bits including op1;op2,
// when these fields are also encoded in CRm[3:1].
// "msr <pstatefield>, #<imm>" with a 3-bit SVCR field selector and a 1-bit
// immediate. op1 and op2 are fixed to 0b011; the selector is placed in
// Inst{11-9} and the immediate in Inst{8}.
def MSRpstatesvcrImm1
    : PstateWriteSimple<(ins svcr_op:$pstatefield, timm0_1:$imm), "msr",
                        "\t$pstatefield, $imm">,
      Sched<[WriteSys]> {
  bits<3> pstatefield;
  bit imm;

  // Fixed opcode fields.
  let Inst{18-16} = 0b011; // op1
  let Inst{7-5} = 0b011; // op2

  // Operand fields.
  let Inst{11-9} = pstatefield;
  let Inst{8} = imm;
}
// smstart / smstop aliases of MSRpstatesvcrImm1.
//   pstatefield: 0b001 = "sm", 0b010 = "za", 0b011 = no suffix.
//   imm:         1 for smstart, 0 for smstop.
def : InstAlias<"smstart",    (MSRpstatesvcrImm1 0b011, 0b1)>;
def : InstAlias<"smstart sm", (MSRpstatesvcrImm1 0b001, 0b1)>;
def : InstAlias<"smstart za", (MSRpstatesvcrImm1 0b010, 0b1)>;
def : InstAlias<"smstop",     (MSRpstatesvcrImm1 0b011, 0b0)>;
def : InstAlias<"smstop sm",  (MSRpstatesvcrImm1 0b001, 0b0)>;
def : InstAlias<"smstop za",  (MSRpstatesvcrImm1 0b010, 0b0)>;
//===----------------------------------------------------------------------===//
// SME Outer Products
//===----------------------------------------------------------------------===//
// Common encoding for the FP outer-product instructions. The tile is both
// read and written ($ZAda tied to $_ZAda). Inst{2-0} (tile number) is left
// for the instantiating def to fill in.
//   S  -> Inst{4}
//   sz -> Inst{22-21}
//   op -> Inst{24} and Inst{3}
class sme_fp_outer_product_inst<bit S, bits<2> sz, bit op,
                                MatrixTileOperand za_ty, ZPRRegOp zpr_ty,
                                string mnemonic>
    : I<(outs za_ty:$ZAda),
        (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<5> Zm;
  bits<3> Pm;
  bits<3> Pn;
  bits<5> Zn;

  let Constraints = "$ZAda = $_ZAda";

  let Inst{31-25} = 0b1000000;
  let Inst{24}    = op;
  let Inst{23}    = 0b1;
  let Inst{22-21} = sz;
  let Inst{20-16} = Zm;
  let Inst{15-13} = Pm;
  let Inst{12-10} = Pn;
  let Inst{9-5}   = Zn;
  let Inst{4}     = S;
  let Inst{3}     = op;
}
// 32-bit FP outer product: the real instruction (2-bit tile number in
// Inst{1-0}, Inst{2} zero), its _PSEUDO twin sharing the same map key, and the
// pattern selecting intrinsic `op` to the pseudo.
multiclass sme_outer_product_fp32<bit S, string mnemonic,
                                  SDPatternOperator op> {
  def NAME : sme_fp_outer_product_inst<S, 0b00, 0b0, TileOp32, ZPR32,
                                       mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    bits<2> ZAda;
    let Inst{2} = 0b0;
    let Inst{1-0} = ZAda;
  }

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR32, SMEMatrixTileS>,
                       SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv4i1, nxv4f32>;
}
// 64-bit FP outer product: the real instruction (3-bit tile number in
// Inst{2-0}), its _PSEUDO twin sharing the same map key, and the pattern
// selecting intrinsic `op` to the pseudo.
multiclass sme_outer_product_fp64<bit S, string mnemonic,
                                  SDPatternOperator op> {
  def NAME : sme_fp_outer_product_inst<S, 0b10, 0b0, TileOp64, ZPR64,
                                       mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    bits<3> ZAda;
    let Inst{2-0} = ZAda;
  }

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR64, SMEMatrixTileD>,
                       SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_7, nxv2i1, nxv2f64>;
}
// 16-bit-tile FP outer product. sz is {0, bf}, op is fixed to 1, and the
// single tile bit lives in Inst{0} with Inst{2-1} zero. No pseudo or pattern
// is defined here.
multiclass sme2p1_fmop_tile_fp16<string mnemonic, bit bf, bit s> {
  def NAME : sme_fp_outer_product_inst<s, {0,bf}, 0b1, TileOp16, ZPR16,
                                       mnemonic> {
    bits<1> ZAda;
    let Inst{2-1} = 0b00;
    let Inst{0} = ZAda;
  }
}
// Common encoding for the integer outer-product instructions. The tile is
// tied ($ZAda = $_ZAda); Inst{2-0} is left to the instantiating def.
//   opc{2} -> Inst{24} (u0), opc{1} -> Inst{21} (u1), opc{0} -> Inst{4} (S)
//   sz     -> Inst{22}
//   sme2   -> Inst{3}
class sme_int_outer_product_inst<bits<3> opc, bit sz, bit sme2,
                                 MatrixTileOperand za_ty, ZPRRegOp zpr_ty,
                                 string mnemonic>
    : I<(outs za_ty:$ZAda),
        (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<5> Zm;
  bits<3> Pm;
  bits<3> Pn;
  bits<5> Zn;

  let Constraints = "$ZAda = $_ZAda";

  let Inst{31-25} = 0b1010000;
  let Inst{24}    = opc{2}; // u0
  let Inst{23}    = 0b1;
  let Inst{22}    = sz;
  let Inst{21}    = opc{1}; // u1
  let Inst{20-16} = Zm;
  let Inst{15-13} = Pm;
  let Inst{12-10} = Pn;
  let Inst{9-5}   = Zn;
  let Inst{4}     = opc{0}; //S;
  let Inst{3}     = sme2;
}
// Integer outer product into a 32-bit tile from 8-bit element vectors: real
// instruction (2-bit tile in Inst{1-0}, Inst{2} zero), matching _PSEUDO, and
// the selection pattern for intrinsic `op`.
multiclass sme_int_outer_product_i32<bits<3> opc, string mnemonic,
                                     SDPatternOperator op> {
  def NAME : sme_int_outer_product_inst<opc, 0b0, 0b0, TileOp32,
                                        ZPR8, mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    bits<2> ZAda;
    let Inst{2} = 0b0;
    let Inst{1-0} = ZAda;
  }

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR8, SMEMatrixTileS>,
                       SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv16i1,
                                       nxv16i8>;
}
// Integer outer product into a 64-bit tile from 16-bit element vectors: real
// instruction (3-bit tile in Inst{2-0}), matching _PSEUDO, and the selection
// pattern for intrinsic `op`.
multiclass sme_int_outer_product_i64<bits<3> opc, string mnemonic,
                                     SDPatternOperator op> {
  def NAME : sme_int_outer_product_inst<opc, 0b1, 0b0, TileOp64,
                                        ZPR16, mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    bits<3> ZAda;
    let Inst{2-0} = ZAda;
  }

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileD>,
                       SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_7, nxv8i1,
                                       nxv8i16>;
}
// Encoding for widening outer products that always target a 32-bit tile
// (TileOp32, 2-bit tile number in Inst{1-0}, tied in/out).
//   opc{2} -> Inst{3}, and its inverse -> Inst{24}
//   opc{1} -> Inst{21}
//   opc{0} -> Inst{4}
class sme_outer_product_widening_inst<bits<3> opc, ZPRRegOp zpr_ty,
                                      string mnemonic>
    : I<(outs TileOp32:$ZAda),
        (ins TileOp32:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm,
             zpr_ty:$Zn, zpr_ty:$Zm),
        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<5> Zm;
  bits<3> Pm;
  bits<3> Pn;
  bits<5> Zn;
  bits<2> ZAda;

  let Constraints = "$ZAda = $_ZAda";

  let Inst{31-25} = 0b1000000;
  let Inst{24}    = !if(opc{2}, 0, 1);
  let Inst{23-22} = 0b10;
  let Inst{21}    = opc{1};
  let Inst{20-16} = Zm;
  let Inst{15-13} = Pm;
  let Inst{12-10} = Pn;
  let Inst{9-5}   = Zn;
  let Inst{4}     = opc{0};
  let Inst{3}     = opc{2};
  let Inst{2}     = 0b0;
  let Inst{1-0}   = ZAda;
}
// Widening outer product on nxv8bf16 inputs: real instruction, _PSEUDO twin
// and the selection pattern for intrinsic `op`.
multiclass sme_bf16_outer_product<bits<3> opc, string mnemonic,
                                  SDPatternOperator op> {
  def NAME : sme_outer_product_widening_inst<opc, ZPR16, mnemonic>,
             SMEPseudo2Instr<NAME, 1>;

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileS>,
                       SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv8i1,
                                       nxv8bf16>;
}
// Widening outer product on nxv8f16 inputs: real instruction, _PSEUDO twin
// and the selection pattern for intrinsic `op`.
multiclass sme_f16_outer_product<bits<3> opc, string mnemonic,
                                 SDPatternOperator op> {
  def NAME : sme_outer_product_widening_inst<opc, ZPR16, mnemonic>,
             SMEPseudo2Instr<NAME, 1>;

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileS>,
                       SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv8i1,
                                       nxv8f16>;
}
//===----------------------------------------------------------------------===//
// SME Add Vector to Tile
//===----------------------------------------------------------------------===//
// Common encoding for the add-vector-to-tile instructions. The tile is tied
// ($ZAda = $_ZAda); Inst{2-0} is left to the instantiating def.
//   op -> Inst{22}
//   V  -> Inst{16}
class sme_add_vector_to_tile_inst<bit op, bit V, MatrixTileOperand tile_ty,
                                  ZPRRegOp zpr_ty, string mnemonic>
    : I<(outs tile_ty:$ZAda),
        (ins tile_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn),
        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn",
        "", []>,
      Sched<[]> {
  bits<3> Pm;
  bits<3> Pn;
  bits<5> Zn;

  let Constraints = "$ZAda = $_ZAda";

  let Inst{31-23} = 0b110000001;
  let Inst{22}    = op;
  let Inst{21-17} = 0b01000;
  let Inst{16}    = V;
  let Inst{15-13} = Pm;
  let Inst{12-10} = Pn;
  let Inst{9-5}   = Zn;
  let Inst{4-3}   = 0b00;
}
// Output-less pseudo for add-vector-to-tile: tile as a plain i32 immediate,
// two predicates and one vector of class zpr_ty.
class sme_add_vector_to_tile_pseudo<ZPRRegOp zpr_ty,
                                    SMEMatrixTypeEnum za_flag>
    : Pseudo<(outs),
             (ins i32imm:$tile, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn),
             []>,
      Sched<[]> {
  // Expanded to the real instruction by the custom inserter in
  // AArch64ISelLowering.cpp.
  let usesCustomInserter = 1;
  let SMEMatrixType = za_flag;
}
// 32-bit add-vector-to-tile: real instruction (2-bit tile in Inst{1-0},
// Inst{2} zero), a _PSEUDO_S twin, and the pattern selecting `op` to it.
multiclass sme_add_vector_to_tile_u32<bit V, string mnemonic,
                                      SDPatternOperator op> {
  def NAME : sme_add_vector_to_tile_inst<0b0, V, TileOp32, ZPR32, mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    bits<2> ZAda;
    let Inst{2} = 0b0;
    let Inst{1-0} = ZAda;
  }

  def _PSEUDO_S : sme_add_vector_to_tile_pseudo<ZPR32, SMEMatrixTileS>,
                  SMEPseudo2Instr<NAME, 0>;

  def : Pat<(op timm32_0_3:$tile,
                (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
                (nxv4i32 ZPR32:$zn)),
            (!cast<Instruction>(NAME # _PSEUDO_S)
                timm32_0_3:$tile, $pn, $pm, $zn)>;
}
// 64-bit add-vector-to-tile: real instruction (3-bit tile in Inst{2-0}), a
// _PSEUDO_D twin, and the pattern selecting `op` to it. Only the pattern is
// guarded by HasSMEI16I64.
multiclass sme_add_vector_to_tile_u64<bit V, string mnemonic,
                                      SDPatternOperator op> {
  def NAME : sme_add_vector_to_tile_inst<0b1, V, TileOp64, ZPR64, mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    bits<3> ZAda;
    let Inst{2-0} = ZAda;
  }

  def _PSEUDO_D : sme_add_vector_to_tile_pseudo<ZPR64, SMEMatrixTileD>,
                  SMEPseudo2Instr<NAME, 0>;

  let Predicates = [HasSMEI16I64] in {
    def : Pat<(op timm32_0_7:$tile,
                  (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
                  (nxv2i64 ZPR64:$zn)),
              (!cast<Instruction>(NAME # _PSEUDO_D)
                  timm32_0_7:$tile, $pn, $pm, $zn)>;
  }
}
//===----------------------------------------------------------------------===//
// SME Contiguous Loads
//===----------------------------------------------------------------------===//
// Common encoding for the contiguous tile-slice loads (Inst{21} = 0, mayLoad).
// Inst{3-0} (tile number / slice immediate) is left to the instantiating def.
//   Q   -> Inst{24}
//   msz -> Inst{23-22}
//   V   -> Inst{15}
class sme_mem_ld_ss_base<bit Q, bit V, bits<2> msz, dag outs, dag ins,
                         string mnemonic, string argstr>
    : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
  bits<5> Rm;
  bits<2> Rv;
  bits<3> Pg;
  bits<5> Rn;

  let mayLoad = 1;

  let Inst{31-25} = 0b1110000;
  let Inst{24}    = Q;
  let Inst{23-22} = msz;
  let Inst{21}    = 0b0;
  let Inst{20-16} = Rm;
  let Inst{15}    = V;
  let Inst{14-13} = Rv;
  let Inst{12-10} = Pg;
  let Inst{9-5}   = Rn;
  let Inst{4}     = 0b0;
}
// Concrete load form: fixes the operand list and the braced assembly syntax
// "{ZAt[Rv, imm]}, Pg/z, [Rn, Rm]". is_col is forwarded as the V bit.
class sme_mem_ld_ss_inst<bit Q, bits<2> msz, string mnemonic,
                         MatrixTileVectorOperand tile_ty, bit is_col,
                         Operand imm_ty, RegisterOperand gpr_ty>
    : sme_mem_ld_ss_base<
          Q, is_col, msz,
          (outs tile_ty:$ZAt),
          (ins MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg,
               GPR64sp:$Rn, gpr_ty:$Rm),
          mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg/z, [$Rn, $Rm]">;
// Assembly aliases shared by the tile-slice loads and stores for one element
// size. `pg_suffix` is appended directly after "$Pg" (empty by default).
// Three aliases are produced for `inst`:
//   1. brace-less tile slice with an explicit [$Rn, $Rm] address,
//   2. braced tile slice with [$Rn] only (Rm supplied as XZR),
//   3. brace-less tile slice with [$Rn] only (Rm supplied as XZR).
// NOTE(review): the trailing 0/1 is InstAlias's last argument (emit priority
// in LLVM's InstAlias class - confirm); only alias 2 passes 1.
multiclass sme_mem_ss_aliases_base<string mnemonic, Instruction inst,
MatrixTileVectorOperand tile_ty,
Operand imm_ty, RegisterOperand gpr_ty,
string pg_suffix=""> {
// Brace-less spelling with a register offset.
def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn, $Rm]",
(inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, gpr_ty:$Rm), 0>;
// Default XZR offset aliases
def : InstAlias<mnemonic # "\t\\{$ZAt[$Rv, $imm]\\}, $Pg" # pg_suffix # ", [$Rn]",
(inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 1>;
def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn]",
(inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>;
}
// Instantiates sme_mem_ss_aliases_base once per element size. For each size
// the mnemonic suffix (b/h/w/d/q), the instruction record (`inst` # _B/_H/
// _S/_D/_Q), the vertical or horizontal tile-vector operand (chosen by
// is_col), the slice-index immediate and the shifted GPR64 operand are paired
// up.
multiclass sme_mem_ss_aliases<string mnemonic, string inst, bit is_col,
                              string pg_suffix=""> {
  // 8-bit elements.
  defm : sme_mem_ss_aliases_base<mnemonic # "b",
                                 !cast<Instruction>(inst # _B),
                                 !if(is_col, TileVectorOpV8, TileVectorOpH8),
                                 sme_elm_idx0_15, GPR64shifted8, pg_suffix>;
  // 16-bit elements.
  defm : sme_mem_ss_aliases_base<mnemonic # "h",
                                 !cast<Instruction>(inst # _H),
                                 !if(is_col, TileVectorOpV16, TileVectorOpH16),
                                 sme_elm_idx0_7, GPR64shifted16, pg_suffix>;
  // 32-bit elements.
  defm : sme_mem_ss_aliases_base<mnemonic # "w",
                                 !cast<Instruction>(inst # _S),
                                 !if(is_col, TileVectorOpV32, TileVectorOpH32),
                                 sme_elm_idx0_3, GPR64shifted32, pg_suffix>;
  // 64-bit elements.
  defm : sme_mem_ss_aliases_base<mnemonic # "d",
                                 !cast<Instruction>(inst # _D),
                                 !if(is_col, TileVectorOpV64, TileVectorOpH64),
                                 sme_elm_idx0_1, GPR64shifted64, pg_suffix>;
  // 128-bit elements.
  defm : sme_mem_ss_aliases_base<mnemonic # "q",
                                 !cast<Instruction>(inst # _Q),
                                 !if(is_col, TileVectorOpV128,
                                             TileVectorOpH128),
                                 sme_elm_idx0_0, GPR64shifted128, pg_suffix>;
}
// Load aliases: "ld1" mnemonic stem with the "/z" predicate suffix.
multiclass sme_mem_ld_ss_aliases<string inst, bit is_col> {
  defm NAME : sme_mem_ss_aliases<"ld1", inst, is_col, "/z">;
}
// Selection patterns for a tile-slice load intrinsic `Load`. The intrinsic
// operands are (predicate, address, tile immediate, slice index); the result
// instruction takes (tile, idx, imm, pg, base, offset).
multiclass sme_mem_ld_ss_patterns<Instruction Inst, SDPatternOperator Load,
                                  Operand tile_ty, Operand offset_ty,
                                  ComplexPattern addr,
                                  ComplexPattern tileslice> {
  // Plain base address: the register offset is XZR.
  def : Pat<(Load PPR3bAny:$pg, GPR64sp:$base, tile_ty:$tile,
                  (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
                                  offset_ty:$imm))),
            (Inst tile_ty:$tile, $idx, $imm, $pg, $base, XZR)>;

  // Base + register offset matched through `addr`; AddedComplexity raises
  // this pattern above the plain-base form.
  let AddedComplexity = 1 in {
    def : Pat<(Load PPR3bAny:$pg, (addr GPR64sp:$base, GPR64:$offset),
                    tile_ty:$tile,
                    (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
                                    offset_ty:$imm))),
              (Inst tile_ty:$tile, $idx, $imm, $pg, $base, $offset)>;
  }
}
// Output-less load pseudo whose tile and slice offset are plain i32
// immediates; operands are (tile, idx, imm, pg, base, offset).
class sme_load_pseudo
    : Pseudo<(outs),
             (ins i32imm:$tile, MatrixIndexGPR32Op12_15:$idx, i32imm:$imm,
                  PPR3bAny:$pg, GPR64sp:$base, GPR64:$offset),
             []>,
      Sched<[]> {
  let mayLoad = 1;
  // Expanded to the real instruction by the custom inserter in
  // AArch64ISelLowering.cpp.
  let usesCustomInserter = 1;
}
// Defines the tile-slice load family for one orientation (is_col selects the
// vertical TileVectorOpV* operands, otherwise the horizontal TileVectorOpH*):
//   - five real instructions _B/_H/_S/_D/_Q, which differ in msz/Q and in how
//     Inst{3-0} is split between the tile number (ZAt) and the slice
//     immediate (imm),
//   - the assembly aliases,
//   - five _PSEUDO_* load pseudos and the patterns selecting the
//     ld1{b,h,w,d,q}_{vert,horiz} intrinsics to them.
multiclass sme_mem_ld_v_ss<string mnemonic, bit is_col> {
// Bytes: no tile field; all four low bits hold the slice immediate.
def _B : sme_mem_ld_ss_inst<0b0, 0b00, mnemonic # "b",
!if(is_col, TileVectorOpV8, TileVectorOpH8),
is_col, sme_elm_idx0_15, GPR64shifted8> {
bits<4> imm;
let Inst{3-0} = imm;
}
// Halfwords: 1-bit tile number, 3-bit slice immediate.
def _H : sme_mem_ld_ss_inst<0b0, 0b01, mnemonic # "h",
!if(is_col, TileVectorOpV16, TileVectorOpH16),
is_col, sme_elm_idx0_7, GPR64shifted16> {
bits<1> ZAt;
bits<3> imm;
let Inst{3} = ZAt;
let Inst{2-0} = imm;
}
// Words: 2-bit tile number, 2-bit slice immediate.
def _S : sme_mem_ld_ss_inst<0b0, 0b10, mnemonic # "w",
!if(is_col, TileVectorOpV32, TileVectorOpH32),
is_col, sme_elm_idx0_3, GPR64shifted32> {
bits<2> ZAt;
bits<2> imm;
let Inst{3-2} = ZAt;
let Inst{1-0} = imm;
}
// Doublewords: 3-bit tile number, 1-bit slice immediate.
def _D : sme_mem_ld_ss_inst<0b0, 0b11, mnemonic # "d",
!if(is_col, TileVectorOpV64, TileVectorOpH64),
is_col, sme_elm_idx0_1, GPR64shifted64> {
bits<3> ZAt;
bits<1> imm;
let Inst{3-1} = ZAt;
let Inst{0} = imm;
}
// Quadwords (Q = 1): 4-bit tile number, no slice immediate bits.
def _Q : sme_mem_ld_ss_inst<0b1, 0b11, mnemonic # "q",
!if(is_col, TileVectorOpV128, TileVectorOpH128),
is_col, sme_elm_idx0_0, GPR64shifted128> {
bits<4> ZAt;
let Inst{3-0} = ZAt;
}
defm : sme_mem_ld_ss_aliases<NAME, is_col>;
// Pseudo instructions for lowering intrinsics, using immediates instead of
// tile registers.
def _PSEUDO_B : sme_load_pseudo;
def _PSEUDO_H : sme_load_pseudo;
def _PSEUDO_S : sme_load_pseudo;
def _PSEUDO_D : sme_load_pseudo;
def _PSEUDO_Q : sme_load_pseudo;
// Intrinsic -> pseudo patterns. Arguments after the intrinsic are
// (tile immediate type, slice offset type, address pattern, tile-slice
// pattern); the tile and offset ranges mirror the bit splits above.
defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_B),
!if(is_col, int_aarch64_sme_ld1b_vert,
int_aarch64_sme_ld1b_horiz),
sme_elm_idx0_0, timm32_0_15, am_sve_regreg_lsl0,
tileslice8>;
defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
!if(is_col, int_aarch64_sme_ld1h_vert,
int_aarch64_sme_ld1h_horiz),
timm32_0_1, timm32_0_7, am_sve_regreg_lsl1,
tileslice16>;
defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
!if(is_col, int_aarch64_sme_ld1w_vert,
int_aarch64_sme_ld1w_horiz),
timm32_0_3, timm32_0_3, am_sve_regreg_lsl2,
tileslice32>;
defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
!if(is_col, int_aarch64_sme_ld1d_vert,
int_aarch64_sme_ld1d_horiz),
timm32_0_7, timm32_0_1, am_sve_regreg_lsl3,
tileslice64>;
defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
!if(is_col, int_aarch64_sme_ld1q_vert,
int_aarch64_sme_ld1q_horiz),
timm32_0_15, sme_elm_idx0_0, am_sve_regreg_lsl4,
tileslice128>;
}
// Top-level load multiclass: instantiates the horizontal (_H, is_col = 0) and
// vertical (_V, is_col = 1) families.
multiclass sme_mem_ld_ss<string mnemonic> {
  defm _H : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b0>;
  defm _V : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b1>;
}
//===----------------------------------------------------------------------===//
// SME Contiguous Stores
//===----------------------------------------------------------------------===//
// Common encoding for the contiguous tile-slice stores (Inst{21} = 1). Marked
// mayStore and hasSideEffects. Inst{3-0} is left to the instantiating def.
//   Q   -> Inst{24}
//   msz -> Inst{23-22}
//   V   -> Inst{15}
class sme_mem_st_ss_base<bit Q, bit V, bits<2> msz, dag ins,
                         string mnemonic, string argstr>
    : I<(outs), ins, mnemonic, argstr, "", []>, Sched<[]> {
  bits<5> Rm;
  bits<2> Rv;
  bits<3> Pg;
  bits<5> Rn;

  let mayStore = 1;
  let hasSideEffects = 1;

  let Inst{31-25} = 0b1110000;
  let Inst{24}    = Q;
  let Inst{23-22} = msz;
  let Inst{21}    = 0b1;
  let Inst{20-16} = Rm;
  let Inst{15}    = V;
  let Inst{14-13} = Rv;
  let Inst{12-10} = Pg;
  let Inst{9-5}   = Rn;
  let Inst{4}     = 0b0;
}
// Concrete store form: the tile slice is an input operand and the assembly
// syntax is "{ZAt[Rv, imm]}, Pg, [Rn, Rm]". is_col is forwarded as the V bit.
class sme_mem_st_ss_inst<bit Q, bits<2> msz, string mnemonic,
                         MatrixTileVectorOperand tile_ty, bit is_col,
                         Operand imm_ty, RegisterOperand gpr_ty>
    : sme_mem_st_ss_base<
          Q, is_col, msz,
          (ins tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm,
               PPR3bAny:$Pg, GPR64sp:$Rn, gpr_ty:$Rm),
          mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg, [$Rn, $Rm]">;
// Store aliases: "st1" mnemonic stem with the default (empty) predicate
// suffix.
multiclass sme_mem_st_ss_aliases<string inst, bit is_col> {
  defm NAME : sme_mem_ss_aliases<"st1", inst, is_col>;
}
// Selection patterns for a tile-slice store intrinsic `Store`. Unlike the
// load patterns these target the real instruction: the tile immediate is
// converted through `imm2tile`, and the result operands are
// (tile, idx, imm, pg, base, offset).
multiclass sme_mem_st_ss_patterns<Instruction Inst, SDPatternOperator Store,
                                  Operand offset_ty,
                                  ComplexPattern imm2tile,
                                  ComplexPattern addr,
                                  ComplexPattern tileslice> {
  // Plain base address: the register offset is XZR.
  def : Pat<(Store PPR3bAny:$pg, GPR64sp:$base, (imm2tile untyped:$tile),
                   (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
                                   offset_ty:$imm))),
            (Inst $tile, $idx, $imm, $pg, $base, XZR)>;

  // Base + register offset matched through `addr`; AddedComplexity raises
  // this pattern above the plain-base form.
  let AddedComplexity = 1 in {
    def : Pat<(Store PPR3bAny:$pg, (addr GPR64sp:$base, GPR64:$offset),
                     (imm2tile untyped:$tile),
                     (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
                                     offset_ty:$imm))),
              (Inst $tile, $idx, $imm, $pg, $base, $offset)>;
  }
}
// Defines the tile-slice store family for one orientation (is_col selects the
// vertical TileVectorOpV* operands, otherwise the horizontal TileVectorOpH*):
//   - five real instructions _B/_H/_S/_D/_Q, which differ in msz/Q and in how
//     Inst{3-0} is split between the tile number (ZAt) and the slice
//     immediate (imm),
//   - the assembly aliases,
//   - patterns selecting the st1{b,h,w,d,q}_{vert,horiz} intrinsics directly
//     to the real instructions (no pseudos are defined here).
multiclass sme_mem_st_v_ss<string mnemonic, bit is_col> {
// Bytes: no tile field; all four low bits hold the slice immediate.
def _B : sme_mem_st_ss_inst<0b0, 0b00, mnemonic # "b",
!if(is_col, TileVectorOpV8, TileVectorOpH8),
is_col, sme_elm_idx0_15, GPR64shifted8> {
bits<4> imm;
let Inst{3-0} = imm;
}
// Halfwords: 1-bit tile number, 3-bit slice immediate.
def _H : sme_mem_st_ss_inst<0b0, 0b01, mnemonic # "h",
!if(is_col, TileVectorOpV16, TileVectorOpH16),
is_col, sme_elm_idx0_7, GPR64shifted16> {
bits<1> ZAt;
bits<3> imm;
let Inst{3} = ZAt;
let Inst{2-0} = imm;
}
// Words: 2-bit tile number, 2-bit slice immediate.
def _S : sme_mem_st_ss_inst<0b0, 0b10, mnemonic # "w",
!if(is_col, TileVectorOpV32, TileVectorOpH32),
is_col, sme_elm_idx0_3, GPR64shifted32> {
bits<2> ZAt;
bits<2> imm;
let Inst{3-2} = ZAt;
let Inst{1-0} = imm;
}
// Doublewords: 3-bit tile number, 1-bit slice immediate.
def _D : sme_mem_st_ss_inst<0b0, 0b11, mnemonic # "d",
!if(is_col, TileVectorOpV64, TileVectorOpH64),
is_col, sme_elm_idx0_1, GPR64shifted64> {
bits<3> ZAt;
bits<1> imm;
let Inst{3-1} = ZAt;
let Inst{0} = imm;
}
// Quadwords (Q = 1): 4-bit tile number, no slice immediate bits.
def _Q : sme_mem_st_ss_inst<0b1, 0b11, mnemonic # "q",
!if(is_col, TileVectorOpV128, TileVectorOpH128),
is_col, sme_elm_idx0_0, GPR64shifted128> {
bits<4> ZAt;
let Inst{3-0} = ZAt;
}
defm : sme_mem_st_ss_aliases<NAME, is_col>;
// Intrinsic -> instruction patterns. Arguments after the intrinsic are
// (slice offset type, immediate-to-tile pattern, address pattern, tile-slice
// pattern).
defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _B),
!if(is_col, int_aarch64_sme_st1b_vert,
int_aarch64_sme_st1b_horiz),
timm32_0_15, imm_to_tile8, am_sve_regreg_lsl0,
tileslice8>;
defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _H),
!if(is_col, int_aarch64_sme_st1h_vert,
int_aarch64_sme_st1h_horiz),
timm32_0_7, imm_to_tile16, am_sve_regreg_lsl1,
tileslice16>;
defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _S),
!if(is_col, int_aarch64_sme_st1w_vert,
int_aarch64_sme_st1w_horiz),
timm32_0_3, imm_to_tile32, am_sve_regreg_lsl2,
tileslice32>;
defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _D),
!if(is_col, int_aarch64_sme_st1d_vert,
int_aarch64_sme_st1d_horiz),
timm32_0_1, imm_to_tile64, am_sve_regreg_lsl3,
tileslice64>;
defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _Q),
!if(is_col, int_aarch64_sme_st1q_vert,
int_aarch64_sme_st1q_horiz),
sme_elm_idx0_0, imm_to_tile128,
am_sve_regreg_lsl4, tileslice128>;
}
// Top-level store multiclass: instantiates the horizontal (_H, is_col = 0) and
// vertical (_V, is_col = 1) families.
multiclass sme_mem_st_ss<string mnemonic> {
  defm _H : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b0>;
  defm _V : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b1>;
}
//===----------------------------------------------------------------------===//
// SME Save and Restore Array
//===----------------------------------------------------------------------===//
// Shared encoding and assembly syntax for the ZA array save/restore
// instructions. isStore goes to Inst{21}; Rv, Rn and imm4 are encoded, while
// the $offset operand appears only in the assembly string and has no field of
// its own in this class.
class sme_spill_fill_base<bit isStore, dag outs, dag ins, string opcodestr>
    : I<outs, ins, opcodestr,
        "\t$ZAt[$Rv, $imm4], [$Rn, $offset, mul vl]", "", []>,
      Sched<[]> {
  bits<2> Rv;
  bits<5> Rn;
  bits<4> imm4;

  let Inst{31-22} = 0b1110000100;
  let Inst{21}    = isStore;
  let Inst{20-15} = 0b000000;
  let Inst{14-13} = Rv;
  let Inst{12-10} = 0b000;
  let Inst{9-5}   = Rn;
  let Inst{4}     = 0b0;
  let Inst{3-0}   = imm4;
}
// Store form (isStore = 1): the matrix operand is an input and there are no
// outputs.
let mayStore = 1 in
class sme_spill_inst<string opcodestr>
    : sme_spill_fill_base<0b1,
                          (outs),
                          (ins MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv,
                               sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
                               imm0_15:$offset),
                          opcodestr>;
// Load form (isStore = 0): the matrix operand is the sole output.
let mayLoad = 1 in
class sme_fill_inst<string opcodestr>
    : sme_spill_fill_base<0b0,
                          (outs MatrixOp:$ZAt),
                          (ins MatrixIndexGPR32Op12_15:$Rv,
                               sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
                               imm0_15:$offset),
                          opcodestr>;
// Spill instruction, its short "[Rn]" alias (offset 0), and the pattern
// selecting int_aarch64_sme_str directly to the instruction with ZA as the
// matrix operand and both immediates zero.
multiclass sme_spill<string opcodestr> {
  def NAME : sme_spill_inst<opcodestr>;

  def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]",
                  (!cast<Instruction>(NAME)
                      MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv,
                      sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0),
                  1>;

  // base
  def : Pat<(int_aarch64_sme_str MatrixIndexGPR32Op12_15:$idx,
                                 GPR64sp:$base),
            (!cast<Instruction>(NAME) ZA, $idx, 0, $base, 0)>;
}
// Fill instruction, its short "[Rn]" alias (offset 0), a custom-inserted
// _PSEUDO taking (idx, imm4, base), and the pattern selecting
// int_aarch64_sme_ldr to that pseudo with imm4 = 0.
multiclass sme_fill<string opcodestr> {
  def NAME : sme_fill_inst<opcodestr>;

  def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]",
                  (!cast<Instruction>(NAME)
                      MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv,
                      sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0),
                  1>;

  def NAME # _PSEUDO
      : Pseudo<(outs),
               (ins MatrixIndexGPR32Op12_15:$idx, imm0_15:$imm4,
                    GPR64sp:$base),
               []>,
        Sched<[]> {
    let mayLoad = 1;
    // Expanded to the real instruction by the custom inserter in
    // AArch64ISelLowering.cpp.
    let usesCustomInserter = 1;
  }

  // base
  def : Pat<(int_aarch64_sme_ldr MatrixIndexGPR32Op12_15:$idx,
                                 GPR64sp:$base),
            (!cast<Instruction>(NAME # _PSEUDO) $idx, 0, $base)>;
}
//===----------------------------------------------------------------------===//
// Move instructions
//===----------------------------------------------------------------------===//
// Common encoding for the vector-to-tile-slice moves. Inst{3-0} (tile number /
// slice immediate) is left to the instantiating def.
//   sz -> Inst{23-22}
//   Q  -> Inst{16}
//   V  -> Inst{15}
class sme_vector_to_tile_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
                              string mnemonic, string argstr>
    : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
  bits<2> Rv;
  bits<3> Pg;
  bits<5> Zn;

  let Inst{31-24} = 0b11000000;
  let Inst{23-22} = sz;
  let Inst{21-17} = 0b00000;
  let Inst{16}    = Q;
  let Inst{15}    = V;
  let Inst{14-13} = Rv;
  let Inst{12-10} = Pg;
  let Inst{9-5}   = Zn;
  let Inst{4}     = 0b0;
}
// Concrete vector-to-tile form: the destination tile slice is tied to an
// input of the same type ($ZAd = $_ZAd), and is_col is forwarded as the V
// bit. Assembly syntax is "ZAd[Rv, imm], Pg/m, Zn".
class sme_vector_to_tile_inst<bit Q, bits<2> sz,
                              MatrixTileVectorOperand tile_ty,
                              bit is_col, Operand imm_ty, ZPRRegOp zpr_ty,
                              string mnemonic>
    : sme_vector_to_tile_base<
          Q, is_col, sz,
          (outs tile_ty:$ZAd),
          (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm,
               PPR3bAny:$Pg, zpr_ty:$Zn),
          mnemonic, "\t$ZAd[$Rv, $imm], $Pg/m, $Zn"> {
  let Constraints = "$ZAd = $_ZAd";
}
// "mov" spelling for a vector-to-tile instruction, using the same operand
// syntax as the instruction itself.
multiclass sme_vector_to_tile_aliases<Instruction inst,
                                      MatrixTileVectorOperand tile_ty,
                                      ZPRRegOp zpr_ty, Operand imm_ty> {
  def : InstAlias<"mov\t$ZAd[$Rv, $imm], $Pg/m, $Zn",
                  (inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv,
                        imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn),
                  1>;
}
// Selection pattern mapping the write intrinsic `op` onto `inst`.
//   imm_ty    - operand type of the tile-number immediate ($tile).
//   offset_ty - operand type of the slice offset produced by `tileslice`.
//   tileslice - ComplexPattern that splits the i32 slice operand into an
//               index register ($idx) and an immediate offset ($imm).
// The predicate and vector operands are matched with the value types ppr_vt
// and zpr_vt respectively.
multiclass sme_vector_to_tile_patterns<Instruction inst, ValueType zpr_vt,
ValueType ppr_vt, Operand imm_ty,
Operand offset_ty,
SDPatternOperator op,
ComplexPattern tileslice> {
def : Pat<(op imm_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
offset_ty:$imm)),
(ppr_vt PPR3bAny:$pg), (zpr_vt ZPRAny:$zn)),
(inst imm_ty:$tile, $idx, $imm, $pg, $zn)>;
}
// Pseudo used when lowering the tile-write intrinsics. It has no explicit
// outputs and identifies the tile by an i32 immediate rather than a tile
// register; the other operands are the slice index register, an immediate
// offset, the predicate and the source vector. za_flag is recorded in
// SMEMatrixType.
class sme_mova_insert_pseudo<SMEMatrixTypeEnum za_flag>
: Pseudo<(outs), (ins i32imm:$tile, MatrixIndexGPR32Op12_15:$idx,
i32imm:$imm, PPR3bAny:$pg, ZPRAny:$zn), []>,
Sched<[]> {
// Translated to the actual instructions in AArch64ISelLowering.cpp
let SMEMatrixType = za_flag;
let usesCustomInserter = 1;
}
// Vector-to-tile instructions for one slice direction. is_col selects the
// TileVectorOpV* operand classes (and the *_vert intrinsics) when set, and the
// TileVectorOpH* classes (and *_horiz intrinsics) otherwise. For each element
// size (_B, _H, _S, _D, _Q) this defines the real instruction, a pseudo that
// takes the tile as an immediate, a "mov" alias, and the selection patterns.
multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> {
// In the real instructions Inst{3-0} is split between the tile number (ZAd)
// and the slice immediate: _B uses all four bits for the immediate, and each
// wider element size moves one more bit from the immediate to ZAd.
def _B : sme_vector_to_tile_inst<0b0, 0b00, !if(is_col, TileVectorOpV8,
TileVectorOpH8),
is_col, sme_elm_idx0_15, ZPR8, mnemonic>,
SMEPseudo2Instr<NAME # _B, 1> {
bits<4> imm;
let Inst{3-0} = imm;
}
def _H : sme_vector_to_tile_inst<0b0, 0b01, !if(is_col, TileVectorOpV16,
TileVectorOpH16),
is_col, sme_elm_idx0_7, ZPR16, mnemonic>,
SMEPseudo2Instr<NAME # _H, 1> {
bits<1> ZAd;
bits<3> imm;
let Inst{3} = ZAd;
let Inst{2-0} = imm;
}
def _S : sme_vector_to_tile_inst<0b0, 0b10, !if(is_col, TileVectorOpV32,
TileVectorOpH32),
is_col, sme_elm_idx0_3, ZPR32, mnemonic>,
SMEPseudo2Instr<NAME # _S, 1> {
bits<2> ZAd;
bits<2> imm;
let Inst{3-2} = ZAd;
let Inst{1-0} = imm;
}
def _D : sme_vector_to_tile_inst<0b0, 0b11, !if(is_col, TileVectorOpV64,
TileVectorOpH64),
is_col, sme_elm_idx0_1, ZPR64, mnemonic>,
SMEPseudo2Instr<NAME # _D, 1> {
bits<3> ZAd;
bits<1> imm;
let Inst{3-1} = ZAd;
let Inst{0} = imm;
}
// _Q is the only variant with Q = 1. Its imm field is declared but not placed
// in the encoding: all of Inst{3-0} holds ZAd and the immediate operand type
// is sme_elm_idx0_0.
def _Q : sme_vector_to_tile_inst<0b1, 0b11, !if(is_col, TileVectorOpV128,
TileVectorOpH128),
is_col, sme_elm_idx0_0, ZPR128, mnemonic>,
SMEPseudo2Instr<NAME # _Q, 1> {
bits<4> ZAd;
bits<1> imm;
let Inst{3-0} = ZAd;
}
// Pseudo instructions for lowering intrinsics, using immediates instead of
// tile registers.
def _PSEUDO_B : sme_mova_insert_pseudo<SMEMatrixTileB>, SMEPseudo2Instr<NAME # _B, 0>;
def _PSEUDO_H : sme_mova_insert_pseudo<SMEMatrixTileH>, SMEPseudo2Instr<NAME # _H, 0>;
def _PSEUDO_S : sme_mova_insert_pseudo<SMEMatrixTileS>, SMEPseudo2Instr<NAME # _S, 0>;
def _PSEUDO_D : sme_mova_insert_pseudo<SMEMatrixTileD>, SMEPseudo2Instr<NAME # _D, 0>;
def _PSEUDO_Q : sme_mova_insert_pseudo<SMEMatrixTileQ>, SMEPseudo2Instr<NAME # _Q, 0>;
// "mov" aliases for the real instructions, one per element size.
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _B),
!if(is_col, TileVectorOpV8,
TileVectorOpH8),
ZPR8, sme_elm_idx0_15>;
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _H),
!if(is_col, TileVectorOpV16,
TileVectorOpH16),
ZPR16, sme_elm_idx0_7>;
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _S),
!if(is_col, TileVectorOpV32,
TileVectorOpH32),
ZPR32, sme_elm_idx0_3>;
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _D),
!if(is_col, TileVectorOpV64,
TileVectorOpH64),
ZPR64, sme_elm_idx0_1>;
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _Q),
!if(is_col, TileVectorOpV128,
TileVectorOpH128),
ZPR128, sme_elm_idx0_0>;
// Patterns for the 8- to 64-bit write intrinsics. In each instantiation the
// first sme_elm_idx* argument types the tile-number immediate and the second
// types the slice offset; the patterns select the pseudos, not the real
// instructions. Floating-point vector types reuse the pseudo of the same
// element width.
defvar op = !if(is_col, int_aarch64_sme_write_vert,
int_aarch64_sme_write_horiz);
defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_B),
nxv16i8, nxv16i1, sme_elm_idx0_0, sme_elm_idx0_15,
op, tileslice8>;
defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
nxv8i16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7,
op, tileslice16>;
defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
nxv8f16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7,
op, tileslice16>;
defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
nxv8bf16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7,
op, tileslice16>;
defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
nxv4i32, nxv4i1, sme_elm_idx0_3, sme_elm_idx0_3,
op, tileslice32>;
defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
nxv4f32, nxv4i1, sme_elm_idx0_3, sme_elm_idx0_3,
op, tileslice32>;
defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
nxv2i64, nxv2i1, sme_elm_idx0_7, sme_elm_idx0_1,
op, tileslice64>;
defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
nxv2f64, nxv2i1, sme_elm_idx0_7, sme_elm_idx0_1,
op, tileslice64>;
// Patterns for the 128-bit (writeq) intrinsics: every vector value type maps
// onto the same _PSEUDO_Q, with a 0-15 tile immediate and a slice offset that
// can only be 0.
defvar opq = !if(is_col, int_aarch64_sme_writeq_vert,
int_aarch64_sme_writeq_horiz);
defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
nxv16i8, nxv16i1, sme_elm_idx0_15,
sme_elm_idx0_0, opq, tileslice128>;
defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
nxv8i16, nxv8i1, sme_elm_idx0_15,
sme_elm_idx0_0, opq, tileslice128>;
defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
nxv8f16, nxv8i1, sme_elm_idx0_15,
sme_elm_idx0_0, opq, tileslice128>;
defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
nxv8bf16, nxv8i1, sme_elm_idx0_15,
sme_elm_idx0_0, opq, tileslice128>;
defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
nxv4i32, nxv4i1, sme_elm_idx0_15,
sme_elm_idx0_0, opq, tileslice128>;
defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
nxv4f32, nxv4i1, sme_elm_idx0_15,
sme_elm_idx0_0, opq, tileslice128>;
defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
nxv2i64, nxv2i1, sme_elm_idx0_15,
sme_elm_idx0_0, opq, tileslice128>;
defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
nxv2f64, nxv2i1, sme_elm_idx0_15,
sme_elm_idx0_0, opq, tileslice128>;
}
// Instantiates sme_vector_v_to_tile twice: the _H records with is_col = 0 and
// the _V records with is_col = 1.
multiclass sme_vector_to_tile<string mnemonic> {
defm _H : sme_vector_v_to_tile<mnemonic, /*is_col=*/0b0>;
defm _V : sme_vector_v_to_tile<mnemonic, /*is_col=*/0b1>;
}
// Shared encoding skeleton for the tile-to-vector instruction classes.
// Inst{8-5} is deliberately left unassigned here; the concrete definitions
// derived from this class place the tile number and/or slice immediate there.
class sme_tile_to_vector_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
                              string mnemonic, string argstr>
    : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
  // Operand fields referenced by the encoding.
  bits<2> Rv;
  bits<3> Pg;
  bits<5> Zd;

  // Fixed opcode bits.
  let Inst{31-24} = 0b11000000;
  let Inst{21-17} = 0b00001;
  let Inst{9}     = 0b0;

  // Bits taken directly from the class parameters.
  let Inst{23-22} = sz;
  let Inst{16}    = Q;
  let Inst{15}    = V;

  // Register operands.
  let Inst{14-13} = Rv;
  let Inst{12-10} = Pg;
  let Inst{4-0}   = Zd;
}
// Concrete tile-to-vector instruction built on sme_tile_to_vector_base.
// The destination vector $Zd is also the first input ($_Zd), tied through
// Constraints. The remaining inputs are the governing predicate, the source
// tile vector, the slice index register and an immediate of type imm_ty.
// is_col is forwarded as the base class's V bit.
class sme_tile_to_vector_inst<bit Q, bits<2> sz, ZPRRegOp zpr_ty,
MatrixTileVectorOperand tile_ty,
bit is_col, Operand imm_ty, string mnemonic>
: sme_tile_to_vector_base<Q, is_col, sz, (outs zpr_ty:$Zd),
(ins zpr_ty:$_Zd, PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
mnemonic, "\t$Zd, $Pg/m, $ZAn[$Rv, $imm]"> {
let Constraints = "$Zd = $_Zd";
}
// Adds a "mov" assembler spelling for `inst`, using the same operand layout
// as the instruction's own asm string.
multiclass sme_tile_to_vector_aliases<Instruction inst, ZPRRegOp zpr_ty,
MatrixTileVectorOperand tile_ty,
Operand imm_ty > {
def : InstAlias<"mov\t$Zd, $Pg/m, $ZAn[$Rv, $imm]",
(inst zpr_ty:$Zd, PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm), 1>;
}
// Selection patterns mapping the read intrinsic `op` onto `inst`.
//   imm2tile  - ComplexPattern that turns the tile-number operand into $tile.
//   tileslice - ComplexPattern that splits the slice operand into an index
//               register ($idx) and an immediate offset ($imm) of type
//               offset_ty.
// Two patterns are emitted: a plain one that uses the slice operand as the
// index register with offset 0, and a second one that goes through
// `tileslice` and carries AddedComplexity = 1.
multiclass sme_tile_to_vector_patterns<Instruction inst, ValueType zpr_vt,
ValueType ppr_vt, Operand offset_ty,
ComplexPattern imm2tile,
ComplexPattern tileslice,
SDPatternOperator op> {
// Slice index used directly; immediate offset is 0.
def : Pat<(zpr_vt (op (zpr_vt ZPRAny:$passthru), (ppr_vt PPR3bAny:$pg),
(imm2tile untyped:$tile), MatrixIndexGPR32Op12_15:$idx)),
(inst $passthru, $pg, $tile, $idx, 0)>;
// Slice index decomposed into register + immediate by `tileslice`.
let AddedComplexity = 1 in {
def : Pat<(zpr_vt (op (zpr_vt ZPRAny:$passthru), (ppr_vt PPR3bAny:$pg),
(imm2tile untyped:$tile),
(i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
offset_ty:$imm)))),
(inst $passthru, $pg, $tile, $idx, $imm)>;
}
}
// Tile-to-vector instructions for one slice direction. is_col selects the
// TileVectorOpV* operand classes (and the *_vert intrinsics) when set, and the
// TileVectorOpH* classes (and *_horiz intrinsics) otherwise. For each element
// size (_B, _H, _S, _D, _Q) this defines the instruction, a "mov" alias and
// the selection patterns. Unlike the vector-to-tile direction, the patterns
// here select the real instructions directly.
multiclass sme_tile_to_vector_v<string mnemonic, bit is_col> {
// Inst{8-5} is split between the tile number (ZAn) and the slice immediate:
// _B uses all four bits for the immediate, and each wider element size moves
// one more bit from the immediate to ZAn.
def _B : sme_tile_to_vector_inst<0b0, 0b00, ZPR8, !if(is_col, TileVectorOpV8,
TileVectorOpH8),
is_col, sme_elm_idx0_15, mnemonic> {
bits<4> imm;
let Inst{8-5} = imm;
}
def _H : sme_tile_to_vector_inst<0b0, 0b01, ZPR16, !if(is_col, TileVectorOpV16,
TileVectorOpH16),
is_col, sme_elm_idx0_7, mnemonic> {
bits<1> ZAn;
bits<3> imm;
let Inst{8} = ZAn;
let Inst{7-5} = imm;
}
def _S : sme_tile_to_vector_inst<0b0, 0b10, ZPR32, !if(is_col, TileVectorOpV32,
TileVectorOpH32),
is_col, sme_elm_idx0_3, mnemonic> {
bits<2> ZAn;
bits<2> imm;
let Inst{8-7} = ZAn;
let Inst{6-5} = imm;
}
def _D : sme_tile_to_vector_inst<0b0, 0b11, ZPR64, !if(is_col, TileVectorOpV64,
TileVectorOpH64),
is_col, sme_elm_idx0_1, mnemonic> {
bits<3> ZAn;
bits<1> imm;
let Inst{8-6} = ZAn;
let Inst{5} = imm;
}
// _Q is the only variant with Q = 1; all of Inst{8-5} holds ZAn and no
// immediate bits are encoded (the operand type is sme_elm_idx0_0).
def _Q : sme_tile_to_vector_inst<0b1, 0b11, ZPR128, !if(is_col, TileVectorOpV128,
TileVectorOpH128),
is_col, sme_elm_idx0_0, mnemonic> {
bits<4> ZAn;
let Inst{8-5} = ZAn;
}
// "mov" aliases, one per element size.
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _B), ZPR8,
!if(is_col, TileVectorOpV8,
TileVectorOpH8), sme_elm_idx0_15>;
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _H), ZPR16,
!if(is_col, TileVectorOpV16,
TileVectorOpH16), sme_elm_idx0_7>;
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _S), ZPR32,
!if(is_col, TileVectorOpV32,
TileVectorOpH32), sme_elm_idx0_3>;
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _D), ZPR64,
!if(is_col, TileVectorOpV64,
TileVectorOpH64), sme_elm_idx0_1>;
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _Q), ZPR128,
!if(is_col, TileVectorOpV128,
TileVectorOpH128), sme_elm_idx0_0>;
// Patterns for the 8- to 64-bit read intrinsics. Floating-point vector types
// reuse the instruction of the same element width.
defvar op = !if(is_col, int_aarch64_sme_read_vert,
int_aarch64_sme_read_horiz);
defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _B),
nxv16i8, nxv16i1, sme_elm_idx0_15,
imm_to_tile8, tileslice8, op>;
defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
nxv8i16, nxv8i1, sme_elm_idx0_7,
imm_to_tile16, tileslice16, op>;
defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
nxv8f16, nxv8i1, sme_elm_idx0_7,
imm_to_tile16, tileslice16, op>;
defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
nxv8bf16, nxv8i1, sme_elm_idx0_7,
imm_to_tile16, tileslice16, op>;
defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _S),
nxv4i32, nxv4i1, sme_elm_idx0_3,
imm_to_tile32, tileslice32, op>;
defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _S),
nxv4f32, nxv4i1, sme_elm_idx0_3,
imm_to_tile32, tileslice32, op>;
defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _D),
nxv2i64, nxv2i1, sme_elm_idx0_1,
imm_to_tile64, tileslice64, op>;
defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _D),
nxv2f64, nxv2i1, sme_elm_idx0_1,
imm_to_tile64, tileslice64, op>;
// Patterns for the 128-bit (readq) intrinsics: every vector value type maps
// onto the same _Q instruction.
defvar opq = !if(is_col, int_aarch64_sme_readq_vert,
int_aarch64_sme_readq_horiz);
defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
nxv16i8, nxv16i1, sme_elm_idx0_0,
imm_to_tile128, tileslice128, opq>;
defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
nxv8i16, nxv8i1, sme_elm_idx0_0,
imm_to_tile128, tileslice128, opq>;
defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
nxv8f16, nxv8i1, sme_elm_idx0_0,
imm_to_tile128, tileslice128, opq>;
defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
nxv8bf16, nxv8i1, sme_elm_idx0_0,
imm_to_tile128, tileslice128, opq>;
defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
nxv4i32, nxv4i1, sme_elm_idx0_0,
imm_to_tile128, tileslice128, opq>;
defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
nxv4f32, nxv4i1, sme_elm_idx0_0,
imm_to_tile128, tileslice128, opq>;
defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
nxv2i64, nxv2i1, sme_elm_idx0_0,
imm_to_tile128, tileslice128, opq>;
defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
nxv2f64, nxv2i1, sme_elm_idx0_0,
imm_to_tile128, tileslice128, opq>;
}
// Instantiates sme_tile_to_vector_v twice: the _H records with is_col = 0 and
// the _V records with is_col = 1.
multiclass sme_tile_to_vector<string mnemonic> {
defm _H : sme_tile_to_vector_v<mnemonic, /*is_col=*/0b0>;
defm _V : sme_tile_to_vector_v<mnemonic, /*is_col=*/0b1>;
}
//===----------------------------------------------------------------------===//
// SME Zero
//===----------------------------------------------------------------------===//
// NOTE: This definition isn't really correct because there are outputs, i.e.
// the tile registers being zeroed. We fix this up in a custom inserter that
// marks the appropriate registers as being implicitly defined.
//
// Encoding: the upper 24 bits are a fixed opcode and the low 8 bits carry the
// MatrixTileList operand as an 8-bit mask.
class sme_zero_inst<string mnemonic>
: I<(outs), (ins MatrixTileList:$imm),
mnemonic, "\t$imm", "", []>, Sched<[]> {
bits<8> imm;
let Inst{31-8} = 0b110000000000100000000000;
let Inst{7-0} = imm;
}
// Zero instruction NAME, assembler aliases that spell common tile lists by
// name, a custom-inserter pseudo that takes the mask as a plain i32
// immediate, and the selection pattern for int_aarch64_sme_zero.
multiclass sme_zero<string mnemonic> {
def NAME : sme_zero_inst<mnemonic>;
// Each alias binds a named tile list to a fixed 8-bit mask. The za0.h and
// za1.h masks are complementary and together equal the {za} mask; every
// multi-tile .s mask below is the bitwise OR of the single-tile .s masks
// it names.
// NOTE(review): {za0.s,za2.s} and {za1.s,za3.s} would produce the same masks
// as {za0.h} and {za1.h}, which is presumably why they have no alias of
// their own here - confirm against the assembler's tile-list parsing.
def : InstAlias<"zero\t\\{za\\}", (!cast<Instruction>(NAME) 0b11111111), 1>;
def : InstAlias<"zero\t\\{za0.h\\}", (!cast<Instruction>(NAME) 0b01010101), 1>;
def : InstAlias<"zero\t\\{za1.h\\}", (!cast<Instruction>(NAME) 0b10101010), 1>;
def : InstAlias<"zero\t\\{za0.s\\}", (!cast<Instruction>(NAME) 0b00010001), 1>;
def : InstAlias<"zero\t\\{za1.s\\}", (!cast<Instruction>(NAME) 0b00100010), 1>;
def : InstAlias<"zero\t\\{za2.s\\}", (!cast<Instruction>(NAME) 0b01000100), 1>;
def : InstAlias<"zero\t\\{za3.s\\}", (!cast<Instruction>(NAME) 0b10001000), 1>;
def : InstAlias<"zero\t\\{za0.s,za1.s\\}", (!cast<Instruction>(NAME) 0b00110011), 1>;
def : InstAlias<"zero\t\\{za0.s,za3.s\\}", (!cast<Instruction>(NAME) 0b10011001), 1>;
def : InstAlias<"zero\t\\{za1.s,za2.s\\}", (!cast<Instruction>(NAME) 0b01100110), 1>;
def : InstAlias<"zero\t\\{za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11001100), 1>;
def : InstAlias<"zero\t\\{za0.s,za1.s,za2.s\\}", (!cast<Instruction>(NAME) 0b01110111), 1>;
def : InstAlias<"zero\t\\{za0.s,za1.s,za3.s\\}", (!cast<Instruction>(NAME) 0b10111011), 1>;
def : InstAlias<"zero\t\\{za0.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11011101), 1>;
def : InstAlias<"zero\t\\{za1.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11101110), 1>;
// Pseudo with no explicit outputs and the tile mask as its only operand.
def NAME # _PSEUDO : Pseudo<(outs), (ins i32imm:$tilelist), []>,
Sched<[]> {
// Translated to the actual instructions in AArch64ISelLowering.cpp
let usesCustomInserter = 1;
}
// The intrinsic's mask immediate is passed through to the pseudo unchanged.
def : Pat<(int_aarch64_sme_zero timm32_0_255:$imm),
(!cast<Instruction>(NAME # _PSEUDO) timm32_0_255:$imm)>;
}
//===----------------------------------------------------------------------===//
// SVE2 Instructions
//===----------------------------------------------------------------------===//
// Predicated, destructive unary instruction on ZPR128 operands. The result
// register $Zd is tied to the first input $_Zd; inactive-lane behaviour is
// spelled "$Pg/m" in the asm string.
class sve2_int_perm_revd<string asm>
    : I<(outs ZPR128:$Zd), (ins ZPR128:$_Zd, PPR3bAny:$Pg, ZPR128:$Zn),
        asm, "\t$Zd, $Pg/m, $Zn", "", []>,
      Sched<[]> {
  bits<5> Zd;
  bits<3> Pg;
  bits<5> Zn;

  // Destructive-operand bookkeeping.
  let Constraints = "$Zd = $_Zd";
  let DestructiveInstType = DestructiveUnary;
  let ElementSize = ZPR128.ElementSize;

  // Fixed opcode bits.
  let Inst{31-24} = 0b00000101;
  let Inst{23-22} = 0b00; // size
  let Inst{21-13} = 0b101110100;

  // Register operands.
  let Inst{12-10} = Pg;
  let Inst{9-5}   = Zn;
  let Inst{4-0}   = Zd;
}
// Defines the instruction NAME and selects it for `op` on each of the four
// integer vector types. All four patterns map onto the same instruction; only
// the vector and predicate value types differ.
multiclass sve2_int_perm_revd<string asm, SDPatternOperator op> {
def NAME : sve2_int_perm_revd<asm>;
def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME)>;
def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME)>;
def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME)>;
def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME)>;
}
// Destructive three-register clamp instruction: $Zd is both the value being
// clamped and the result, with $Zn and $Zm as the two remaining sources.
//   sz     - written to Inst{23-22}.
//   U      - written to Inst{10}.
//   zpr_ty - register operand class used for all three vectors; it also
//            provides ElementSize.
//
// The tied input $_Zd is listed FIRST in `ins`. The sve2_clamp multiclass
// selects this instruction through SVE_3_Op_Pat, which binds the operator's
// operands to the instruction inputs positionally, so the input order here
// must be (value to clamp, $Zn, $Zm) to agree with the "$Zd, $Zn, $Zm" asm
// string. This also matches the other destructive classes in this file,
// which place the tied operand first.
class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
    : I<(outs zpr_ty:$Zd), (ins zpr_ty:$_Zd, zpr_ty:$Zn, zpr_ty:$Zm),
        asm, "\t$Zd, $Zn, $Zm", "", []>,
      Sched<[]> {
  bits<5> Zm;
  bits<5> Zn;
  bits<5> Zd;
  let Inst{31-24} = 0b01000100;
  let Inst{23-22} = sz;
  let Inst{21}    = 0b0;
  let Inst{20-16} = Zm;
  let Inst{15-11} = 0b11000;
  let Inst{10}    = U;
  let Inst{9-5}   = Zn;
  let Inst{4-0}   = Zd;

  // The result overwrites the first source.
  let Constraints = "$Zd = $_Zd";
  let DestructiveInstType = DestructiveOther;
  let ElementSize = zpr_ty.ElementSize;
}
// Defines the _B/_H/_S/_D clamp instructions (sz = 0b00..0b11) and selects
// each for `op` on the integer vector type of matching element width. The
// patterns pass the operator's three operands to the instruction in order.
multiclass sve2_clamp<string asm, bit U, SDPatternOperator op> {
def _B : sve2_clamp<asm, 0b00, U, ZPR8>;
def _H : sve2_clamp<asm, 0b01, U, ZPR16>;
def _S : sve2_clamp<asm, 0b10, U, ZPR32>;
def _D : sve2_clamp<asm, 0b11, U, ZPR64>;
def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
// Predicate select with an indexed second source: "$Pd, $Pn, $Pm[$Rv, $imm]".
// This class fixes the opcode and register fields; Inst{23-22} and
// Inst{20-18} are left for the concrete definitions, which use them for the
// immediate and the element-size marker bits.
class sve2_int_perm_sel_p<string asm, PPRRegOp ppr_ty, Operand imm_ty>
    : I<(outs PPRAny:$Pd), (ins PPRAny:$Pn, ppr_ty:$Pm,
                            MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
        asm, "\t$Pd, $Pn, $Pm[$Rv, $imm]", "", []>,
      Sched<[]> {
  bits<2> Rv;
  bits<4> Pn;
  bits<4> Pm;
  bits<4> Pd;

  // Fixed opcode bits.
  let Inst{31-24} = 0b00100101;
  let Inst{21}    = 0b1;
  let Inst{15-14} = 0b01;
  let Inst{9}     = 0b0;
  let Inst{4}     = 0b0;

  // Register operands.
  let Inst{17-16} = Rv;
  let Inst{13-10} = Pn;
  let Inst{8-5}   = Pm;
  let Inst{3-0}   = Pd;
}
// Defines the _B/_H/_S/_D variants of the indexed predicate select, extra
// assembler aliases, and selection patterns for `op`.
multiclass sve2_int_perm_sel_p<string asm, SDPatternOperator op> {
// Inst{23-22} and Inst{20-18} jointly hold the immediate and a fixed marker
// pattern. The immediate shrinks by one bit per element-size step (4, 3, 2,
// 1 bits) and the freed positions take the fixed bits shown in each def.
def _B : sve2_int_perm_sel_p<asm, PPR8, sme_elm_idx0_15> {
bits<4> imm;
let Inst{23-22} = imm{3-2};
let Inst{20-19} = imm{1-0};
let Inst{18} = 0b1;
}
def _H : sve2_int_perm_sel_p<asm, PPR16, sme_elm_idx0_7> {
bits<3> imm;
let Inst{23-22} = imm{2-1};
let Inst{20} = imm{0};
let Inst{19-18} = 0b10;
}
def _S : sve2_int_perm_sel_p<asm, PPR32, sme_elm_idx0_3> {
bits<2> imm;
let Inst{23-22} = imm{1-0};
let Inst{20-18} = 0b100;
}
def _D : sve2_int_perm_sel_p<asm, PPR64, sme_elm_idx0_1> {
bits<1> imm;
let Inst{23} = imm;
let Inst{22} = 0b1;
let Inst{20-18} = 0b000;
}
// Additional aliases with the same asm string in which $Pd and $Pn use the
// PNRasPPRAny operand class instead of PPRAny. Their final InstAlias argument
// is 0, unlike the aliases elsewhere in this file that pass 1.
def : InstAlias<asm # "\t$Pd, $Pn, $Pm[$Rv, $imm]",
(!cast<Instruction>(NAME # _B) PNRasPPRAny:$Pd,
PNRasPPRAny:$Pn, PPR8:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm), 0>;
def : InstAlias<asm # "\t$Pd, $Pn, $Pm[$Rv, $imm]",
(!cast<Instruction>(NAME # _H) PNRasPPRAny:$Pd,
PNRasPPRAny:$Pn, PPR16:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_7:$imm), 0>;
def : InstAlias<asm # "\t$Pd, $Pn, $Pm[$Rv, $imm]",
(!cast<Instruction>(NAME # _S) PNRasPPRAny:$Pd,
PNRasPPRAny:$Pn, PPR32:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_3:$imm), 0>;
def : InstAlias<asm # "\t$Pd, $Pn, $Pm[$Rv, $imm]",
(!cast<Instruction>(NAME # _D) PNRasPPRAny:$Pd,
PNRasPPRAny:$Pn, PPR64:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_1:$imm), 0>;
// Patterns with the index register used directly and an immediate of 0. The
// result and $Pn are always nxv16i1; the type of $Pm picks the variant.
def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPR8:$Pm),
MatrixIndexGPR32Op12_15:$idx)),
(!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, 0)>;
def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv8i1 PPR16:$Pm),
MatrixIndexGPR32Op12_15:$idx)),
(!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, 0)>;
def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv4i1 PPR32:$Pm),
MatrixIndexGPR32Op12_15:$idx)),
(!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, 0)>;
def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv2i1 PPR64:$Pm),
MatrixIndexGPR32Op12_15:$idx)),
(!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, 0)>;
// Patterns that split the index into register + immediate through the
// tileslice* ComplexPatterns; these carry AddedComplexity = 1.
let AddedComplexity = 1 in {
def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPR8:$Pm),
(i32 (tileslice8 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm)))),
(!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, $imm)>;
def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv8i1 PPR16:$Pm),
(i32 (tileslice16 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_7:$imm)))),
(!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, $imm)>;
def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv4i1 PPR32:$Pm),
(i32 (tileslice32 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_3:$imm)))),
(!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, $imm)>;
def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv2i1 PPR64:$Pm),
(i32 (tileslice64 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_1:$imm)))),
(!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, $imm)>;
}
}
//===----------------------------------------------------------------------===//
// SME2 Instructions
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// SME2 single-multi ternary int/fp, two/four registers
// ZA-array instruction taking a multi-vector $Zn and a single vector $Zm.
// The 7-bit `op` parameter is scattered over the encoding: op{6} -> Inst{22},
// op{5} -> Inst{20}, op{4-2} -> Inst{12-10}, op{1-0} -> Inst{4-3}. op{5} also
// chooses between the "vgx4" and "vgx2" suffix in the asm string. The array
// operand $ZAd is tied to the input $_ZAd.
class sme2_dot_mla_add_sub_array_vg24_single<bits<7> op,
                                             MatrixOperand matrix_ty,
                                             RegisterOperand multi_vector_ty,
                                             ZPRRegOp zpr_ty,
                                             string mnemonic>
    : I<(outs matrix_ty:$ZAd),
        (ins matrix_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rv,
             sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm),
        mnemonic, "\t$ZAd[$Rv, $imm3, " # !if(op{5}, "vgx4", "vgx2") # "], $Zn, $Zm",
        "", []>, Sched<[]> {
  bits<4> Zm;
  bits<5> Zn;
  bits<2> Rv;
  bits<3> imm3;

  let Constraints = "$ZAd = $_ZAd";

  // Fixed opcode bits.
  let Inst{31-23} = 0b110000010;
  let Inst{21}    = 0b1;
  let Inst{15}    = 0b0;

  // Bits taken from `op`.
  let Inst{22}    = op{6}; //sz
  let Inst{20}    = op{5}; //vgx4
  let Inst{12-10} = op{4-2};
  let Inst{4-3}   = op{1-0};

  // Operands.
  let Inst{19-16} = Zm;
  let Inst{14-13} = Rv;
  let Inst{9-5}   = Zn;
  let Inst{2-0}   = imm3;
}
// Instruction-only variant: defines NAME and an alias whose asm string omits
// the vgx2/vgx4 suffix. No pseudo or selection pattern is created here.
multiclass sme2_dot_mla_add_sub_array_vg24_single<string mnemonic, bits<7> op,
MatrixOperand matrix_ty,
RegisterOperand multi_vector_ty,
ZPRRegOp zpr_ty>{
def NAME: sme2_dot_mla_add_sub_array_vg24_single<op, matrix_ty, multi_vector_ty, zpr_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>;
def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
(!cast<Instruction>(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm), 0>;
}
// Same instruction and suffix-less alias as the vg24_single multiclass, plus a
// _PSEUDO record and a VG2 multi/single selection pattern for `intrinsic`
// on value type `vty`.
multiclass sme2_dot_mla_add_sub_array_vg2_single<string mnemonic, bits<7> op,
MatrixOperand matrix_ty,
RegisterOperand multi_vector_ty,
ZPRRegOp zpr_ty, ValueType vty, SDPatternOperator intrinsic>{
def NAME: sme2_dot_mla_add_sub_array_vg24_single<op, matrix_ty, multi_vector_ty, zpr_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>;
def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
(!cast<Instruction>(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm), 0>;
def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, zpr_ty, SMEMatrixArray>;
def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME, intrinsic, sme_elm_idx0_7, zpr_ty, vty, tileslice16>;
}
// Same instruction and suffix-less alias as the vg24_single multiclass, plus a
// _PSEUDO record and a VG4 multi/single selection pattern for `intrinsic`
// on value type `vty`.
multiclass sme2_dot_mla_add_sub_array_vg4_single<string mnemonic, bits<7> op,
MatrixOperand matrix_ty,
RegisterOperand multi_vector_ty,
ZPRRegOp zpr_ty, ValueType vty, SDPatternOperator intrinsic>{
def NAME: sme2_dot_mla_add_sub_array_vg24_single<op, matrix_ty, multi_vector_ty, zpr_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>;
def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
(!cast<Instruction>(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm), 0>;
def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, zpr_ty, SMEMatrixArray>;
def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME, intrinsic, sme_elm_idx0_7, zpr_ty, vty, tileslice16>;
}
//===----------------------------------------------------------------------===//
// SME2 multiple vectors ternary INT/FP two and four registers
// ZA-array instruction with two multi-vector sources ("vgx2" form). Both $Zn
// and $Zm are encoded in 4-bit fields. The 6-bit `op` parameter is scattered
// as op{5} -> Inst{22}, op{4-2} -> Inst{12-10}, op{1-0} -> Inst{4-3}. The
// array operand $ZAd is tied to the input $_ZAd.
class sme2_dot_mla_add_sub_array_vg2_multi<bits<6> op,
                                           MatrixOperand matrix_ty,
                                           RegisterOperand multi_vector_ty,
                                           string mnemonic>
    : I<(outs matrix_ty:$ZAd),
        (ins matrix_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rv,
             sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm),
        mnemonic, "\t$ZAd[$Rv, $imm3, vgx2], $Zn, $Zm",
        "", []>, Sched<[]>{
  bits<4> Zm;
  bits<4> Zn;
  bits<2> Rv;
  bits<3> imm3;

  let Constraints = "$ZAd = $_ZAd";

  // Fixed opcode bits.
  let Inst{31-23} = 0b110000011;
  let Inst{21}    = 0b1;
  let Inst{16-15} = 0b00;
  let Inst{5}     = 0b0;

  // Bits taken from `op`.
  let Inst{22}    = op{5}; //sz
  let Inst{12-10} = op{4-2};
  let Inst{4-3}   = op{1-0};

  // Operands.
  let Inst{20-17} = Zm;
  let Inst{14-13} = Rv;
  let Inst{9-6}   = Zn;
  let Inst{2-0}   = imm3;
}
// Defines the vgx2 multi/multi instruction NAME, its _PSEUDO, the selection
// pattern for `intrinsic`, and an alias whose asm string omits the "vgx2"
// suffix.
// Note: despite its name, zpr_ty is a ValueType here, not a register operand.
multiclass sme2_dot_mla_add_sub_array_vg2_multi<string mnemonic, bits<6> op,
MatrixOperand matrix_ty,
RegisterOperand multi_vector_ty, ValueType zpr_ty,
SDPatternOperator intrinsic> {
def NAME : sme2_dot_mla_add_sub_array_vg2_multi<op, matrix_ty, multi_vector_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>;
def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, SMEMatrixArray>;
def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME, intrinsic, sme_elm_idx0_7, zpr_ty, tileslice16>;
def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
(!cast<Instruction>(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>;
}
// ZA-array instruction with two multi-vector sources ("vgx4" form). Both $Zn
// and $Zm are encoded in 3-bit fields, one bit narrower than in the vgx2
// class, with the freed positions held at fixed values. The 6-bit `op`
// parameter is scattered as op{5} -> Inst{22}, op{4-2} -> Inst{12-10},
// op{1-0} -> Inst{4-3}. The array operand $ZAd is tied to the input $_ZAd.
class sme2_dot_mla_add_sub_array_vg4_multi<bits<6> op,
                                           MatrixOperand matrix_ty,
                                           RegisterOperand multi_vector_ty,
                                           string mnemonic>
    : I<(outs matrix_ty:$ZAd),
        (ins matrix_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rv,
             sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm),
        mnemonic, "\t$ZAd[$Rv, $imm3, vgx4], $Zn, $Zm",
        "", []>, Sched<[]>{
  bits<3> Zm;
  bits<3> Zn;
  bits<2> Rv;
  bits<3> imm3;

  let Constraints = "$ZAd = $_ZAd";

  // Fixed opcode bits.
  let Inst{31-23} = 0b110000011;
  let Inst{21}    = 0b1;
  let Inst{17-15} = 0b010;
  let Inst{6-5}   = 0b00;

  // Bits taken from `op`.
  let Inst{22}    = op{5}; //sz
  let Inst{12-10} = op{4-2};
  let Inst{4-3}   = op{1-0};

  // Operands.
  let Inst{20-18} = Zm;
  let Inst{14-13} = Rv;
  let Inst{9-7}   = Zn;
  let Inst{2-0}   = imm3;
}
// Defines the vgx4 multi/multi instruction NAME, its _PSEUDO, the selection
// pattern for `intrinsic`, and an alias whose asm string omits the "vgx4"
// suffix.
// Note: despite its name, zpr_ty is a ValueType here, not a register operand.
multiclass sme2_dot_mla_add_sub_array_vg4_multi<string mnemonic, bits<6> op,
MatrixOperand matrix_ty,
RegisterOperand multi_vector_ty,
ValueType zpr_ty, SDPatternOperator intrinsic>{
def NAME : sme2_dot_mla_add_sub_array_vg4_multi<op, matrix_ty, multi_vector_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>;
def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, SMEMatrixArray>;
def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME, intrinsic, sme_elm_idx0_7, zpr_ty, tileslice16>;
def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
(!cast<Instruction>(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>;
}
//===----------------------------------------------------------------------===//
// SME2 multiple vectors binary two or four registers
// Common encoding for the ZA-array accumulate instructions that take a single
// multi-vector source $Zm. `vg4` is written to Inst{16} and selects the
// "vgx4" or "vgx2" asm suffix. `op` is split as op{2} -> Inst{18} and
// op{1-0} -> Inst{4-3}. Inst{9-6} is left for the _vg2/_vg4 subclasses, which
// place $Zm there. The array operand $ZAdn is tied to the input $_ZAdn.
class sme2_multivec_accum_add_sub<string mnemonic, bit sz, bit vg4, bits<3> op,
                                  MatrixOperand matrix_ty,
                                  RegisterOperand vector_ty>
    : I<(outs matrix_ty:$ZAdn),
        (ins matrix_ty:$_ZAdn, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, vector_ty:$Zm),
        mnemonic, "\t$ZAdn[$Rv, $imm3, " # !if(vg4, "vgx4", "vgx2") # "], $Zm",
        "", []>, Sched<[]> {
  bits<2> Rv;
  bits<3> imm3;

  let Constraints = "$ZAdn = $_ZAdn";

  // Fixed opcode bits.
  let Inst{31-23} = 0b110000011;
  let Inst{21-19} = 0b100;
  let Inst{17}    = 0b0;
  let Inst{15}    = 0b0;
  let Inst{12-10} = 0b111;
  let Inst{5}     = 0b0;

  // Bits taken from the class parameters.
  let Inst{22}    = sz;
  let Inst{18}    = op{2};
  let Inst{16}    = vg4;
  let Inst{4-3}   = op{1-0};

  // Operands.
  let Inst{14-13} = Rv;
  let Inst{2-0}   = imm3;
}
// vgx2 specialisation (vg4 = 0): $Zm occupies the full 4-bit field Inst{9-6}.
class sme2_multivec_accum_add_sub_vg2<string mnemonic, bit sz, bits<3> op,
MatrixOperand matrix_ty,
RegisterOperand vector_ty>
: sme2_multivec_accum_add_sub<mnemonic, sz, 0b0, op, matrix_ty, vector_ty> {
bits<4> Zm;
let Inst{9-6} = Zm;
}
// Defines the vgx2 instruction NAME, a suffix-less alias, a _PSEUDO and the
// VG1x2 selection pattern for `intrinsic` on value type `vty`. The 4-bit `op`
// is split for the class as op{3} -> sz and op{2-0} -> op.
multiclass sme2_multivec_accum_add_sub_vg2<string mnemonic, bits<4> op,
MatrixOperand matrix_ty,
RegisterOperand vector_ty,
ValueType vty,
SDPatternOperator intrinsic> {
def NAME : sme2_multivec_accum_add_sub_vg2<mnemonic, op{3}, op{2-0}, matrix_ty, vector_ty>,
SMEPseudo2Instr<NAME, 1>;
def : InstAlias<mnemonic # "\t$ZAdn[$Rv, $imm3], $Zm",
(!cast<Instruction>(NAME) matrix_ty:$ZAdn, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, vector_ty:$Zm), 0>;
def _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, vector_ty, SMEMatrixArray>;
def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, vty, sme_elm_idx0_7, tileslice16>;
}
// vgx4 specialisation (vg4 = 1): $Zm is a 3-bit field in Inst{9-7} and
// Inst{6} is fixed to 0.
class sme2_multivec_accum_add_sub_vg4<string mnemonic, bit sz, bits<3> op,
MatrixOperand matrix_ty,
RegisterOperand vector_ty>
: sme2_multivec_accum_add_sub<mnemonic, sz, 0b1, op, matrix_ty, vector_ty> {
bits<3> Zm;
let Inst{9-7} = Zm;
let Inst{6} = 0b0;
}
// Defines the vgx4 instruction NAME, a suffix-less alias, a _PSEUDO and the
// VG1x4 selection pattern for `intrinsic` on value type `vty`. The 4-bit `op`
// is split for the class as op{3} -> sz and op{2-0} -> op.
multiclass sme2_multivec_accum_add_sub_vg4<string mnemonic, bits<4> op,
MatrixOperand matrix_ty,
RegisterOperand vector_ty,
ValueType vty,
SDPatternOperator intrinsic> {
def NAME : sme2_multivec_accum_add_sub_vg4<mnemonic, op{3}, op{2-0}, matrix_ty, vector_ty>,
SMEPseudo2Instr<NAME, 1>;
def : InstAlias<mnemonic # "\t$ZAdn[$Rv, $imm3], $Zm",
(!cast<Instruction>(NAME) matrix_ty:$ZAdn, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, vector_ty:$Zm), 0>;
def _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, vector_ty, SMEMatrixArray>;
def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, vty, sme_elm_idx0_7, tileslice16>;
}
//===----------------------------------------------------------------------===//
// SME2 Multi-vector - Multiple and Single SVE Destructive
// Two and Four registers
// Destructive multi-vector (two-register form) by single-vector instruction:
// "$Zdn, $_Zdn, $Zm" with $Zdn tied to $_Zdn. $Zdn and $Zm are each 4-bit
// fields. The 7-bit `op` is split as op{6-1} -> Inst{10-5} and
// op{0} -> Inst{0}, on either side of the $Zdn field.
class sme2_sve_destructive_vector_vg2_single<bits<2> sz, bits<7> op,
                                             RegisterOperand vector_ty,
                                             ZPRRegOp zpr_ty,
                                             string mnemonic>
    : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, zpr_ty:$Zm),
        mnemonic, "\t$Zdn, $_Zdn, $Zm",
        "", []>, Sched<[]> {
  bits<4> Zm;
  bits<4> Zdn;

  let Constraints = "$Zdn = $_Zdn";

  // Fixed opcode bits.
  let Inst{31-24} = 0b11000001;
  let Inst{21-20} = 0b10;
  let Inst{15-11} = 0b10100;

  // Bits taken from the class parameters.
  let Inst{23-22} = sz;
  let Inst{10-5}  = op{6-1};
  let Inst{0}     = op{0};

  // Register operands.
  let Inst{19-16} = Zm;
  let Inst{4-1}   = Zdn;
}
// Floating-point set: _H/_S/_D only (sz = 0b01, 0b10, 0b11); there is no _B.
multiclass sme2_fp_sve_destructive_vector_vg2_single<string mnemonic, bits<7> op> {
def _H : sme2_sve_destructive_vector_vg2_single<0b01, op, ZZ_h_mul_r, ZPR4b16, mnemonic>;
def _S : sme2_sve_destructive_vector_vg2_single<0b10, op, ZZ_s_mul_r, ZPR4b32, mnemonic>;
def _D : sme2_sve_destructive_vector_vg2_single<0b11, op, ZZ_d_mul_r, ZPR4b64, mnemonic>;
}
// Integer set: _B/_H/_S/_D (sz = 0b00 .. 0b11).
multiclass sme2_int_sve_destructive_vector_vg2_single<string mnemonic, bits<7> op> {
def _B : sme2_sve_destructive_vector_vg2_single<0b00, op, ZZ_b_mul_r, ZPR4b8, mnemonic>;
def _H : sme2_sve_destructive_vector_vg2_single<0b01, op, ZZ_h_mul_r, ZPR4b16, mnemonic>;
def _S : sme2_sve_destructive_vector_vg2_single<0b10, op, ZZ_s_mul_r, ZPR4b32, mnemonic>;
def _D : sme2_sve_destructive_vector_vg2_single<0b11, op, ZZ_d_mul_r, ZPR4b64, mnemonic>;
}
// SME2.1 fmax/fmin instructions.
// Only an _H record is defined. It uses the 16-bit register operand classes
// but sz = 0b00, the size value the integer multiclass assigns to _B.
multiclass sme2p1_bf_max_min_vector_vg2_single<string mnemonic, bits<7>op> {
def _H : sme2_sve_destructive_vector_vg2_single<0b00, op, ZZ_h_mul_r,
ZPR4b16, mnemonic>;
}
// Destructive multi-vector (four-register form) by single-vector instruction:
// "$Zdn, $_Zdn, $Zm" with $Zdn tied to $_Zdn. $Zm is a 4-bit field while $Zdn
// is 3 bits (Inst{4-2}) with Inst{1} fixed to 0. The 7-bit `op` is split as
// op{6-1} -> Inst{10-5} and op{0} -> Inst{0}.
class sme2_sve_destructive_vector_vg4_single<bits<2> sz, bits<7> op,
                                             RegisterOperand vector_ty,
                                             ZPRRegOp zpr_ty,
                                             string mnemonic>
    : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, zpr_ty:$Zm),
        mnemonic, "\t$Zdn, $_Zdn, $Zm",
        "", []>, Sched<[]> {
  bits<4> Zm;
  bits<3> Zdn;

  let Constraints = "$Zdn = $_Zdn";

  // Fixed opcode bits.
  let Inst{31-24} = 0b11000001;
  let Inst{21-20} = 0b10;
  let Inst{15-11} = 0b10101;
  let Inst{1}     = 0b0;

  // Bits taken from the class parameters.
  let Inst{23-22} = sz;
  let Inst{10-5}  = op{6-1};
  let Inst{0}     = op{0};

  // Register operands.
  let Inst{19-16} = Zm;
  let Inst{4-2}   = Zdn;
}
// Floating-point set: _H/_S/_D only (sz = 0b01, 0b10, 0b11); there is no _B.
multiclass sme2_fp_sve_destructive_vector_vg4_single<string mnemonic, bits<7> op> {
def _H : sme2_sve_destructive_vector_vg4_single<0b01, op, ZZZZ_h_mul_r, ZPR4b16, mnemonic>;
def _S : sme2_sve_destructive_vector_vg4_single<0b10, op, ZZZZ_s_mul_r, ZPR4b32, mnemonic>;
def _D : sme2_sve_destructive_vector_vg4_single<0b11, op, ZZZZ_d_mul_r, ZPR4b64, mnemonic>;
}
// Integer set: _B/_H/_S/_D (sz = 0b00 .. 0b11).
multiclass sme2_int_sve_destructive_vector_vg4_single<string mnemonic, bits<7> op> {
def _B : sme2_sve_destructive_vector_vg4_single<0b00, op, ZZZZ_b_mul_r, ZPR4b8, mnemonic>;
def _H : sme2_sve_destructive_vector_vg4_single<0b01, op, ZZZZ_h_mul_r, ZPR4b16, mnemonic>;
def _S : sme2_sve_destructive_vector_vg4_single<0b10, op, ZZZZ_s_mul_r, ZPR4b32, mnemonic>;
def _D : sme2_sve_destructive_vector_vg4_single<0b11, op, ZZZZ_d_mul_r, ZPR4b64, mnemonic>;
}
// SME2.1 fmax/fmin instructions.
// Only an _H record is defined. It uses the 16-bit register operand classes
// but sz = 0b00, the size value the integer multiclass assigns to _B.
multiclass sme2p1_bf_max_min_vector_vg4_single<string mnemonic, bits<7>op> {
def _H : sme2_sve_destructive_vector_vg4_single<0b00, op, ZZZZ_h_mul_r,
ZPR4b16, mnemonic>;
}
// Destructive multi-vector by multi-vector instruction (two-register form):
// "$Zdn, $_Zdn, $Zm" with $Zdn tied to $_Zdn and both operands drawn from the
// same vector_ty. $Zm and $Zdn are each 4-bit fields. The 7-bit `op` is split
// as op{6-1} -> Inst{10-5} and op{0} -> Inst{0}.
class sme2_sve_destructive_vector_vg2_multi<bits<2> sz, bits<7> op,
                                            RegisterOperand vector_ty,
                                            string mnemonic>
    : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, vector_ty:$Zm),
        mnemonic, "\t$Zdn, $_Zdn, $Zm",
        "", []>, Sched<[]> {
  bits<4> Zm;
  bits<4> Zdn;

  let Constraints = "$Zdn = $_Zdn";

  // Fixed opcode bits.
  let Inst{31-24} = 0b11000001;
  let Inst{21}    = 0b1;
  let Inst{16-11} = 0b010110;

  // Bits taken from the class parameters.
  let Inst{23-22} = sz;
  let Inst{10-5}  = op{6-1};
  let Inst{0}     = op{0};

  // Register operands.
  let Inst{20-17} = Zm;
  let Inst{4-1}   = Zdn;
}
// Floating-point set: _H/_S/_D only (sz = 0b01, 0b10, 0b11); there is no _B.
multiclass sme2_fp_sve_destructive_vector_vg2_multi<string mnemonic, bits<7> op> {
def _H : sme2_sve_destructive_vector_vg2_multi<0b01, op, ZZ_h_mul_r, mnemonic>;
def _S : sme2_sve_destructive_vector_vg2_multi<0b10, op, ZZ_s_mul_r, mnemonic>;
def _D : sme2_sve_destructive_vector_vg2_multi<0b11, op, ZZ_d_mul_r, mnemonic>;
}
// Integer set: _B/_H/_S/_D (sz = 0b00 .. 0b11).
multiclass sme2_int_sve_destructive_vector_vg2_multi<string mnemonic, bits<7> op> {
def _B : sme2_sve_destructive_vector_vg2_multi<0b00, op, ZZ_b_mul_r, mnemonic>;
def _H : sme2_sve_destructive_vector_vg2_multi<0b01, op, ZZ_h_mul_r, mnemonic>;
def _S : sme2_sve_destructive_vector_vg2_multi<0b10, op, ZZ_s_mul_r, mnemonic>;
def _D : sme2_sve_destructive_vector_vg2_multi<0b11, op, ZZ_d_mul_r, mnemonic>;
}
// SME2.1 fmax/fmin instructions.
// Only an _H def is produced; it uses ZZ_h_mul_r operands with sz = 0b00.
multiclass sme2p1_bf_max_min_vector_vg2_multi<string mnemonic, bits<7> op> {
  def _H : sme2_sve_destructive_vector_vg2_multi<0b00, op, ZZ_h_mul_r,
                                                 mnemonic>;
}
// Encoding class for a destructive instruction whose operands all have type
// vector_ty: $Zdn is the result and, through the tied input $_Zdn, also the
// first source; $Zm is the second source. Register fields are three bits wide.
//   sz - written to Inst{23-22}.
//   op - 7-bit value split over Inst{10-5} (op{6-1}) and Inst{0} (op{0}).
class sme2_sve_destructive_vector_vg4_multi<bits<2> sz, bits<7> op,
                                            RegisterOperand vector_ty,
                                            string mnemonic>
    : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, vector_ty:$Zm),
        mnemonic, "\t$Zdn, $_Zdn, $Zm",
        "", []>,
      Sched<[]> {
  bits<3> Zm;
  bits<3> Zdn;

  // The output register is the same register as the first input.
  let Constraints = "$Zdn = $_Zdn";

  // Fixed opcode bits.
  let Inst{31-24} = 0b11000001;
  let Inst{21}    = 0b1;
  let Inst{17-11} = 0b0010111;
  let Inst{1}     = 0b0;

  // Bits taken from the template arguments.
  let Inst{23-22} = sz;
  let Inst{10-5}  = op{6-1};
  let Inst{0}     = op{0};

  // Register operand fields.
  let Inst{20-18} = Zm;
  let Inst{4-2}   = Zdn;
}
// The "fp" set of sme2_sve_destructive_vector_vg4_multi instantiations:
// _H, _S and _D with sz = 0b01, 0b10 and 0b11. No _B form is defined here.
multiclass sme2_fp_sve_destructive_vector_vg4_multi<string mnemonic,
                                                    bits<7> op> {
  def _H : sme2_sve_destructive_vector_vg4_multi<0b01, op, ZZZZ_h_mul_r,
                                                 mnemonic>;
  def _S : sme2_sve_destructive_vector_vg4_multi<0b10, op, ZZZZ_s_mul_r,
                                                 mnemonic>;
  def _D : sme2_sve_destructive_vector_vg4_multi<0b11, op, ZZZZ_d_mul_r,
                                                 mnemonic>;
}
// The "int" set of sme2_sve_destructive_vector_vg4_multi instantiations:
// _B, _H, _S and _D with sz = 0b00, 0b01, 0b10 and 0b11.
multiclass sme2_int_sve_destructive_vector_vg4_multi<string mnemonic,
                                                     bits<7> op> {
  def _B : sme2_sve_destructive_vector_vg4_multi<0b00, op, ZZZZ_b_mul_r,
                                                 mnemonic>;
  def _H : sme2_sve_destructive_vector_vg4_multi<0b01, op, ZZZZ_h_mul_r,
                                                 mnemonic>;
  def _S : sme2_sve_destructive_vector_vg4_multi<0b10, op, ZZZZ_s_mul_r,
                                                 mnemonic>;
  def _D : sme2_sve_destructive_vector_vg4_multi<0b11, op, ZZZZ_d_mul_r,
                                                 mnemonic>;
}
// SME2.1 fmax/fmin instructions.
// Only an _H def is produced; it uses ZZZZ_h_mul_r operands with sz = 0b00.
multiclass sme2p1_bf_max_min_vector_vg4_multi<string mnemonic, bits<7> op> {
  def _H : sme2_sve_destructive_vector_vg4_multi<0b00, op, ZZZZ_h_mul_r,
                                                 mnemonic>;
}
//===----------------------------------------------------------------------===//
// SME2 Multi-vector - Index/Single/Multi Array Vectors FMA sources
// Common base for the indexed "mla long array" encodings. The instruction
// accumulates into $ZAda (tied to the input $_ZAda) and takes a slice select
// register $Rv, an immediate $imm of type index_ty, a source $Zn of type
// multi_vector_ty, a ZPR4b16 source $Zm and a lane index $i3.
//   op0        - written to Inst{23-22}.
//   op         - written to Inst{4-3}.
//   vg_acronym - when non-empty it is printed after the immediate inside the
//                brackets and Inst{20} is set to 1; otherwise Inst{20} is 0.
// Inst{15}, Inst{11-5} and Inst{2-0} are not assigned here.
class sme2_mla_long_array_index_base<bits<2> op0, bits<2> op, Operand index_ty,
                                     RegisterOperand multi_vector_ty,
                                     string mnemonic, string vg_acronym="">
    : I<(outs MatrixOp32:$ZAda),
        (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm,
             multi_vector_ty:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3),
        mnemonic,
        "\t$ZAda[$Rv, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "], $Zn, $Zm$i3",
        "", []>,
      Sched<[]> {
  bits<4> Zm;
  bits<2> Rv;

  // The accumulator is read and written in place.
  let Constraints = "$ZAda = $_ZAda";

  // Fixed opcode bits.
  let Inst{31-24} = 0b11000001;
  let Inst{21}    = 0b0;
  let Inst{12}    = 0b1;

  // Bits taken from the template arguments.
  let Inst{23-22} = op0;
  let Inst{20}    = !if(!eq(vg_acronym, ""), 0, 1);
  let Inst{4-3}   = op;

  // Operand fields.
  let Inst{19-16} = Zm;
  let Inst{14-13} = Rv;
}
// Single-vector indexed form built on sme2_mla_long_array_index_base with a
// ZPR16 $Zn and a uimm3s2range immediate (no vg_acronym). Produces:
//   _S        - the real instruction, registered via SMEPseudo2Instr.
//   _S_PSEUDO - the matching pseudo instruction.
//   a selection pattern mapping `intrinsic` (vector type zpr_ty) to NAME # _S.
multiclass sme2_mla_long_array_index<string mnemonic, bits<2> op0, bits<2> op,
                                     ValueType zpr_ty,
                                     SDPatternOperator intrinsic> {
  def _S : sme2_mla_long_array_index_base<op0, op, uimm3s2range, ZPR16,
                                          mnemonic>,
           SMEPseudo2Instr<NAME # _S, 1> {
    bits<3> i3;
    bits<5> Zn;
    bits<3> imm;

    let Inst{9-5} = Zn;
    let Inst{2-0} = imm;

    // The 3-bit lane index is split: top bit in Inst{15}, low two bits in
    // Inst{11-10}.
    let Inst{15}    = i3{2};
    let Inst{11-10} = i3{1-0};
  }

  def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _S, uimm3s2range,
                                                       ZPR16, ZPR4b16,
                                                       VectorIndexH32b_timm,
                                                       SMEMatrixArray>;

  def : SME2_ZA_TwoOp_Multi_Index_Pat<NAME # _S, intrinsic, uimm3s2range,
                                      ZPR4b16, zpr_ty, VectorIndexH32b_timm,
                                      tileslicerange3s2>;
}
// "vgx2" indexed form: sme2_mla_long_array_index_base with a ZZ_h_mul_r $Zn and
// a uimm2s2range immediate. Fills in the bits the base class leaves open.
class sme2_mla_long_array_vg2_index<string mnemonic, bits<2> op0, bits<2> op>
    : sme2_mla_long_array_index_base<op0, op, uimm2s2range, ZZ_h_mul_r,
                                     mnemonic, "vgx2"> {
  bits<3> i3;
  bits<4> Zn;
  bits<2> imm;

  // Fixed bits for this form.
  let Inst{15} = 0b0;
  let Inst{5}  = 0b0;

  // $Zn uses four bits; the immediate uses two.
  let Inst{9-6} = Zn;
  let Inst{1-0} = imm;

  // The 3-bit lane index is split: upper two bits in Inst{11-10}, lowest bit
  // in Inst{2}.
  let Inst{11-10} = i3{2-1};
  let Inst{2}     = i3{0};
}
// "fp" vgx2 indexed form (op0 = 0b10). Produces the _S instruction, its
// _S_PSEUDO, a selection pattern for `intrinsic` over zpr_ty, and an assembly
// alias that omits the "vgx2" marker (final InstAlias argument is 0).
multiclass sme2_fp_mla_long_array_vg2_index<string mnemonic, bits<2> op,
                                            ValueType zpr_ty,
                                            SDPatternOperator intrinsic> {
  def _S : sme2_mla_long_array_vg2_index<mnemonic, 0b10, op>,
           SMEPseudo2Instr<NAME # _S, 1>;

  def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _S, uimm2s2range,
                                                       ZZ_h_mul_r, ZPR4b16,
                                                       VectorIndexH32b_timm,
                                                       SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME # _S, intrinsic, uimm2s2range,
                                          ZPR4b16, zpr_ty,
                                          VectorIndexH32b_timm,
                                          tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
                  (!cast<Instruction>(NAME # _S)
                     MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                     uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm,
                     VectorIndexH32b_timm:$i3), 0>;
}
// "int" vgx2 indexed form (op0 = 0b11). Produces the _S instruction, its
// _S_PSEUDO, a selection pattern for `intrinsic` over nxv8i16, and an assembly
// alias that omits the "vgx2" marker (final InstAlias argument is 0).
multiclass sme2_int_mla_long_array_vg2_index<string mnemonic, bits<2> op,
                                             SDPatternOperator intrinsic> {
  def _S : sme2_mla_long_array_vg2_index<mnemonic, 0b11, op>,
           SMEPseudo2Instr<NAME # _S, 1>;

  def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _S, uimm2s2range,
                                                       ZZ_h_mul_r, ZPR4b16,
                                                       VectorIndexH32b_timm,
                                                       SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME # _S, intrinsic, uimm2s2range,
                                          ZPR4b16, nxv8i16,
                                          VectorIndexH32b_timm,
                                          tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
                  (!cast<Instruction>(NAME # _S)
                     MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                     uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm,
                     VectorIndexH32b_timm:$i3), 0>;
}
// "vgx4" indexed form: sme2_mla_long_array_index_base with a ZZZZ_h_mul_r $Zn
// and a uimm2s2range immediate. Fills in the bits the base class leaves open.
class sme2_mla_long_array_vg4_index<string mnemonic, bits<2> op0, bits<2> op>
    : sme2_mla_long_array_index_base<op0, op, uimm2s2range, ZZZZ_h_mul_r,
                                     mnemonic, "vgx4"> {
  bits<3> i3;
  bits<3> Zn;
  bits<2> imm;

  // Fixed bits for this form.
  let Inst{15}  = 0b1;
  let Inst{6-5} = 0b00;

  // $Zn uses three bits; the immediate uses two.
  let Inst{9-7} = Zn;
  let Inst{1-0} = imm;

  // The 3-bit lane index is split: upper two bits in Inst{11-10}, lowest bit
  // in Inst{2}.
  let Inst{11-10} = i3{2-1};
  let Inst{2}     = i3{0};
}
// "fp" vgx4 indexed form (op0 = 0b10). Produces the _S instruction, its
// _S_PSEUDO, a selection pattern for `intrinsic` over zpr_ty, and an assembly
// alias that omits the "vgx4" marker (final InstAlias argument is 0).
multiclass sme2_fp_mla_long_array_vg4_index<string mnemonic, bits<2> op,
                                            ValueType zpr_ty,
                                            SDPatternOperator intrinsic> {
  def _S : sme2_mla_long_array_vg4_index<mnemonic, 0b10, op>,
           SMEPseudo2Instr<NAME # _S, 1>;

  def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _S, uimm2s2range,
                                                       ZZZZ_h_mul_r, ZPR4b16,
                                                       VectorIndexH32b_timm,
                                                       SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME # _S, intrinsic, uimm2s2range,
                                          ZPR4b16, zpr_ty,
                                          VectorIndexH32b_timm,
                                          tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
                  (!cast<Instruction>(NAME # _S)
                     MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                     uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm,
                     VectorIndexH32b_timm:$i3), 0>;
}
// "int" vgx4 indexed form (op0 = 0b11). Produces the _S instruction, its
// _S_PSEUDO, a selection pattern for `intrinsic` over nxv8i16, and an assembly
// alias that omits the "vgx4" marker (final InstAlias argument is 0).
multiclass sme2_int_mla_long_array_vg4_index<string mnemonic, bits<2> op,
                                             SDPatternOperator intrinsic> {
  def _S : sme2_mla_long_array_vg4_index<mnemonic, 0b11, op>,
           SMEPseudo2Instr<NAME # _S, 1>;

  def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _S, uimm2s2range,
                                                       ZZZZ_h_mul_r, ZPR4b16,
                                                       VectorIndexH32b_timm,
                                                       SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME # _S, intrinsic, uimm2s2range,
                                          ZPR4b16, nxv8i16,
                                          VectorIndexH32b_timm,
                                          tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
                  (!cast<Instruction>(NAME # _S)
                     MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                     uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm,
                     VectorIndexH32b_timm:$i3), 0>;
}
// Common base for the non-indexed "mla long array" encodings. The instruction
// accumulates into $ZAda (tied to the input $_ZAda) and takes a slice select
// register $Rv, an immediate $imm of type index_ty, and two vector sources
// $Zn (first_vector_ty) and $Zm (second_vector_ty).
//   op0        - written to Inst{23-22}.
//   op         - written to Inst{4-3}.
//   vg_acronym - when non-empty it is printed after the immediate inside the
//                brackets and Inst{10} is 0; when empty Inst{10} is 1.
// Inst{20-16}, Inst{9-5} and Inst{2-0} are not assigned here.
class sme2_mla_long_array<bits<2> op0, bits<2> op, Operand index_ty,
                          RegisterOperand first_vector_ty,
                          RegisterOperand second_vector_ty,
                          string mnemonic, string vg_acronym="">
    : I<(outs MatrixOp32:$ZAda),
        (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm,
             first_vector_ty:$Zn, second_vector_ty:$Zm),
        mnemonic,
        "\t$ZAda[$Rv, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "], $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<2> Rv;

  // The accumulator is read and written in place.
  let Constraints = "$ZAda = $_ZAda";

  // Fixed opcode bits.
  let Inst{31-24} = 0b11000001;
  let Inst{21}    = 0b1;
  let Inst{15}    = 0b0;
  let Inst{12-11} = 0b01;

  // Bits taken from the template arguments.
  let Inst{23-22} = op0;
  let Inst{10}    = !if(!eq(vg_acronym, ""), 1, 0);
  let Inst{4-3}   = op;

  // Operand field.
  let Inst{14-13} = Rv;
}
// Single-vector form built on sme2_mla_long_array with a ZPR16 $Zn, a ZPR4b16
// $Zm and a uimm3s2range immediate (no vg_acronym). Produces:
//   _S        - the real instruction, registered via SMEPseudo2Instr.
//   _S_PSEUDO - the matching pseudo instruction.
//   a selection pattern mapping `intrinsic` (vector type zpr_ty) to NAME # _S.
multiclass sme2_mla_long_array_single<string mnemonic, bits<2> op0,
                                      bits<2> op, ValueType zpr_ty,
                                      SDPatternOperator intrinsic> {
  def _S : sme2_mla_long_array<op0, op, uimm3s2range, ZPR16, ZPR4b16,
                               mnemonic>,
           SMEPseudo2Instr<NAME # _S, 1> {
    bits<4> Zm;
    bits<5> Zn;
    bits<3> imm;

    // Fixed bit for this form.
    let Inst{20} = 0b0;

    // Operand fields.
    let Inst{19-16} = Zm;
    let Inst{9-5}   = Zn;
    let Inst{2-0}   = imm;
  }

  def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _S, uimm3s2range,
                                                        ZPR16, ZPR4b16,
                                                        SMEMatrixArray>;

  def : SME2_ZA_TwoOp_Multi_Single_Pat<NAME # _S, intrinsic, uimm3s2range,
                                       ZPR4b16, zpr_ty, tileslicerange3s2>;
}
// Shared encoding for the vgx2/vgx4 "multi x single" forms: $Zn has type
// first_vector_ty, $Zm is a ZPR4b16, and the immediate is a uimm2s2range.
//   vg4 - written to Inst{20}; the multiclasses in this file pass 0b0 together
//         with "vgx2" and 0b1 together with "vgx4".
class sme2_mla_long_array_vg24_single<bits<2> op0, bit vg4, bits<2> op,
                                      RegisterOperand first_vector_ty,
                                      string mnemonic, string vg_acronym>
    : sme2_mla_long_array<op0, op, uimm2s2range, first_vector_ty, ZPR4b16,
                          mnemonic, vg_acronym> {
  bits<4> Zm;
  bits<5> Zn;
  bits<2> imm;

  // Bits that do not come from an operand.
  let Inst{20} = vg4;
  let Inst{2}  = 0b0;

  // Operand fields.
  let Inst{19-16} = Zm;
  let Inst{9-5}   = Zn;
  let Inst{1-0}   = imm;
}
// "fp" vgx2 multi x single form (op0 = 0b00, vg4 = 0). Produces the _S
// instruction, its _S_PSEUDO, a selection pattern for `intrinsic` over zpr_ty,
// and an assembly alias that omits the "vgx2" marker (final InstAlias argument
// is 0).
multiclass sme2_fp_mla_long_array_vg2_single<string mnemonic, bits<2> op,
                                             ValueType zpr_ty,
                                             SDPatternOperator intrinsic> {
  def _S : sme2_mla_long_array_vg24_single<0b00, 0b0, op, ZZ_h, mnemonic,
                                           "vgx2">,
           SMEPseudo2Instr<NAME # _S, 1>;

  def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _S, uimm2s2range,
                                                        ZZ_h, ZPR4b16,
                                                        SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME # _S, intrinsic, uimm2s2range,
                                           ZPR4b16, zpr_ty, tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME # _S)
                     MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                     uimm2s2range:$imm, ZZ_h:$Zn, ZPR4b16:$Zm), 0>;
}
// "int" vgx2 multi x single form (op0 = 0b01, vg4 = 0). Produces the _S
// instruction, its _S_PSEUDO, a selection pattern for `intrinsic` over
// nxv8i16, and an assembly alias that omits the "vgx2" marker (final InstAlias
// argument is 0).
multiclass sme2_int_mla_long_array_vg2_single<string mnemonic, bits<2> op,
                                              SDPatternOperator intrinsic> {
  def _S : sme2_mla_long_array_vg24_single<0b01, 0b0, op, ZZ_h, mnemonic,
                                           "vgx2">,
           SMEPseudo2Instr<NAME # _S, 1>;

  def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _S, uimm2s2range,
                                                        ZZ_h, ZPR4b16,
                                                        SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME # _S, intrinsic, uimm2s2range,
                                           ZPR4b16, nxv8i16, tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME # _S)
                     MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                     uimm2s2range:$imm, ZZ_h:$Zn, ZPR4b16:$Zm), 0>;
}
// "fp" vgx4 multi x single form (op0 = 0b00, vg4 = 1). Produces the _S
// instruction, its _S_PSEUDO, a selection pattern for `intrinsic` over zpr_ty,
// and an assembly alias that omits the "vgx4" marker (final InstAlias argument
// is 0).
multiclass sme2_fp_mla_long_array_vg4_single<string mnemonic, bits<2> op,
                                             ValueType zpr_ty,
                                             SDPatternOperator intrinsic> {
  def _S : sme2_mla_long_array_vg24_single<0b00, 0b1, op, ZZZZ_h, mnemonic,
                                           "vgx4">,
           SMEPseudo2Instr<NAME # _S, 1>;

  def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _S, uimm2s2range,
                                                        ZZZZ_h, ZPR4b16,
                                                        SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME # _S, intrinsic, uimm2s2range,
                                           ZPR4b16, zpr_ty, tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME # _S)
                     MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                     uimm2s2range:$imm, ZZZZ_h:$Zn, ZPR4b16:$Zm), 0>;
}
// "int" vgx4 multi x single form (op0 = 0b01, vg4 = 1). Produces the _S
// instruction, its _S_PSEUDO, a selection pattern for `intrinsic` over
// nxv8i16, and an assembly alias that omits the "vgx4" marker (final InstAlias
// argument is 0).
multiclass sme2_int_mla_long_array_vg4_single<string mnemonic, bits<2> op,
                                              SDPatternOperator intrinsic> {
  def _S : sme2_mla_long_array_vg24_single<0b01, 0b1, op, ZZZZ_h, mnemonic,
                                           "vgx4">,
           SMEPseudo2Instr<NAME # _S, 1>;

  def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _S, uimm2s2range,
                                                        ZZZZ_h, ZPR4b16,
                                                        SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME # _S, intrinsic, uimm2s2range,
                                           ZPR4b16, nxv8i16, tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME # _S)
                     MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                     uimm2s2range:$imm, ZZZZ_h:$Zn, ZPR4b16:$Zm), 0>;
}
// "vgx2" multi x multi form: both $Zn and $Zm are ZZ_h_mul_r operands encoded
// in four bits each, with a uimm2s2range immediate.
class sme2_mla_long_array_vg2_multi<string mnemonic, bits<2> op0, bits<2> op>
    : sme2_mla_long_array<op0, op, uimm2s2range, ZZ_h_mul_r, ZZ_h_mul_r,
                          mnemonic, "vgx2"> {
  bits<4> Zm;
  bits<4> Zn;
  bits<2> imm;

  // Fixed bits for this form.
  let Inst{16} = 0b0;
  let Inst{5}  = 0b0;
  let Inst{2}  = 0b0;

  // Operand fields.
  let Inst{20-17} = Zm;
  let Inst{9-6}   = Zn;
  let Inst{1-0}   = imm;
}
// "fp" vgx2 multi x multi form (op0 = 0b10). Produces the _S instruction, its
// _S_PSEUDO, a selection pattern for `intrinsic` over zpr_ty, and an assembly
// alias that omits the "vgx2" marker (final InstAlias argument is 0).
multiclass sme2_fp_mla_long_array_vg2_multi<string mnemonic, bits<2> op,
                                            ValueType zpr_ty,
                                            SDPatternOperator intrinsic> {
  def _S : sme2_mla_long_array_vg2_multi<mnemonic, 0b10, op>,
           SMEPseudo2Instr<NAME # _S, 1>;

  def _S_PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME # _S, uimm2s2range,
                                                       ZZ_h_mul_r,
                                                       SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME # _S, intrinsic, uimm2s2range,
                                          zpr_ty, tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME # _S)
                     MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                     uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZZ_h_mul_r:$Zm), 0>;
}
// "int" vgx2 multi x multi form (op0 = 0b11). Produces the _S instruction, its
// _S_PSEUDO, a selection pattern for `intrinsic` over nxv8i16, and an assembly
// alias that omits the "vgx2" marker (final InstAlias argument is 0).
//
// The alias names its immediate $imm, matching the operand name declared by
// sme2_mla_long_array and the spelling used by the other aliases in this
// family. Alias operand names only tie the asm string to the result dag, so
// the name has no effect on matching or encoding.
multiclass sme2_int_mla_long_array_vg2_multi<string mnemonic, bits<2> op,
                                             SDPatternOperator intrinsic> {
  def _S : sme2_mla_long_array_vg2_multi<mnemonic, 0b11, op>,
           SMEPseudo2Instr<NAME # _S, 1>;

  def _S_PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME # _S, uimm2s2range,
                                                       ZZ_h_mul_r,
                                                       SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME # _S, intrinsic, uimm2s2range,
                                          nxv8i16, tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME # _S)
                     MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                     uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZZ_h_mul_r:$Zm), 0>;
}
// "vgx4" multi x multi form: both $Zn and $Zm are ZZZZ_h_mul_r operands encoded
// in three bits each, with a uimm2s2range immediate.
class sme2_mla_long_array_vg4_multi<string mnemonic, bits<2> op0, bits<2> op>
    : sme2_mla_long_array<op0, op, uimm2s2range, ZZZZ_h_mul_r, ZZZZ_h_mul_r,
                          mnemonic, "vgx4"> {
  bits<3> Zm;
  bits<3> Zn;
  bits<2> imm;

  // Fixed bits for this form.
  let Inst{17}  = 0b0;
  let Inst{16}  = 0b1;
  let Inst{6-5} = 0b00;
  let Inst{2}   = 0b0;

  // Operand fields.
  let Inst{20-18} = Zm;
  let Inst{9-7}   = Zn;
  let Inst{1-0}   = imm;
}
// "fp" vgx4 multi x multi form (op0 = 0b10). Produces the _S instruction, its
// _S_PSEUDO, a selection pattern for `intrinsic` over zpr_ty, and an assembly
// alias that omits the "vgx4" marker (final InstAlias argument is 0).
multiclass sme2_fp_mla_long_array_vg4_multi<string mnemonic, bits<2> op,
                                            ValueType zpr_ty,
                                            SDPatternOperator intrinsic> {
  def _S : sme2_mla_long_array_vg4_multi<mnemonic, 0b10, op>,
           SMEPseudo2Instr<NAME # _S, 1>;

  def _S_PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME # _S, uimm2s2range,
                                                       ZZZZ_h_mul_r,
                                                       SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME # _S, intrinsic, uimm2s2range,
                                          zpr_ty, tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME # _S)
                     MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                     uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZZZZ_h_mul_r:$Zm), 0>;
}
// "int" vgx4 multi x multi form (op0 = 0b11). Produces the _S instruction, its
// _S_PSEUDO, a selection pattern for `intrinsic` over nxv8i16, and an assembly
// alias that omits the "vgx4" marker (final InstAlias argument is 0).
//
// The alias names its immediate $imm, matching the operand name declared by
// sme2_mla_long_array and the spelling used by the other aliases in this
// family. Alias operand names only tie the asm string to the result dag, so
// the name has no effect on matching or encoding.
multiclass sme2_int_mla_long_array_vg4_multi<string mnemonic, bits<2> op,
                                             SDPatternOperator intrinsic> {
  def _S : sme2_mla_long_array_vg4_multi<mnemonic, 0b11, op>,
           SMEPseudo2Instr<NAME # _S, 1>;

  def _S_PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME # _S, uimm2s2range,
                                                       ZZZZ_h_mul_r,
                                                       SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME # _S, intrinsic, uimm2s2range,
                                          nxv8i16, tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME # _S)
                     MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                     uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZZZZ_h_mul_r:$Zm), 0>;
}
//===----------------------------------------------------------------------===//
// Encoding class for a non-destructive unary instruction: $Zd (first_ty) is
// written from $Zn (second_ty); both are encoded in four bits.
//   sz - written to Inst{23-22}.
//   op - 5-bit value split over Inst{19-16} (op{4-1}) and Inst{5} (op{0}).
class sme2_frint_cvt_vg2_multi<bits<2> sz, bits<5> op, RegisterOperand first_ty,
                               RegisterOperand second_ty, string mnemonic>
    : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
        mnemonic, "\t$Zd, $Zn", "", []>,
      Sched<[]> {
  bits<4> Zn;
  bits<4> Zd;

  // Fixed opcode bits.
  let Inst{31-24} = 0b11000001;
  let Inst{21-20} = 0b10;
  let Inst{15-10} = 0b111000;
  let Inst{0}     = 0b0;

  // Bits taken from the template arguments.
  let Inst{23-22} = sz;
  let Inst{19-16} = op{4-1};
  let Inst{5}     = op{0};

  // Register operand fields.
  let Inst{9-6} = Zn;
  let Inst{4-1} = Zd;
}
// SME2 multi-vec FP to int convert two registers
// SME2 multi-vec int to FP two registers
// Defines a single unsuffixed record (NAME) with sz = 0b00 and ZZ_s_mul_r for
// both the destination and the source.
multiclass sme2_fp_cvt_vg2_multi<string mnemonic, bits<5> op> {
  def NAME : sme2_frint_cvt_vg2_multi<0b00, op, ZZ_s_mul_r, ZZ_s_mul_r,
                                      mnemonic>;
}
// SME2 multi-vec FRINT two registers
// Defines only an _S record, with sz = 0b10 and ZZ_s_mul_r for both the
// destination and the source.
multiclass sme2_frint_vector_vg2_multi<string mnemonic, bits<5> op> {
  def _S : sme2_frint_cvt_vg2_multi<0b10, op, ZZ_s_mul_r, ZZ_s_mul_r,
                                    mnemonic>;
}
// Encoding class for a non-destructive unary instruction: $Zd (first_ty) is
// written from $Zn (second_ty); both are encoded in three bits.
//   sz - written to Inst{23-22}.
//   op - 7-bit value split over Inst{19-16} (op{6-3}), Inst{6-5} (op{2-1}) and
//        Inst{1} (op{0}).
class sme2_frint_zip_cvt_vg4_multi<bits<2> sz, bits<7> op,
                                   RegisterOperand first_ty,
                                   RegisterOperand second_ty, string mnemonic>
    : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
        mnemonic, "\t$Zd, $Zn", "", []>,
      Sched<[]> {
  bits<3> Zn;
  bits<3> Zd;

  // Fixed opcode bits.
  let Inst{31-24} = 0b11000001;
  let Inst{21-20} = 0b11;
  let Inst{15-10} = 0b111000;
  let Inst{0}     = 0b0;

  // Bits taken from the template arguments.
  let Inst{23-22} = sz;
  let Inst{19-16} = op{6-3};
  let Inst{6-5}   = op{2-1};
  let Inst{1}     = op{0};

  // Register operand fields.
  let Inst{9-7} = Zn;
  let Inst{4-2} = Zd;
}
// SME2 multi-vec FP to int convert four registers
// SME2 multi-vec int to FP four registers
// Defines a single unsuffixed record (NAME) with sz = 0b00 and ZZZZ_s_mul_r for
// both the destination and the source.
multiclass sme2_fp_cvt_vg4_multi<string mnemonic, bits<7> op> {
  def NAME : sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r,
                                          mnemonic>;
}
// SME2 multi-vec ZIP four registers: the _B, _H, _S and _D forms
// (sz = 0b00 .. 0b11). Each def uses the same ZZZZ_*_mul_r operand type for
// the destination and the source. The quadword form is the separate
// sme2_zip_vector_vg4_Q multiclass.
multiclass sme2_zip_vector_vg4<string mnemonic, bits<7> op> {
  def _B : sme2_frint_zip_cvt_vg4_multi<0b00, op,
                                        ZZZZ_b_mul_r, ZZZZ_b_mul_r, mnemonic>;
  def _H : sme2_frint_zip_cvt_vg4_multi<0b01, op,
                                        ZZZZ_h_mul_r, ZZZZ_h_mul_r, mnemonic>;
  def _S : sme2_frint_zip_cvt_vg4_multi<0b10, op,
                                        ZZZZ_s_mul_r, ZZZZ_s_mul_r, mnemonic>;
  def _D : sme2_frint_zip_cvt_vg4_multi<0b11, op,
                                        ZZZZ_d_mul_r, ZZZZ_d_mul_r, mnemonic>;
}
// SME2 multi-vec quadwords ZIP four registers
// Defines a single unsuffixed record (NAME) with sz = 0b00 and ZZZZ_q_mul_r for
// both the destination and the source.
multiclass sme2_zip_vector_vg4_Q<string mnemonic, bits<7> op> {
  def NAME : sme2_frint_zip_cvt_vg4_multi<0b00, op,
                                          ZZZZ_q_mul_r, ZZZZ_q_mul_r, mnemonic>;
}
// SME2 multi-vec FRINT four registers
// Defines only an _S record, with sz = 0b10 and ZZZZ_s_mul_r for both the
// destination and the source.
multiclass sme2_frint_vector_vg4_multi<string mnemonic, bits<7> op> {
  def _S : sme2_frint_zip_cvt_vg4_multi<0b10, op,
                                        ZZZZ_s_mul_r, ZZZZ_s_mul_r, mnemonic>;
}
// Encoding class for an instruction that writes a single ZPR16 register $Zd
// (5-bit field) from a ZZ_s_mul_r source $Zn (4-bit field).
//   op - 4-bit value split over Inst{22} (op{3}), Inst{17-16} (op{2-1}) and
//        Inst{5} (op{0}).
class sme2_cvt_vg2_single<string mnemonic, bits<4> op>
    : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn),
        mnemonic, "\t$Zd, $Zn", "", []>,
      Sched<[]> {
  bits<4> Zn;
  bits<5> Zd;

  // Fixed opcode bits.
  let Inst{31-23} = 0b110000010;
  let Inst{21-18} = 0b1000;
  let Inst{15-10} = 0b111000;

  // Bits taken from the template argument.
  let Inst{22}    = op{3};
  let Inst{17-16} = op{2-1};
  let Inst{5}     = op{0};

  // Register operand fields.
  let Inst{9-6} = Zn;
  let Inst{4-0} = Zd;
}
// SME2 multi-vec FP down convert two registers
// SME2 multi-vec int down convert two registers
// Defines the unsuffixed instruction record (NAME) plus a selection pattern
// that maps `intrinsic` (result type out_vt, source type in_vt) onto it.
multiclass sme2_cvt_vg2_single<string mnemonic, bits<4> op, ValueType out_vt,
                               ValueType in_vt, SDPatternOperator intrinsic> {
  def NAME : sme2_cvt_vg2_single<mnemonic, op>;

  def : SVE2p1_Cvt_VG2_Pat<NAME, intrinsic, out_vt, in_vt>;
}
// Encoding class for an instruction that writes $Zd (first_ty, 4-bit field)
// from $Zn (second_ty, 5-bit field).
//   sz - written to Inst{23-22}.
//   op - written to Inst{18-16}.
//   u  - written to Inst{0}.
class sme2_cvt_unpk_vector_vg2<bits<2> sz, bits<3> op, bit u,
                               RegisterOperand first_ty,
                               RegisterOperand second_ty, string mnemonic>
    : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
        mnemonic, "\t$Zd, $Zn", "", []>,
      Sched<[]> {
  bits<5> Zn;
  bits<4> Zd;

  // Fixed opcode bits.
  let Inst{31-24} = 0b11000001;
  let Inst{21-19} = 0b100;
  let Inst{15-10} = 0b111000;

  // Bits taken from the template arguments.
  let Inst{23-22} = sz;
  let Inst{18-16} = op;
  let Inst{0}     = u;

  // Register operand fields.
  let Inst{9-5} = Zn;
  let Inst{4-1} = Zd;
}
// SME2 multi-vec unpack two registers
// _H, _S and _D forms (sz = 0b01, 0b10, 0b11), all with op = 0b101. Each
// destination is a ZZ_*_mul_r operand; the source is the ZPR operand one
// element size below it (ZPR8, ZPR16, ZPR32).
multiclass sme2_unpk_vector_vg2<string mnemonic, bit u> {
  def _H : sme2_cvt_unpk_vector_vg2<0b01, 0b101, u, ZZ_h_mul_r, ZPR8,
                                    mnemonic>;
  def _S : sme2_cvt_unpk_vector_vg2<0b10, 0b101, u, ZZ_s_mul_r, ZPR16,
                                    mnemonic>;
  def _D : sme2_cvt_unpk_vector_vg2<0b11, 0b101, u, ZZ_d_mul_r, ZPR32,
                                    mnemonic>;
}
// SME2.1 multi-vec convert two registers
// Defines only an _S record: sz = 0b10, op = 0b000, a ZZ_s_mul_r destination
// and a ZPR16 source. `l` is forwarded as the class's final bit argument,
// which ends up in Inst{0}.
multiclass sme2p1_fp_cvt_vector_vg2_single<string mnemonic, bit l> {
  def _S : sme2_cvt_unpk_vector_vg2<0b10, 0b000, l, ZZ_s_mul_r, ZPR16,
                                    mnemonic>;
}
// Encoding class for an instruction that writes $Zd (first_ty, 5-bit field)
// from $Zn (second_ty, 3-bit field).
//   sz - written to Inst{23}.
//   op - 3-bit value split over Inst{22} (op{2}) and Inst{6-5} (op{1-0}).
class sme2_cvt_vg4_single<bit sz, bits<3> op, RegisterOperand first_ty,
                          RegisterOperand second_ty, string mnemonic>
    : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
        mnemonic, "\t$Zd, $Zn", "", []>,
      Sched<[]> {
  bits<3> Zn;
  bits<5> Zd;

  // Fixed opcode bits.
  let Inst{31-24} = 0b11000001;
  let Inst{21-10} = 0b110011111000;

  // Bits taken from the template arguments.
  let Inst{23}  = sz;
  let Inst{22}  = op{2};
  let Inst{6-5} = op{1-0};

  // Register operand fields.
  let Inst{9-7} = Zn;
  let Inst{4-0} = Zd;
}
// SME2 multi-vec int down convert four registers
// Two instruction records plus one selection pattern for each:
//   _StoB - sz = 0, ZPR8 destination, ZZZZ_s_mul_r source (nxv16i8 <- nxv4i32)
//   _DtoH - sz = 1, ZPR16 destination, ZZZZ_d_mul_r source (nxv8i16 <- nxv2i64)
multiclass sme2_int_cvt_vg4_single<string mnemonic, bits<3> op,
                                   SDPatternOperator intrinsic> {
  def _StoB : sme2_cvt_vg4_single<0, op, ZPR8, ZZZZ_s_mul_r, mnemonic>;
  def _DtoH : sme2_cvt_vg4_single<1, op, ZPR16, ZZZZ_d_mul_r, mnemonic>;

  def : SME2_Cvt_VG4_Pat<NAME # _StoB, intrinsic, nxv16i8, nxv4i32>;
  def : SME2_Cvt_VG4_Pat<NAME # _DtoH, intrinsic, nxv8i16, nxv2i64>;
}
// Encoding class for an instruction that writes $Zd (first_ty, 3-bit field)
// from $Zn (second_ty, 4-bit field).
//   sz - written to Inst{23-22}.
//   u  - written to Inst{0}.
class sme2_unpk_vector_vg4<bits<2> sz, bit u, RegisterOperand first_ty,
                           RegisterOperand second_ty, string mnemonic>
    : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
        mnemonic, "\t$Zd, $Zn", "", []>,
      Sched<[]> {
  bits<4> Zn;
  bits<3> Zd;

  // Fixed opcode bits.
  let Inst{31-24} = 0b11000001;
  let Inst{21-10} = 0b110101111000;
  let Inst{5}     = 0b0;
  let Inst{1}     = 0b0;

  // Bits taken from the template arguments.
  let Inst{23-22} = sz;
  let Inst{0}     = u;

  // Register operand fields.
  let Inst{9-6} = Zn;
  let Inst{4-2} = Zd;
}
// SME2 multi-vec UNPK four registers
// _H, _S and _D forms (sz = 0b01, 0b10, 0b11). Each destination is a
// ZZZZ_*_mul_r operand; the source is the ZZ_*_mul_r operand one element size
// below it (b, h, s).
multiclass sme2_unpk_vector_vg4<string mnemonic, bit u> {
  def _H : sme2_unpk_vector_vg4<0b01, u, ZZZZ_h_mul_r, ZZ_b_mul_r,
                                mnemonic>;
  def _S : sme2_unpk_vector_vg4<0b10, u, ZZZZ_s_mul_r, ZZ_h_mul_r,
                                mnemonic>;
  def _D : sme2_unpk_vector_vg4<0b11, u, ZZZZ_d_mul_r, ZZ_s_mul_r,
                                mnemonic>;
}
//===----------------------------------------------------------------------===//
// SME2 multi-vec CLAMP registers
// Shared encoding for the two- and four-register clamp forms. $Zd
// (multi_vector_ty) is both result and, via the tied $_Zd, an input; $Zn and
// $Zm are single registers of type vector_ty with 5-bit fields.
//   sz  - written to Inst{23-22}.
//   op1 - written to Inst{12-10}.
//   u   - written to Inst{0}.
// The $Zd field (Inst{4-1}) is not assigned here.
class sme2_clamp_vector_vg24_multi<bits<2> sz, bits<3> op1, bit u,
                                   RegisterOperand multi_vector_ty,
                                   ZPRRegOp vector_ty, string mnemonic>
    : I<(outs multi_vector_ty:$Zd),
        (ins multi_vector_ty:$_Zd, vector_ty:$Zn, vector_ty:$Zm),
        mnemonic, "\t$Zd, $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<5> Zm;
  bits<5> Zn;

  // The destination is read and written in place.
  let Constraints = "$Zd = $_Zd";

  // Fixed opcode bits.
  let Inst{31-24} = 0b11000001;
  let Inst{21}    = 0b1;
  let Inst{15-13} = 0b110;

  // Bits taken from the template arguments.
  let Inst{23-22} = sz;
  let Inst{12-10} = op1;
  let Inst{0}     = u;

  // Register operand fields.
  let Inst{20-16} = Zm;
  let Inst{9-5}   = Zn;
}
// Two-register clamp form: adds the 4-bit $Zd field at Inst{4-1} on top of
// sme2_clamp_vector_vg24_multi.
class sme2_clamp_vector_vg2_multi<bits<2> sz, bits<3> op1, bit u,
                                  RegisterOperand multi_vector_ty,
                                  ZPRRegOp vector_ty, string mnemonic>
    : sme2_clamp_vector_vg24_multi<sz, op1, u, multi_vector_ty, vector_ty,
                                   mnemonic> {
  bits<4> Zd;

  let Inst{4-1} = Zd;
}
// "fp" two-register clamp: _H, _S and _D forms (sz = 0b01, 0b10, 0b11), all
// with op1 = 0b000 and u = 0.
multiclass sme2_fp_clamp_vector_vg2_multi<string mnemonic> {
  def _H : sme2_clamp_vector_vg2_multi<0b01, 0b000, 0b0, ZZ_h_mul_r, ZPR16,
                                       mnemonic>;
  def _S : sme2_clamp_vector_vg2_multi<0b10, 0b000, 0b0, ZZ_s_mul_r, ZPR32,
                                       mnemonic>;
  def _D : sme2_clamp_vector_vg2_multi<0b11, 0b000, 0b0, ZZ_d_mul_r, ZPR64,
                                       mnemonic>;
}
// "int" two-register clamp: _B, _H, _S and _D forms (sz = 0b00 .. 0b11), all
// with op1 = 0b001; `u` is forwarded unchanged to every def.
multiclass sme2_int_clamp_vector_vg2_multi<string mnemonic, bit u> {
  def _B : sme2_clamp_vector_vg2_multi<0b00, 0b001, u, ZZ_b_mul_r, ZPR8,
                                       mnemonic>;
  def _H : sme2_clamp_vector_vg2_multi<0b01, 0b001, u, ZZ_h_mul_r, ZPR16,
                                       mnemonic>;
  def _S : sme2_clamp_vector_vg2_multi<0b10, 0b001, u, ZZ_s_mul_r, ZPR32,
                                       mnemonic>;
  def _D : sme2_clamp_vector_vg2_multi<0b11, 0b001, u, ZZ_d_mul_r, ZPR64,
                                       mnemonic>;
}
// SME2.1 multi-vec FCLAMP two registers
// Defines only an _H record: sz = 0b00, op1 = 0b000, u = 0, with ZZ_h_mul_r
// and ZPR16 operands.
multiclass sme2p1_bfclamp_vector_vg2_multi<string mnemonic> {
  def _H : sme2_clamp_vector_vg2_multi<0b00, 0b000, 0b0, ZZ_h_mul_r, ZPR16,
                                       mnemonic>;
}
// Four-register clamp form: adds the 3-bit $Zd field at Inst{4-2} and fixes
// Inst{1} to zero on top of sme2_clamp_vector_vg24_multi.
class sme2_clamp_vector_vg4_multi<bits<2> sz, bits<3> op1, bit u,
                                  RegisterOperand multi_vector_ty,
                                  ZPRRegOp vector_ty, string mnemonic>
    : sme2_clamp_vector_vg24_multi<sz, op1, u, multi_vector_ty, vector_ty,
                                   mnemonic> {
  bits<3> Zd;

  let Inst{1}   = 0b0;
  let Inst{4-2} = Zd;
}
// "fp" four-register clamp: _H, _S and _D forms (sz = 0b01, 0b10, 0b11), all
// with op1 = 0b010 and u = 0.
multiclass sme2_fp_clamp_vector_vg4_multi<string mnemonic> {
  def _H : sme2_clamp_vector_vg4_multi<0b01, 0b010, 0b0, ZZZZ_h_mul_r, ZPR16,
                                       mnemonic>;
  def _S : sme2_clamp_vector_vg4_multi<0b10, 0b010, 0b0, ZZZZ_s_mul_r, ZPR32,
                                       mnemonic>;
  def _D : sme2_clamp_vector_vg4_multi<0b11, 0b010, 0b0, ZZZZ_d_mul_r, ZPR64,
                                       mnemonic>;
}
multiclass sme2_int_clamp_vector_vg4_multi<string mnemonic, bit u>{
def _B : sme2_clamp_vector_vg4_multi<0b00, 0b011, u, ZZZZ_b_mul_r, ZPR8, mnemonic>;
def _H : sme2_clamp_vector_vg4_multi<0b01, 0b011, u, ZZZZ_h_mul_r, ZPR16, mnemonic>;
def _S : sme2_clamp_vector_vg4_multi<0b10, 0b011, u, ZZZZ_s_mul_r, ZPR32, mnemonic>;
def _D : sme2_clamp_vector_vg4_multi<0b11, 0b011, u, ZZZZ_d_mul_r, ZPR64, mnemonic>;