// blob: fd8ac0b328ebd19433838d632b10c562057a9b54 [file] [log] [blame]
//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//
// Plain NEON modified-immediate operand. Only the printer hook is set here;
// no assembly-parser match class is attached to this operand.
def nModImm : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
}

// Splat-immediate operands. Each one pairs an AsmOperandClass (named
// "NEONi8splat", "NEONi16splat", "NEONi32splat") with an i32 operand that is
// matched through that class and printed by printNEONModImmOperand.
def nImmSplatI8AsmOperand : AsmOperandClass {
  let Name = "NEONi8splat";
}
def nImmSplatI8 : Operand<i32> {
  let ParserMatchClass = nImmSplatI8AsmOperand;
  let PrintMethod = "printNEONModImmOperand";
}

def nImmSplatI16AsmOperand : AsmOperandClass {
  let Name = "NEONi16splat";
}
def nImmSplatI16 : Operand<i32> {
  let ParserMatchClass = nImmSplatI16AsmOperand;
  let PrintMethod = "printNEONModImmOperand";
}

def nImmSplatI32AsmOperand : AsmOperandClass {
  let Name = "NEONi32splat";
}
def nImmSplatI32 : Operand<i32> {
  let ParserMatchClass = nImmSplatI32AsmOperand;
  let PrintMethod = "printNEONModImmOperand";
}
// Immediate operand matched through the "NEONi32vmov" parser class and
// printed by printNEONModImmOperand.
def nImmVMOVI32AsmOperand : AsmOperandClass {
  let Name = "NEONi32vmov";
}
def nImmVMOVI32 : Operand<i32> {
  let ParserMatchClass = nImmVMOVI32AsmOperand;
  let PrintMethod = "printNEONModImmOperand";
}

// Same shape as nImmVMOVI32, but matched through the "NEONi32vmovNeg" class.
def nImmVMOVI32NegAsmOperand : AsmOperandClass {
  let Name = "NEONi32vmovNeg";
}
def nImmVMOVI32Neg : Operand<i32> {
  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
  let PrintMethod = "printNEONModImmOperand";
}

// Floating-point immediate operand. Unlike the operands above it reuses the
// FPImmOperand parser class (declared outside this section) and is printed by
// printFPImmOperand.
def nImmVMOVF32 : Operand<i32> {
  let ParserMatchClass = FPImmOperand;
  let PrintMethod = "printFPImmOperand";
}

// Immediate operand matched through the "NEONi64splat" parser class and
// printed by printNEONModImmOperand.
def nImmSplatI64AsmOperand : AsmOperandClass {
  let Name = "NEONi64splat";
}
def nImmSplatI64 : Operand<i32> {
  let ParserMatchClass = nImmSplatI64AsmOperand;
  let PrintMethod = "printNEONModImmOperand";
}
// Assembly-parser match classes for the vector lane-index operands below.
def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
// Lane-index operands. Each is both an i32 operand (printed with
// printVectorIndex, stored as a single i32imm) and an ImmLeaf predicate that
// bounds the index. The comparison is done after casting Imm to uint64_t, so
// any value that is negative as a signed number compares as a huge unsigned
// value and is rejected.
// VectorIndex8 accepts indices 0..7.
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
return ((uint64_t)Imm) < 8;
}]> {
let ParserMatchClass = VectorIndex8Operand;
let PrintMethod = "printVectorIndex";
let MIOperandInfo = (ops i32imm);
}
// VectorIndex16 accepts indices 0..3.
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
return ((uint64_t)Imm) < 4;
}]> {
let ParserMatchClass = VectorIndex16Operand;
let PrintMethod = "printVectorIndex";
let MIOperandInfo = (ops i32imm);
}
// VectorIndex32 accepts indices 0..1.
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
return ((uint64_t)Imm) < 2;
}]> {
let ParserMatchClass = VectorIndex32Operand;
let PrintMethod = "printVectorIndex";
let MIOperandInfo = (ops i32imm);
}
// Vector register-list operands made of sequential D registers. Every
// AsmOperandClass below is parsed by parseVectorList and rendered by
// addVecListOperands; the RegisterOperand names the printer for the list.

// List holding a single D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}

// List holding two sequential D registers (register class DPair).
def VecListDPairAsmOperand : AsmOperandClass {
  let Name = "VecListDPair";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
  let ParserMatchClass = VecListDPairAsmOperand;
}

// List holding three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}

// List holding four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Vector register-list operands whose D registers are spaced by 2. As with
// the sequential lists, parsing goes through parseVectorList and rendering
// through addVecListOperands.

// Two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpaced";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListDPairSpacedAsmOperand;
}

// Three D registers spaced by 2 (three Q registers).
def VecListThreeQAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQ";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  let ParserMatchClass = VecListThreeQAsmOperand;
}

// Four D registers spaced by 2 (four Q registers).
def VecListFourQAsmOperand : AsmOperandClass {
  let Name = "VecListFourQ";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  let ParserMatchClass = VecListFourQAsmOperand;
}
// "All lanes" register-list operands with one or two D registers. Parsing and
// rendering use the same parseVectorList / addVecListOperands hooks as the
// plain lists; only the match-class name and the printer differ.

// One D register, with "all lanes" subscripting.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}

// Two D registers, with "all lanes" subscripting.
def VecListDPairAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairAllLanes";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListDPairAllLanes
    : RegisterOperand<DPair, "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}

// Two D registers spaced by 2 (two sequential Q registers), with "all lanes"
// subscripting.
def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpacedAllLanes";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListDPairSpacedAllLanes
    : RegisterOperand<DPair, "printVectorListTwoSpacedAllLanes"> {
  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// "All lanes" register-list operands with three or four D registers, in both
// sequential and spaced-by-2 flavours. All are parsed by parseVectorList and
// rendered by addVecListOperands.

// Three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDAllLanes";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeDAllLanes
    : RegisterOperand<DPR, "printVectorListThreeAllLanes"> {
  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}

// Three D registers spaced by 2 (three sequential Q regs), with "all lanes"
// subscripting.
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQAllLanes";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeQAllLanes
    : RegisterOperand<DPR, "printVectorListThreeSpacedAllLanes"> {
  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}

// Four D registers, with "all lanes" subscripting.
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourDAllLanes";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListFourDAllLanes
    : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}

// Four D registers spaced by 2 (four sequential Q regs), with "all lanes"
// subscripting.
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourQAllLanes";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListFourQAllLanes
    : RegisterOperand<DPR, "printVectorListFourSpacedAllLanes"> {
  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}
// Lane-indexed lists of one D register. These are parsed by parseVectorList
// but rendered by addVecListIndexedOperands, and each operand expands to two
// MI operands: the D register ($Vd) and the lane index ($idx).

// One D register, byte lane subscripting.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListOneDByteIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
}

// One D register, half-word lane subscripting.
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDHWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListOneDHWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
}

// One D register, word lane subscripting.
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListOneDWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
}
// Lane-indexed lists of two registers. Each operand is parsed by
// parseVectorList and rendered by addVecListIndexedOperands. The MI operand
// info records one DPR ($Vd) plus the lane index ($idx); the second register
// of the list is not a separate MI operand here.

// Two D registers, byte lane subscripting.
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDByteIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListTwoDByteIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
}

// Two D registers, half-word lane subscripting.
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDHWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListTwoDHWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
}

// Two D registers, word lane subscripting.
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListTwoDWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
}

// Two Q registers, half-word lane subscripting.
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQHWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListTwoQHWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
}

// Two Q registers, word lane subscripting.
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListTwoQWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
}
// Lane-indexed lists of three registers. Each operand is parsed by
// parseVectorList and rendered by addVecListIndexedOperands. The MI operand
// info records one DPR ($Vd) plus the lane index ($idx); the other registers
// of the list are not separate MI operands here.

// Three D registers, byte lane subscripting.
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDByteIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeDByteIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
}

// Three D registers, half-word lane subscripting.
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDHWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeDHWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
}

// Three D registers, word lane subscripting.
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeDWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
}

// Three Q registers, half-word lane subscripting.
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQHWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeQHWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
}

// Three Q registers, word lane subscripting.
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeQWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
}
// Lane-indexed lists of four registers. Each operand is parsed by
// parseVectorList and rendered by addVecListIndexedOperands. The MI operand
// info records one DPR ($Vd) plus the lane index ($idx); the other registers
// of the list are not separate MI operands here.

// Four D registers, byte lane subscripting.
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDByteIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListFourDByteIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
}

// Four D registers, half-word lane subscripting.
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDHWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListFourDHWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
}

// Four D registers, word lane subscripting.
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListFourDWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
}

// Four Q registers, half-word lane subscripting.
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQHWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListFourQHWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
}

// Four Q registers, word lane subscripting.
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListFourQWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
}
//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//
// Type profile for the two-operand vector compares: one result, which must be
// an integer type, and two operands that must have the same type as each
// other (the result is not tied to the operand type).
def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
// Type profile for the compare-against-zero nodes: one result and one
// operand, with no type constraints.
def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;
// Compare nodes. Each SDNode binds a TableGen name to the ARMISD opcode given
// in the string; names ending in 'z' use the one-operand SDTARMVCMPZ profile.
def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>;
// Types for vector shift by immediates. The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types. The "SHINS" version is for shift and insert operations.
// SDTARMVSH: integer result, source of the same type, i32 shift amount.
def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
SDTCisVT<2, i32>]>;
// SDTARMVSHX: integer result and integer source (types not tied together),
// i32 shift amount.
def SDTARMVSHX : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
SDTCisVT<2, i32>]>;
// SDTARMVSHINS: integer result, two sources of the result type, i32 shift
// amount.
def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
// Shift-by-immediate nodes; the second template argument selects which of the
// three profiles above applies.
def NEONvshl : SDNode<"ARMISD::VSHL", SDTARMVSH>;
def NEONvshrs : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
def NEONvshru : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
def NEONvshlls : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
def NEONvshllu : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
def NEONvshlli : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
def NEONvshrn : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;
def NEONvrshrs : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
def NEONvrshru : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
def NEONvrshrn : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;
def NEONvqshls : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
def NEONvqshlu : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
def NEONvqshlsu : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
def NEONvqshrns : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
def NEONvqshrnu : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
def NEONvqshrnsu : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;
def NEONvqrshrns : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
def NEONvqrshrnu : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;
def NEONvsli : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
def NEONvsri : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;
// Lane extraction: i32 result, integer source operand, i32 lane operand.
def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
// Vector move of an immediate: vector result, single i32 operand.
def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;
// Vector OR/BIC with an immediate: vector result, a source of the same type,
// and an i32 operand.
def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
SDTCisVT<2, i32>]>;
def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;
// VBSL: vector result and three operands, all of the result type.
def NEONvbsl : SDNode<"ARMISD::VBSL",
SDTypeProfile<1, 3, [SDTCisVec<0>,
SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>]>>;
// VDUP: vector result from one operand whose type is left unconstrained.
def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
def NEONvduplane : SDNode<"ARMISD::VDUPLANE",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisVT<2, i32>]>>;
// VEXT: vector result, two sources of the result type, and an i32 operand.
def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;
// One-in/one-out shuffles: vector result and a source of the same type.
def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;
// Two-in/two-out shuffles: two results and two operands, all of one vector
// type.
def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>]>;
def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
// Long multiply: integer result, two integer operands that share a type (the
// result type is not tied to the operand type).
def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
SDTCisSameAs<1, 2>]>;
def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
// Scalar f32 max/min: f32 result and two f32 operands.
def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>]>;
def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
// Matches a NEONvmovImm node whose immediate operand, once decoded with
// ARM_AM::decodeNEONModImm, describes 32-bit elements with value 0.
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
unsigned EltBits = 0;
uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
return (EltBits == 32 && EltVal == 0);
}]>;
// Matches a NEONvmovImm node whose immediate operand, once decoded with
// ARM_AM::decodeNEONModImm, describes 8-bit elements with value 0xff.
def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
unsigned EltBits = 0;
uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
return (EltBits == 8 && EltVal == 0xff);
}]>;
//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//
// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
// The selection pattern matches a v2f64 load from the address in $Rn and
// places the result in a DPair register.
def VLDMQIA
: PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
IIC_fpLoad_m, "",
[(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>;
// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
// The selection pattern matches a store of a v2f64 DPair value to the address
// in $Rn; there are no outputs.
def VSTMQIA
: PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
IIC_fpStore_m, "",
[(store (v2f64 DPair:$src), GPR:$Rn)]>;
// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// In the names below, Q / QQ / QQQQ is the destination register class
// (QPR / QQPR / QQQQPR). The "WB" variants add a GPR:$wb output that is tied
// to the address base by the "$addr.addr = $wb" constraint string.
// QPR destination, no writeback.
class VLDQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
// QPR destination, writeback with an am6offset operand.
class VLDQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset), itin,
"$addr.addr = $wb">;
// QPR destination, writeback with no explicit offset operand.
class VLDQWBfixedPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
(ins addrmode6:$addr), itin,
"$addr.addr = $wb">;
// QPR destination, writeback with an rGPR offset register.
class VLDQWBregisterPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
(ins addrmode6:$addr, rGPR:$offset), itin,
"$addr.addr = $wb">;
// QQPR destination, no writeback.
class VLDQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
// QQPR destination, writeback with an am6offset operand.
class VLDQQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset), itin,
"$addr.addr = $wb">;
// QQPR destination, writeback with no explicit offset operand.
class VLDQQWBfixedPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
(ins addrmode6:$addr), itin,
"$addr.addr = $wb">;
// QQPR destination, writeback with an rGPR offset register.
class VLDQQWBregisterPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
(ins addrmode6:$addr, rGPR:$offset), itin,
"$addr.addr = $wb">;
// QQQQPR destination, no writeback. The destination register is also taken
// as an input ($src) and tied to the output with "$src = $dst".
class VLDQQQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
"$src = $dst">;
// QQQQPR destination with writeback (am6offset operand); carries both the
// address tie and the "$src = $dst" tie.
class VLDQQQQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
"$addr.addr = $wb, $src = $dst">;
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// VLD1 : Vector Load (multiple single elements)
// VLD1D produces a VecListOneD list and VLD1Q a VecListDPair list; neither
// form has a writeback output. Rm is pinned to 0b1111, the value the ARM NEON
// load/store encoding uses for "no writeback".
// The '?' entries in the op7_4 arguments are bits the defs leave unset; the
// class bodies assign Inst{4} (VLD1D) or Inst{5-4} (VLD1Q) from the same bits
// of the $Rn operand instead.
// NOTE(review): those $Rn bits are presumably the addrmode6 alignment field;
// confirm against the addrmode6 encoder.
class VLD1D<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
(ins addrmode6:$Rn), IIC_VLD1,
"vld1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDInstruction";
}
class VLD1Q<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
(ins addrmode6:$Rn), IIC_VLD1x2,
"vld1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDInstruction";
}
// Element-size variants. The string is the data-type suffix (Dt) and the two
// leading op7_4 bits change with it: 00, 01, 10, 11 for 8, 16, 32, 64.
def VLD1d8 : VLD1D<{0,0,0,?}, "8">;
def VLD1d16 : VLD1D<{0,1,0,?}, "16">;
def VLD1d32 : VLD1D<{1,0,0,?}, "32">;
def VLD1d64 : VLD1D<{1,1,0,?}, "64">;
def VLD1q8 : VLD1Q<{0,0,?,?}, "8">;
def VLD1q16 : VLD1Q<{0,1,?,?}, "16">;
def VLD1q32 : VLD1Q<{1,0,?,?}, "32">;
def VLD1q64 : VLD1Q<{1,1,?,?}, "64">;
// ...with address register writeback:
// Each multiclass emits two records per element size:
//   _fixed    - no offset operand; Rm is pinned to 0b1101, the assembly form
//               is "$Vd, $Rn!", and operands are converted by cvtVLDwbFixed.
//   _register - takes an rGPR:$Rm offset register; the assembly form is
//               "$Vd, $Rn, $Rm", converted by cvtVLDwbRegister.
// Both add a GPR:$wb output tied to the address base via "$Rn.addr = $wb".
multiclass VLD1DWB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
(ins addrmode6:$Rn), IIC_VLD1u,
"vld1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDInstruction";
let AsmMatchConverter = "cvtVLDwbFixed";
}
def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDInstruction";
let AsmMatchConverter = "cvtVLDwbRegister";
}
}
// Same structure as VLD1DWB, for the two-register (VecListDPair) list; here
// Inst{5-4} rather than Inst{4} is taken from $Rn.
multiclass VLD1QWB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
(ins addrmode6:$Rn), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDInstruction";
let AsmMatchConverter = "cvtVLDwbFixed";
}
def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDInstruction";
let AsmMatchConverter = "cvtVLDwbRegister";
}
}
// Instantiate the writeback forms for each element size; every defm yields a
// <name>_fixed and a <name>_register record.
defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8">;
defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16">;
defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32">;
defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64">;
defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8">;
defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">;
defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">;
defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">;
// ...with 3 registers
// VLD1D3 is the no-writeback form producing a VecListThreeD list (Rm pinned
// to 0b1111, Inst{4} taken from $Rn).
class VLD1D3<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
(ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt,
"$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDInstruction";
}
// Writeback forms: _fixed (Rm = 0b1101, "$Vd, $Rn!") and _register (rGPR:$Rm
// offset, "$Vd, $Rn, $Rm"), each with GPR:$wb tied to the address base.
// NOTE(review): both records use the IIC_VLD1x2u itinerary although the
// no-writeback class above uses IIC_VLD1x3; confirm whether a three-register
// writeback itinerary was intended here.
multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
(ins addrmode6:$Rn), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDInstruction";
let AsmMatchConverter = "cvtVLDwbFixed";
}
def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDInstruction";
let AsmMatchConverter = "cvtVLDwbRegister";
}
}
// Element-size variants of the three-register forms ("T" suffix).
def VLD1d8T : VLD1D3<{0,0,0,?}, "8">;
def VLD1d16T : VLD1D3<{0,1,0,?}, "16">;
def VLD1d32T : VLD1D3<{1,0,0,?}, "32">;
def VLD1d64T : VLD1D3<{1,1,0,?}, "64">;
defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8">;
defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16">;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">;
// Pseudo with a QQPR destination, using the three-register itinerary.
def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
// ...with 4 registers
// VLD1D4 is the no-writeback form producing a VecListFourD list (Rm pinned
// to 0b1111, Inst{5-4} taken from $Rn).
class VLD1D4<bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
(ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt,
"$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDInstruction";
}
// Writeback forms: _fixed (Rm = 0b1101, "$Vd, $Rn!") and _register (rGPR:$Rm
// offset, "$Vd, $Rn, $Rm"), each with GPR:$wb tied to the address base.
// NOTE(review): both records use the IIC_VLD1x2u itinerary although the
// no-writeback class above uses IIC_VLD1x4; confirm whether a four-register
// writeback itinerary was intended here.
multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
(ins addrmode6:$Rn), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDInstruction";
let AsmMatchConverter = "cvtVLDwbFixed";
}
def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDInstruction";
let AsmMatchConverter = "cvtVLDwbRegister";
}
}
// Element-size variants of the four-register forms ("Q" suffix).
def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">;
def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">;
def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">;
def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">;
defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8">;
defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16">;
defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">;
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">;
// Pseudo with a QQPR destination, using the four-register itinerary.
def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
// VLD2 : Vector Load (multiple 2-element structures)
// No-writeback form. The register-list operand type (VdTy), the op11_8
// field and the itinerary are parameters, so one class serves the D-pair,
// four-D and spaced-pair variants. Rm is pinned to 0b1111 and Inst{5-4} is
// taken from $Rn.
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
InstrItinClass itin>
: NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
(ins addrmode6:$Rn), itin,
"vld2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDInstruction";
}
// "d" variants: op11_8 = 0b1000, VecListDPair list.
def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>;
def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2>;
def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2>;
// "q" variants: op11_8 = 0b0011, VecListFourD list.
def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>;
def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>;
def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>;
// Pseudos with a QQPR destination for the "q" variants.
def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
// ...with address register writeback:
// Emits a _fixed record (Rm = 0b1101, "$Vd, $Rn!", cvtVLDwbFixed) and a
// _register record (rGPR:$Rm offset, "$Vd, $Rn, $Rm", cvtVLDwbRegister).
// Both add GPR:$wb tied to the address base via "$Rn.addr = $wb". The list
// operand type, op11_8 and the itinerary are parameters.
multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
RegisterOperand VdTy, InstrItinClass itin> {
def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
(ins addrmode6:$Rn), itin,
"vld2", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDInstruction";
let AsmMatchConverter = "cvtVLDwbFixed";
}
def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm), itin,
"vld2", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDInstruction";
let AsmMatchConverter = "cvtVLDwbRegister";
}
}
// Writeback variants mirroring the VLD2d* / VLD2q* defs (same op11_8 values
// and list types, with the "u" itineraries).
defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u>;
defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u>;
defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u>;
defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>;
defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>;
defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>;
// QQPR-destination writeback pseudos, in fixed and register-offset flavours.
def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
// ...with double-spaced registers
// These reuse the VLD2 class and VLD2WB multiclass with op11_8 = 0b1001 and
// the VecListDPairSpaced list operand.
def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2>;
def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2>;
def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2>;
defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u>;
defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u>;
defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u>;
// VLD3 : Vector Load (multiple 3-element structures)
// No-writeback form. Unlike the VLD1/VLD2 classes, the destination is three
// separate DPR outputs rather than a register-list operand, and the braces of
// the list are spelled out in the assembly string. Rm is pinned to 0b1111 and
// Inst{4} is taken from $Rn.
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
(ins addrmode6:$Rn), IIC_VLD3,
"vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDInstruction";
}
// "d" variants: op11_8 = 0b0100.
def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">;
def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;
// Pseudos with a QQPR destination.
def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>;
def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;
// ...with address register writeback:
// A single class covers writeback here (no _fixed/_register split and no
// AsmMatchConverter): the offset is an am6offset:$Rm operand printed directly
// after $Rn in the assembly string. GPR:$wb is tied to the address base via
// "$Rn.addr = $wb".
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4,
(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
"vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDInstruction";
}
// "d" writeback variants: op11_8 = 0b0100.
def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;
// Pseudos with a QQPR destination and writeback.
def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
// ...with double-spaced registers:
// These reuse VLD3D / VLD3DWB with op11_8 = 0b0101 instead of 0b0100.
def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">;
def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">;
def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">;
def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">;
def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;
// Writeback pseudos with a QQQQPR destination (also taken as a tied input).
def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
// ...alternate versions to be allocated odd register numbers:
def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
// VLD4 : Vector Load (multiple 4-element structures)
//
// Form without address writeback: four DPR results and a single addrmode6
// operand ($Rn).
//   op11_8, op7_4 - encoding fields forwarded to NLdSt.
//   Dt            - data-type suffix used in the assembly string.
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$Rn),
          IIC_VLD4,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
  let DecoderMethod = "DecodeVLDInstruction";
  let Rm            = 0b1111;    // Rm is a constant here; there is no Rm operand.
  let Inst{5-4}     = Rn{5-4};   // Encoding bits 5-4 come from bits 5-4 of $Rn.
}
// VLD4D instantiations with op11_8 = 0b0000, one per element size.
def VLD4d8        : VLD4D<0b0000, {0,0,?,?}, "8">;
def VLD4d16       : VLD4D<0b0000, {0,1,?,?}, "16">;
def VLD4d32       : VLD4D<0b0000, {1,0,?,?}, "32">;

// Matching pseudo-instructions, built on VLDQQPseudo.
def VLD4d8Pseudo  : VLDQQPseudo<IIC_VLD4>;
def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;
// ...with address register writeback:
//
// Same four DPR results as VLD4D plus a GPR result $wb. The inputs are the
// addrmode6 address $Rn and an am6offset operand $Rm; the constraint string
// ties $Rn.addr to $wb.
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm),
          IIC_VLD4u,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVLDInstruction";
  let Inst{5-4}     = Rn{5-4};   // Encoding bits 5-4 come from bits 5-4 of $Rn.
}
// Writeback VLD4 records with op11_8 = 0b0000, one per element size.
def VLD4d8_UPD        : VLD4DWB<0b0000, {0,0,?,?}, "8">;
def VLD4d16_UPD       : VLD4DWB<0b0000, {0,1,?,?}, "16">;
def VLD4d32_UPD       : VLD4DWB<0b0000, {1,0,?,?}, "32">;

// Matching writeback pseudo-instructions, built on VLDQQWBPseudo.
def VLD4d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4u>;
def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
// ...with double-spaced registers:
// These reuse VLD4D / VLD4DWB with op11_8 = 0b0001.
def VLD4q8               : VLD4D<0b0001, {0,0,?,?}, "8">;
def VLD4q16              : VLD4D<0b0001, {0,1,?,?}, "16">;
def VLD4q32              : VLD4D<0b0001, {1,0,?,?}, "32">;
def VLD4q8_UPD           : VLD4DWB<0b0001, {0,0,?,?}, "8">;
def VLD4q16_UPD          : VLD4DWB<0b0001, {0,1,?,?}, "16">;
def VLD4q32_UPD          : VLD4DWB<0b0001, {1,0,?,?}, "32">;

// Writeback pseudos, built on VLDQQQQWBPseudo.
def VLD4q8Pseudo_UPD     : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q16Pseudo_UPD    : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q32Pseudo_UPD    : VLDQQQQWBPseudo<IIC_VLD4u>;

// ...alternate versions to be allocated odd register numbers:
def VLD4q8oddPseudo      : VLDQQQQPseudo<IIC_VLD4>;
def VLD4q16oddPseudo     : VLDQQQQPseudo<IIC_VLD4>;
def VLD4q32oddPseudo     : VLDQQQQPseudo<IIC_VLD4>;
def VLD4q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
// Classes for VLD*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.

// Lane-load pseudo on a QPR value: $src is tied to the result $dst, and the
// lane index is passed as $lane.
class VLDQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst),
                (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin,
                "$src = $dst">;

// Writeback variant: adds a GPR result $wb tied to $addr.addr and an
// am6offset input $offset.
class VLDQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                     nohash_imm:$lane),
                itin,
                "$addr.addr = $wb, $src = $dst">;
// Lane-load pseudo on a QQPR value: $src is tied to the result $dst, and the
// lane index is passed as $lane.
class VLDQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst),
                (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin,
                "$src = $dst">;

// Writeback variant: adds a GPR result $wb tied to $addr.addr and an
// am6offset input $offset.
class VLDQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                     nohash_imm:$lane),
                itin,
                "$addr.addr = $wb, $src = $dst">;
// Lane-load pseudo on a QQQQPR value: $src is tied to the result $dst, and
// the lane index is passed as $lane.
class VLDQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst),
                (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin,
                "$src = $dst">;

// Writeback variant: adds a GPR result $wb tied to $addr.addr and an
// am6offset input $offset.
class VLDQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                     nohash_imm:$lane),
                itin,
                "$addr.addr = $wb, $src = $dst">;
// VLD1LN : Vector Load (single element to one lane)
//
// Selection pattern: vector_insert of an i32 produced by LoadOp on $Rn into
// lane $lane of the Ty-typed $src; $src is tied to the result $Vd.
//   op11_8, op7_4 - encoding fields forwarded to NLdStLn.
//   Dt            - data-type suffix used in the assembly string.
//   Ty            - vector type of $src in the pattern.
//   LoadOp        - load fragment used in the pattern.
class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd),
            (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
            IIC_VLD1ln,
            "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
            "$src = $Vd",
            [(set DPR:$Vd,
                  (vector_insert (Ty DPR:$src),
                                 (i32 (LoadOp addrmode6:$Rn)),
                                 imm:$lane))]> {
  let DecoderMethod = "DecodeVLD1LN";
  let Rm            = 0b1111;  // Rm is a constant here; there is no Rm operand.
}
// Same shape as VLD1LN, except that the address operand (both in the operand
// list and in the selection pattern) is addrmode6oneL32 rather than addrmode6.
class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd),
            (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
            IIC_VLD1ln,
            "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
            "$src = $Vd",
            [(set DPR:$Vd,
                  (vector_insert (Ty DPR:$src),
                                 (i32 (LoadOp addrmode6oneL32:$Rn)),
                                 imm:$lane))]> {
  let DecoderMethod = "DecodeVLD1LN";
  let Rm            = 0b1111;  // Rm is a constant here; there is no Rm operand.
}
// Q-register VLD1LN pseudo: a VLDQLNPseudo (itinerary IIC_VLD1ln) whose
// Pattern inserts the i32 result of LoadOp on $addr into lane $lane of the
// Ty-typed $src.
class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp>
  : VLDQLNPseudo<IIC_VLD1ln> {
  let Pattern = [(set QPR:$dst,
                      (vector_insert (Ty QPR:$src),
                                     (i32 (LoadOp addrmode6:$addr)),
                                     imm:$lane))];
}
// D-register VLD1LN records. Each body wires the lane index (and, for the 16-
// and 32-bit forms, bits 5-4 of $Rn) into the op7_4 bits left as '?'.
def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
  let Inst{7-6} = lane{1-0};
  let Inst{5-4} = Rn{5-4};
}
def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
  let Inst{7}   = lane{0};
  let Inst{5-4} = Rn{5-4};
}

// Q-register pseudos carrying the corresponding selection patterns.
def VLD1LNq8Pseudo  : VLD1QLNPseudo<v16i8, extloadi8>;
def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
// Floating-point lane loads: inserting a loaded f32 into a v2f32 / v4f32
// vector selects the 32-bit VLD1LN record / pseudo.
def : Pat<(vector_insert (v2f32 DPR:$src),
                         (f32 (load addrmode6:$addr)),
                         imm:$lane),
          (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v4f32 QPR:$src),
                         (f32 (load addrmode6:$addr)),
                         imm:$lane),
          (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
//
// VLD1LN with an extra GPR result $wb and an am6offset input $Rm. The
// constraint string ties $src to $Vd and $Rn.addr to $wb. No selection
// pattern is attached.
class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm, DPR:$src, nohash_imm:$lane),
            IIC_VLD1lnu,
            "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn$Rm",
            "$src = $Vd, $Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVLD1LN";
}
// Writeback VLD1LN records; each body wires the lane index and $Rn bits into
// the op7_4 positions left as '?'.
def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  // Both encoding bits 5 and 4 are driven by the same source bit, Rn{4}.
  let Inst{5} = Rn{4};
  let Inst{4} = Rn{4};
}

// Q-register writeback pseudos, built on VLDQLNWBPseudo.
def VLD1LNq8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>;
def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
// VLD2LN : Vector Load (single 2-element structure to one lane)
//
// Two DPR results, each tied to the matching DPR input ($src1 to $Vd, $src2
// to $dst2); the lane index is passed as $lane. No selection pattern.
class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
            IIC_VLD2ln,
            "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2", []> {
  let DecoderMethod = "DecodeVLD2LN";
  let Rm            = 0b1111;  // Rm is a constant here; there is no Rm operand.
  let Inst{4}       = Rn{4};   // Encoding bit 4 is taken from bit 4 of $Rn.
}
// VLD2LN records; each body wires the lane index into the leading op7_4 bits.
def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo  : VLDQLNPseudo<IIC_VLD2ln>;
def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;

// ...with double-spaced registers:
// Compared with the records above, one fixed op7_4 bit is 1 instead of 0
// (bit 5 for the 16-bit form, bit 6 for the 32-bit form).
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
// ...with address register writeback:
//
// VLD2LN with an extra GPR result $wb and an am6offset input $Rm. The
// constraint string ties each $srcN to its result and $Rn.addr to $wb.
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
                 DPR:$src1, DPR:$src2, nohash_imm:$lane),
            IIC_VLD2lnu,
            "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVLD2LN";
  let Inst{4}       = Rn{4};   // Encoding bit 4 is taken from bit 4 of $Rn.
}
// Writeback VLD2LN records; each body wires the lane index into the leading
// op7_4 bits.
def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;

// "q" variants: one fixed op7_4 bit is 1 instead of 0 relative to the "d"
// records above.
def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
// VLD3LN : Vector Load (single 3-element structure to one lane)
//
// Three DPR results, each tied to the matching DPR input; the lane index is
// passed as $lane. No selection pattern. Unlike VLD2LN, no $Rn bit is wired
// into the encoding at class level.
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
                 nohash_imm:$lane),
            IIC_VLD3ln,
            "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
  let DecoderMethod = "DecodeVLD3LN";
  let Rm            = 0b1111;  // Rm is a constant here; there is no Rm operand.
}
// VLD3LN records; op7_4 bit 4 is fixed to 0 in every one of them, and each
// body wires the lane index into the leading bits.
def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>;
def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;

// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
// ...with address register writeback:
//
// VLD3LN with an extra GPR result $wb and an am6offset input $Rm. The
// constraint string ties each $srcN to its result and $Rn.addr to $wb.
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
                 DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
            IIC_VLD3lnu,
            "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
            []> {
  let DecoderMethod = "DecodeVLD3LN";
}
// Writeback VLD3LN records; op7_4 bit 4 is fixed to 0 and each body wires the
// lane index into the leading bits.
def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;

// "q" variants: one fixed op7_4 bit is 1 instead of 0 relative to the "d"
// records above.
def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
// VLD4LN : Vector Load (single 4-element structure to one lane)
//
// Four DPR results, each tied to the matching DPR input; the lane index is
// passed as $lane. No selection pattern.
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
                 nohash_imm:$lane),
            IIC_VLD4ln,
            "vld4", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
  let DecoderMethod = "DecodeVLD4LN";
  let Rm            = 0b1111;  // Rm is a constant here; there is no Rm operand.
  let Inst{4}       = Rn{4};   // Encoding bit 4 is taken from bit 4 of $Rn.
}
// VLD4LN records; each body wires the lane index into the leading op7_4 bits.
// The 32-bit forms additionally take encoding bit 5 from Rn{5}.
def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;

// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
// ...with address register writeback:
//
// VLD4LN with an extra GPR result $wb and an am6offset input $Rm. The
// constraint string ties each $srcN to its result and $Rn.addr to $wb.
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
                 DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
                 nohash_imm:$lane),
            IIC_VLD4lnu,
            "vld4", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
            []> {
  let DecoderMethod = "DecodeVLD4LN";
  let Inst{4}       = Rn{4};   // Encoding bit 4 is taken from bit 4 of $Rn.
}
// Writeback VLD4LN records; each body wires the lane index into the leading
// op7_4 bits. The 32-bit forms additionally take encoding bit 5 from Rn{5}.
def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;

// "q" variants: one fixed op7_4 bit is 1 instead of 0 relative to the "d"
// records above.
def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
// VLD1DUP : Vector Load (single element to all lanes)
//
// Result is a VecListOneDAllLanes operand. The selection pattern matches a
// NEONvdup of an i32 produced by LoadOp on the addrmode6dup address $Rn,
// typed as Ty.
//   op7_4  - encoding field forwarded to NLdSt (op11_8 is fixed at 0b1100).
//   Dt     - data-type suffix used in the assembly string.
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
  : NLdSt<1, 0b10, 0b1100, op7_4,
          (outs VecListOneDAllLanes:$Vd),
          (ins addrmode6dup:$Rn),
          IIC_VLD1dup,
          "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListOneDAllLanes:$Vd,
                (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
  let DecoderMethod = "DecodeVLD1DupInstruction";
  let Rm            = 0b1111;  // Rm is a constant here; there is no Rm operand.
  let Inst{4}       = Rn{4};   // Encoding bit 4 is taken from bit 4 of $Rn.
}
// One-D-register VLD1DUP records, one per element size.
def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8",  v8i8,  extloadi8>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;

// A v2f32 NEONvdup of a loaded f32 selects the 32-bit record.
def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPd32 addrmode6:$addr)>;
// Same shape as VLD1DUP, but the result operand is VecListDPairAllLanes.
class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
  : NLdSt<1, 0b10, 0b1100, op7_4,
          (outs VecListDPairAllLanes:$Vd),
          (ins addrmode6dup:$Rn),
          IIC_VLD1dup,
          "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListDPairAllLanes:$Vd,
                (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
  let DecoderMethod = "DecodeVLD1DupInstruction";
  let Rm            = 0b1111;  // Rm is a constant here; there is no Rm operand.
  let Inst{4}       = Rn{4};   // Encoding bit 4 is taken from bit 4 of $Rn.
}
// D-pair VLD1DUP records, one per element size (op7_4 bit 5 is 1 in each).
def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8",  v16i8, extloadi8>;
def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>;
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>;

// A v4f32 NEONvdup of a loaded f32 selects the 32-bit record.
def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPq32 addrmode6:$addr)>;
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
//
// Produces two records per instantiation:
//   _fixed    - "$Vd, $Rn!" syntax; Rm is the constant 0b1101.
//   _register - "$Vd, $Rn, $Rm" syntax with an rGPR operand $Rm.
// Both return the updated address in $wb (tied to $Rn.addr).
multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
  def _fixed
    : NLdSt<1, 0b10, 0b1100, op7_4,
            (outs VecListOneDAllLanes:$Vd, GPR:$wb),
            (ins addrmode6dup:$Rn),
            IIC_VLD1dupu,
            "vld1", Dt, "$Vd, $Rn!",
            "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4}           = Rn{4};
    let DecoderMethod     = "DecodeVLD1DupInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register
    : NLdSt<1, 0b10, 0b1100, op7_4,
            (outs VecListOneDAllLanes:$Vd, GPR:$wb),
            (ins addrmode6dup:$Rn, rGPR:$Rm),
            IIC_VLD1dupu,
            "vld1", Dt, "$Vd, $Rn, $Rm",
            "$Rn.addr = $wb", []> {
    let Inst{4}           = Rn{4};
    let DecoderMethod     = "DecodeVLD1DupInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}
// D-pair counterpart of VLD1DUPWB: identical apart from the result operand,
// which is VecListDPairAllLanes.
//   _fixed    - "$Vd, $Rn!" syntax; Rm is the constant 0b1101.
//   _register - "$Vd, $Rn, $Rm" syntax with an rGPR operand $Rm.
multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
  def _fixed
    : NLdSt<1, 0b10, 0b1100, op7_4,
            (outs VecListDPairAllLanes:$Vd, GPR:$wb),
            (ins addrmode6dup:$Rn),
            IIC_VLD1dupu,
            "vld1", Dt, "$Vd, $Rn!",
            "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4}           = Rn{4};
    let DecoderMethod     = "DecodeVLD1DupInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register
    : NLdSt<1, 0b10, 0b1100, op7_4,
            (outs VecListDPairAllLanes:$Vd, GPR:$wb),
            (ins addrmode6dup:$Rn, rGPR:$Rm),
            IIC_VLD1dupu,
            "vld1", Dt, "$Vd, $Rn, $Rm",
            "$Rn.addr = $wb", []> {
    let Inst{4}           = Rn{4};
    let DecoderMethod     = "DecodeVLD1DupInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}
// Writeback VLD1DUP instantiations; each defm yields a _fixed and a _register
// record. The 8-bit forms fix every op7_4 bit.
defm VLD1DUPd8wb  : VLD1DUPWB<{0,0,0,0}, "8">;
defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">;
defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">;
defm VLD1DUPq8wb  : VLD1QDUPWB<{0,0,1,0}, "8">;
defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">;
defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">;
// VLD2DUP : Vector Load (single 2-element structure to all lanes)
//
// The register-list operand class is supplied by the caller through VdTy.
// No selection pattern is attached.
class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy>
  : NLdSt<1, 0b10, 0b1101, op7_4,
          (outs VdTy:$Vd),
          (ins addrmode6dup:$Rn),
          IIC_VLD2dup,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let DecoderMethod = "DecodeVLD2DupInstruction";
  let Rm            = 0b1111;  // Rm is a constant here; there is no Rm operand.
  let Inst{4}       = Rn{4};   // Encoding bit 4 is taken from bit 4 of $Rn.
}
// VLD2DUP records over VecListDPairAllLanes (op7_4 bit 5 = 0).
def VLD2DUPd8    : VLD2DUP<{0,0,0,?}, "8",  VecListDPairAllLanes>;
def VLD2DUPd16   : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>;
def VLD2DUPd32   : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>;

// ...with double-spaced registers
// Same element sizes over VecListDPairSpacedAllLanes (op7_4 bit 5 = 1).
def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8",  VecListDPairSpacedAllLanes>;
def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;
// ...with address register writeback:
//
// Produces two records per instantiation:
//   _fixed    - "$Vd, $Rn!" syntax; Rm is the constant 0b1101.
//   _register - "$Vd, $Rn, $Rm" syntax with an rGPR operand $Rm.
// Both return the updated address in $wb (tied to $Rn.addr). The
// register-list operand class is supplied through VdTy.
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
  def _fixed
    : NLdSt<1, 0b10, 0b1101, op7_4,
            (outs VdTy:$Vd, GPR:$wb),
            (ins addrmode6dup:$Rn),
            IIC_VLD2dupu,
            "vld2", Dt, "$Vd, $Rn!",
            "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4}           = Rn{4};
    let DecoderMethod     = "DecodeVLD2DupInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register
    : NLdSt<1, 0b10, 0b1101, op7_4,
            (outs VdTy:$Vd, GPR:$wb),
            (ins addrmode6dup:$Rn, rGPR:$Rm),
            IIC_VLD2dupu,
            "vld2", Dt, "$Vd, $Rn, $Rm",
            "$Rn.addr = $wb", []> {
    let Inst{4}           = Rn{4};
    let DecoderMethod     = "DecodeVLD2DupInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}
// Writeback VLD2DUP instantiations; each defm yields a _fixed and a _register
// record. The 8-bit forms fix every op7_4 bit.
defm VLD2DUPd8wb    : VLD2DUPWB<{0,0,0,0}, "8",  VecListDPairAllLanes>;
defm VLD2DUPd16wb   : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>;
defm VLD2DUPd32wb   : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>;
defm VLD2DUPd8x2wb  : VLD2DUPWB<{0,0,1,0}, "8",  VecListDPairSpacedAllLanes>;
defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;
// VLD3DUP : Vector Load (single 3-element structure to all lanes)
//
// Three DPR results and one addrmode6dup address. Encoding bit 4 is forced to
// 0 here rather than being taken from $Rn.
class VLD3DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6dup:$Rn),
          IIC_VLD3dup,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
  let DecoderMethod = "DecodeVLD3DupInstruction";
  let Rm            = 0b1111;  // Rm is a constant here; there is no Rm operand.
  let Inst{4}       = 0;
}
// VLD3DUP records with op7_4 bit 5 = 0, followed by their pseudos.
def VLD3DUPd8        : VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16       : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32       : VLD3DUP<{1,0,0,?}, "32">;

def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;

// ...with double-spaced registers (not used for codegen):
// Same element sizes with op7_4 bit 5 = 1.
def VLD3DUPq8        : VLD3DUP<{0,0,1,?}, "8">;
def VLD3DUPq16       : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPq32       : VLD3DUP<{1,0,1,?}, "32">;
// ...with address register writeback:
//
// VLD3DUP with an extra GPR result $wb (tied to $Rn.addr) and an am6offset
// input $Rm. Encoding bit 4 is forced to 0.
class VLD3DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm),
          IIC_VLD3dupu,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVLD3DupInstruction";
  let Inst{4}       = 0;
}
// Writeback VLD3DUP records ("d": op7_4 bit 5 = 0, "q": bit 5 = 1).
def VLD3DUPd8_UPD        : VLD3DUPWB<{0,0,0,0}, "8">;
def VLD3DUPd16_UPD       : VLD3DUPWB<{0,1,0,?}, "16">;
def VLD3DUPd32_UPD       : VLD3DUPWB<{1,0,0,?}, "32">;
def VLD3DUPq8_UPD        : VLD3DUPWB<{0,0,1,0}, "8">;
def VLD3DUPq16_UPD       : VLD3DUPWB<{0,1,1,?}, "16">;
def VLD3DUPq32_UPD       : VLD3DUPWB<{1,0,1,?}, "32">;

// Writeback pseudos for the "d" records, built on VLDQQWBPseudo.
def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
// VLD4DUP : Vector Load (single 4-element structure to all lanes)
//
// Four DPR results and one addrmode6dup address. No selection pattern.
class VLD4DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6dup:$Rn),
          IIC_VLD4dup,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
  let DecoderMethod = "DecodeVLD4DupInstruction";
  let Rm            = 0b1111;  // Rm is a constant here; there is no Rm operand.
  let Inst{4}       = Rn{4};   // Encoding bit 4 is taken from bit 4 of $Rn.
}
// VLD4DUP records with op7_4 bit 5 = 0. The 32-bit form leaves bit 6 unset in
// op7_4 and wires it to Rn{5} in its body.
def VLD4DUPd8        : VLD4DUP<{0,0,0,?}, "8">;
def VLD4DUPd16       : VLD4DUP<{0,1,0,?}, "16">;
def VLD4DUPd32       : VLD4DUP<{1,?,0,?}, "32"> {
  let Inst{6} = Rn{5};
}

def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;

// ...with double-spaced registers (not used for codegen):
// Same element sizes with op7_4 bit 5 = 1.
def VLD4DUPq8        : VLD4DUP<{0,0,1,?}, "8">;
def VLD4DUPq16       : VLD4DUP<{0,1,1,?}, "16">;
def VLD4DUPq32       : VLD4DUP<{1,?,1,?}, "32"> {
  let Inst{6} = Rn{5};
}
// ...with address register writeback:
//
// VLD4DUP with an extra GPR result $wb (tied to $Rn.addr) and an am6offset
// input $Rm.
class VLD4DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm),
          IIC_VLD4dupu,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVLD4DupInstruction";
  let Inst{4}       = Rn{4};   // Encoding bit 4 is taken from bit 4 of $Rn.
}
// Writeback VLD4DUP records ("d": op7_4 bit 5 = 0, "q": bit 5 = 1). The
// 32-bit forms wire encoding bit 6 to Rn{5}.
def VLD4DUPd8_UPD        : VLD4DUPWB<{0,0,0,0}, "8">;
def VLD4DUPd16_UPD       : VLD4DUPWB<{0,1,0,?}, "16">;
def VLD4DUPd32_UPD       : VLD4DUPWB<{1,?,0,?}, "32"> {
  let Inst{6} = Rn{5};
}
def VLD4DUPq8_UPD        : VLD4DUPWB<{0,0,1,0}, "8">;
def VLD4DUPq16_UPD       : VLD4DUPWB<{0,1,1,?}, "16">;
def VLD4DUPq32_UPD       : VLD4DUPWB<{1,?,1,?}, "32"> {
  let Inst{6} = Rn{5};
}

// Writeback pseudos for the "d" records, built on VLDQQWBPseudo.
def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
// Classes for VST* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.

// Store pseudo taking a QPR source; no results.
class VSTQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs),
                (ins addrmode6:$addr, QPR:$src),
                itin, "">;

// Writeback form with an am6offset operand; $wb is tied to $addr.addr.
class VSTQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src),
                itin,
                "$addr.addr = $wb">;

// Writeback form with no offset operand.
class VSTQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, QPR:$src),
                itin,
                "$addr.addr = $wb">;

// Writeback form with an rGPR offset operand.
class VSTQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QPR:$src),
                itin,
                "$addr.addr = $wb">;
// Store pseudo taking a QQPR source; no results.
class VSTQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs),
                (ins addrmode6:$addr, QQPR:$src),
                itin, "">;

// Writeback form with an am6offset operand; $wb is tied to $addr.addr.
class VSTQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src),
                itin,
                "$addr.addr = $wb">;

// Writeback form with no offset operand.
class VSTQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, QQPR:$src),
                itin,
                "$addr.addr = $wb">;

// Writeback form with an rGPR offset operand.
class VSTQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QQPR:$src),
                itin,
                "$addr.addr = $wb">;
// Store pseudo taking a QQQQPR source; no results.
class VSTQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs),
                (ins addrmode6:$addr, QQQQPR:$src),
                itin, "">;

// Writeback form with an am6offset operand; $wb is tied to $addr.addr.
class VSTQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src),
                itin,
                "$addr.addr = $wb">;
// VST1 : Vector Store (multiple single elements)
//
// One-D-register-list form (VecListOneD) without writeback; op11_8 is fixed
// at 0b0111. No results and no selection pattern.
class VST1D<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0111, op7_4,
          (outs),
          (ins addrmode6:$Rn, VecListOneD:$Vd),
          IIC_VST1,
          "vst1", Dt, "$Vd, $Rn", "", []> {
  let DecoderMethod = "DecodeVSTInstruction";
  let Rm            = 0b1111;  // Rm is a constant here; there is no Rm operand.
  let Inst{4}       = Rn{4};   // Encoding bit 4 is taken from bit 4 of $Rn.
}
// D-pair form of VST1 (VecListDPair) without writeback; op11_8 is fixed at
// 0b1010 and two $Rn bits are wired into the encoding.
class VST1Q<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b1010, op7_4,
          (outs),
          (ins addrmode6:$Rn, VecListDPair:$Vd),
          IIC_VST1x2,
          "vst1", Dt, "$Vd, $Rn", "", []> {
  let DecoderMethod = "DecodeVSTInstruction";
  let Rm            = 0b1111;    // Rm is a constant here; there is no Rm operand.
  let Inst{5-4}     = Rn{5-4};   // Encoding bits 5-4 come from bits 5-4 of $Rn.
}
// VST1 records without writeback: "d" forms via VST1D, "q" forms via VST1Q,
// one per element size.
def VST1d8  : VST1D<{0,0,0,?}, "8">;
def VST1d16 : VST1D<{0,1,0,?}, "16">;
def VST1d32 : VST1D<{1,0,0,?}, "32">;
def VST1d64 : VST1D<{1,1,0,?}, "64">;

def VST1q8  : VST1Q<{0,0,?,?}, "8">;
def VST1q16 : VST1Q<{0,1,?,?}, "16">;
def VST1q32 : VST1Q<{1,0,?,?}, "32">;
def VST1q64 : VST1Q<{1,1,?,?}, "64">;
// ...with address register writeback:
//
// One-D-register-list VST1 with address writeback. Produces two records per
// instantiation:
//   _fixed    - "$Vd, $Rn!" syntax; Rm is the constant 0b1101.
//   _register - "$Vd, $Rn, $Rm" syntax with an rGPR operand $Rm.
// Both return the updated address in $wb (tied to $Rn.addr).
//
// The itinerary is the store-side IIC_VST1u. These records are stores (the
// non-writeback VST1D uses IIC_VST1), so they must not be scheduled with the
// load itinerary IIC_VLD1u.
multiclass VST1DWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListOneD:$Vd), IIC_VST1u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbFixed";
  }
  def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd),
                        IIC_VST1u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbRegister";
  }
}
// D-pair VST1 with address writeback. Produces two records per
// instantiation:
//   _fixed    - "$Vd, $Rn!" syntax; Rm is the constant 0b1101.
//   _register - "$Vd, $Rn, $Rm" syntax with an rGPR operand $Rm.
// Both return the updated address in $wb (tied to $Rn.addr).
//
// The itinerary is the store-side IIC_VST1x2u. These records are stores (the
// non-writeback VST1Q uses IIC_VST1x2), so they must not be scheduled with
// the load itinerary IIC_VLD1x2u.
multiclass VST1QWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VST1x2u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbFixed";
  }
  def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd),
                        IIC_VST1x2u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbRegister";
  }
}
// Writeback VST1 instantiations; each defm yields a _fixed and a _register
// record.
defm VST1d8wb  : VST1DWB<{0,0,0,?}, "8">;
defm VST1d16wb : VST1DWB<{0,1,0,?}, "16">;
defm VST1d32wb : VST1DWB<{1,0,0,?}, "32">;
defm VST1d64wb : VST1DWB<{1,1,0,?}, "64">;

defm VST1q8wb  : VST1QWB<{0,0,?,?}, "8">;
defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">;
defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">;
defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">;
// ...with 3 registers
//
// Three-D-register-list VST1 (VecListThreeD) without writeback; op11_8 is
// fixed at 0b0110.
class VST1D3<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0110, op7_4,
          (outs),
          (ins addrmode6:$Rn, VecListThreeD:$Vd),
          IIC_VST1x3,
          "vst1", Dt, "$Vd, $Rn", "", []> {
  let DecoderMethod = "DecodeVSTInstruction";
  let Rm            = 0b1111;  // Rm is a constant here; there is no Rm operand.
  let Inst{4}       = Rn{4};   // Encoding bit 4 is taken from bit 4 of $Rn.
}
// Three-D-register-list VST1 with address writeback. Produces two records
// per instantiation:
//   _fixed    - "$Vd, $Rn!" syntax; Rm is the constant 0b1101.
//   _register - "$Vd, $Rn, $Rm" syntax with an rGPR operand $Rm.
// Both return the updated address in $wb (tied to $Rn.addr).
//
// The itinerary is the store-side IIC_VST1x3u, the same one the
// VST1d64TPseudoWB_* pseudos use; these records are stores and must not be
// scheduled with the load itinerary IIC_VLD1x3u.
//
// NOTE(review): both records wire Inst{5-4} from Rn{5-4}, whereas the
// non-writeback VST1D3 only wires Inst{4}. Left unchanged; confirm the
// two-bit field is intended here.
multiclass VST1D3WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VST1x3u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbFixed";
  }
  def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
                        IIC_VST1x3u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbRegister";
  }
}
// Three-register VST1 records ("T" suffix), one per element size.
def  VST1d8T                   : VST1D3<{0,0,0,?}, "8">;
def  VST1d16T                  : VST1D3<{0,1,0,?}, "16">;
def  VST1d32T                  : VST1D3<{1,0,0,?}, "32">;
def  VST1d64T                  : VST1D3<{1,1,0,?}, "64">;

// Writeback flavours; each defm yields a _fixed and a _register record.
defm VST1d8Twb                 : VST1D3WB<{0,0,0,?}, "8">;
defm VST1d16Twb                : VST1D3WB<{0,1,0,?}, "16">;
defm VST1d32Twb                : VST1D3WB<{1,0,0,?}, "32">;
defm VST1d64Twb                : VST1D3WB<{1,1,0,?}, "64">;

// Pseudos for the 64-bit form. Both writeback pseudos derive from
// VSTQQWBPseudo (the variant with an am6offset operand).
def  VST1d64TPseudo            : VSTQQPseudo<IIC_VST1x3>;
def  VST1d64TPseudoWB_fixed    : VSTQQWBPseudo<IIC_VST1x3u>;
def  VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;
// ...with 4 registers
//
// Four-D-register-list VST1 (VecListFourD) without writeback; op11_8 is fixed
// at 0b0010.
class VST1D4<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0010, op7_4,
          (outs),
          (ins addrmode6:$Rn, VecListFourD:$Vd),
          IIC_VST1x4,
          "vst1", Dt, "$Vd, $Rn", "",
          []> {
  let DecoderMethod = "DecodeVSTInstruction";
  let Rm            = 0b1111;    // Rm is a constant here; there is no Rm operand.
  let Inst{5-4}     = Rn{5-4};   // Encoding bits 5-4 come from bits 5-4 of $Rn.
}
// Four-D-register-list VST1 with address writeback. Produces two records per
// instantiation:
//   _fixed    - "$Vd, $Rn!" syntax; Rm is the constant 0b1101.
//   _register - "$Vd, $Rn, $Rm" syntax with an rGPR operand $Rm.
// Both return the updated address in $wb (tied to $Rn.addr).
//
// The itinerary is the store-side IIC_VST1x4u, the same one the
// VST1d64QPseudoWB_* pseudos use; these records are stores and must not be
// scheduled with the load itinerary IIC_VLD1x4u.
multiclass VST1D4WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VST1x4u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbFixed";
  }
  def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VST1x4u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbRegister";
  }
}
// Four-register VST1 records ("Q" suffix), one per element size.
def  VST1d8Q                   : VST1D4<{0,0,?,?}, "8">;
def  VST1d16Q                  : VST1D4<{0,1,?,?}, "16">;
def  VST1d32Q                  : VST1D4<{1,0,?,?}, "32">;
def  VST1d64Q                  : VST1D4<{1,1,?,?}, "64">;

// Writeback flavours; each defm yields a _fixed and a _register record.
defm VST1d8Qwb                 : VST1D4WB<{0,0,?,?}, "8">;
defm VST1d16Qwb                : VST1D4WB<{0,1,?,?}, "16">;
defm VST1d32Qwb                : VST1D4WB<{1,0,?,?}, "32">;
defm VST1d64Qwb                : VST1D4WB<{1,1,?,?}, "64">;

// Pseudos for the 64-bit form. Both writeback pseudos derive from
// VSTQQWBPseudo (the variant with an am6offset operand).
def  VST1d64QPseudo            : VSTQQPseudo<IIC_VST1x4>;
def  VST1d64QPseudoWB_fixed    : VSTQQWBPseudo<IIC_VST1x4u>;
def  VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;
// VST2 : Vector Store (multiple 2-element structures)
//
// Form without writeback. The register-list operand class and the itinerary
// are supplied by the caller.
//   op11_8, op7_4 - encoding fields forwarded to NLdSt.
//   Dt            - data-type suffix used in the assembly string.
//   VdTy          - operand class of the register list $Vd.
//   itin          - instruction itinerary class.
class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin>
  : NLdSt<0, 0b00, op11_8, op7_4,
          (outs),
          (ins addrmode6:$Rn, VdTy:$Vd),
          itin,
          "vst2", Dt, "$Vd, $Rn", "", []> {
  let DecoderMethod = "DecodeVSTInstruction";
  let Rm            = 0b1111;    // Rm is a constant here; there is no Rm operand.
  let Inst{5-4}     = Rn{5-4};   // Encoding bits 5-4 come from bits 5-4 of $Rn.
}
// VST2 records: "d" forms use a VecListDPair list with op11_8 = 0b1000,
// "q" forms use a VecListFourD list with op11_8 = 0b0011.
def VST2d8        : VST2<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VST2>;
def VST2d16       : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2>;
def VST2d32       : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2>;

def VST2q8        : VST2<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VST2x2>;
def VST2q16       : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>;
def VST2q32       : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>;

// Pseudos for the "q" forms, built on VSTQQPseudo.
def VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>;
def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
// ...with address register writeback:
//
// VST2 with address writeback over a caller-supplied register-list operand
// class (VdTy). Produces two records per instantiation:
//   _fixed    - "$Vd, $Rn!" syntax; Rm is the constant 0b1101.
//   _register - "$Vd, $Rn, $Rm" syntax with an rGPR operand $Rm.
// Both return the updated address in $wb (tied to $Rn.addr).
//
// The itinerary is the store-side IIC_VST2u. These records are vst2 stores
// (the non-writeback counterparts use IIC_VST2), so they must not be
// scheduled with the VLD1 load itinerary IIC_VLD1u.
multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
                   RegisterOperand VdTy> {
  def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VdTy:$Vd), IIC_VST2u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbFixed";
  }
  def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VST2u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbRegister";
  }
}
// Four-D-register-list VST2 (op11_8 = 0b0011) with address writeback.
// Produces two records per instantiation:
//   _fixed    - "$Vd, $Rn!" syntax; Rm is the constant 0b1101.
//   _register - "$Vd, $Rn, $Rm" syntax with an rGPR operand $Rm.
// Both return the updated address in $wb (tied to $Rn.addr).
//
// The itinerary is the store-side IIC_VST2x2u, the same one the
// VST2q*PseudoWB_* pseudos use; these records are vst2 stores and must not be
// scheduled with the VLD1 load itinerary IIC_VLD1u.
multiclass VST2QWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VST2x2u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbFixed";
  }
  def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VST2x2u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbRegister";
  }
}
// Writeback VST2 instantiations; each defm yields a _fixed and a _register
// record.
defm VST2d8wb                 : VST2DWB<0b1000, {0,0,?,?}, "8",  VecListDPair>;
defm VST2d16wb                : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair>;
defm VST2d32wb                : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair>;

defm VST2q8wb                 : VST2QWB<{0,0,?,?}, "8">;
defm VST2q16wb                : VST2QWB<{0,1,?,?}, "16">;
defm VST2q32wb                : VST2QWB<{1,0,?,?}, "32">;

// Writeback pseudos for the "q" forms: the _fixed ones take no offset operand
// and the _register ones take an rGPR offset.
def  VST2q8PseudoWB_fixed     : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def  VST2q16PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def  VST2q32PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def  VST2q8PseudoWB_register  : VSTQQWBregisterPseudo<IIC_VST2x2u>;
def  VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
def  VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
// ...with double-spaced registers
// These reuse the VST2 class and the VST2DWB multiclass with
// op11_8 = 0b1001 and the VecListDPairSpaced register list.  The
// non-writeback forms use the IIC_VST2 itinerary.
def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2>;
def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2>;
def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2>;
// Writeback variants; each defm yields a _fixed and a _register record.
defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced>;
defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced>;
defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced>;
// VST3 : Vector Store (multiple 3-element structures)
// Non-writeback VST3 taking three separate DPR operands, printed as
// "{$Vd, $src2, $src3}, $Rn".  No outputs and no selection pattern.
//   op11_8, op7_4 - encoding fields forwarded to NLdSt.
//   Dt            - data-type suffix string.
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
"vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
let Rm = 0b1111; // Constant: this form has no $Rm operand.
let Inst{4} = Rn{4}; // Fills the '?' bit of op7_4 from the addrmode6 operand.
let DecoderMethod = "DecodeVSTInstruction";
}
// VST3 with op11_8 = 0b0100 for 8-, 16- and 32-bit elements (the
// double-spaced variants defined further down use op11_8 = 0b0101).
def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">;
def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;
// Matching pseudo-instructions, sharing the IIC_VST3 itinerary.
def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>;
def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;
// ...with address register writeback:
// Same operands as VST3D plus an am6offset $Rm increment; the updated base
// address is returned in $wb and tied to $Rn ("$Rn.addr = $wb").  Printed as
// "{$Vd, $src2, $src3}, $Rn$Rm".  Uses the IIC_VST3u itinerary.
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
"vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4}; // Fills the '?' bit of op7_4 from the addrmode6 operand.
let DecoderMethod = "DecodeVSTInstruction";
}
// Writeback VST3 with op11_8 = 0b0100 for 8-, 16- and 32-bit elements.
def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">;
def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;
// Matching writeback pseudo-instructions (IIC_VST3u itinerary).
def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
// ...with double-spaced registers:
// Same classes as the VST3d* definitions, but with op11_8 = 0b0101.
def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">;
def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;
def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">;
def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;
// Writeback pseudo-instructions built on the QQQQ pseudo class.
def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
// ...alternate versions to be allocated odd register numbers:
// Both non-writeback (IIC_VST3) and writeback (IIC_VST3u) flavours.
def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>;
def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>;
def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>;
def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
// VST4 : Vector Store (multiple 4-element structures)
// Non-writeback VST4 taking four separate DPR operands, printed as
// "{$Vd, $src2, $src3, $src4}, $Rn".  No outputs and no selection pattern.
//   op11_8, op7_4 - encoding fields forwarded to NLdSt.
//   Dt            - data-type suffix string.
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
"", []> {
let Rm = 0b1111; // Constant: this form has no $Rm operand.
let Inst{5-4} = Rn{5-4}; // Fills the '?' bits of op7_4 from the address operand.
let DecoderMethod = "DecodeVSTInstruction";
}
// VST4 with op11_8 = 0b0000 for 8-, 16- and 32-bit elements (the
// double-spaced variants defined further down use op11_8 = 0b0001).
def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">;
def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;
// Matching pseudo-instructions, sharing the IIC_VST4 itinerary.
def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>;
def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;
// ...with address register writeback:
// Same operands as VST4D plus an am6offset $Rm increment; the updated base
// address is returned in $wb and tied to $Rn ("$Rn.addr = $wb").  Printed as
// "{$Vd, $src2, $src3, $src4}, $Rn$Rm".  Uses the IIC_VST4u itinerary.
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
"vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4}; // Fills the '?' bits of op7_4 from the address operand.
let DecoderMethod = "DecodeVSTInstruction";
}
// Writeback VST4 with op11_8 = 0b0000 for 8-, 16- and 32-bit elements.
def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">;
def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;
// Matching writeback pseudo-instructions (IIC_VST4u itinerary).
def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
// ...with double-spaced registers:
// Same classes as the VST4d* definitions, but with op11_8 = 0b0001.
def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">;
def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;
def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">;
def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;
// Writeback pseudo-instructions built on the QQQQ pseudo class.
def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
// ...alternate versions to be allocated odd register numbers:
// Both non-writeback (IIC_VST4) and writeback (IIC_VST4u) flavours.
def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>;
def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>;
def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>;
def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
// Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Lane-store pseudo with a single QPR source: address, source register and
// lane number in, nothing out, no constraint string.
//   itin - instruction itinerary class.
class VSTQLNPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
itin, "">;
// Writeback flavour of VSTQLNPseudo: adds an am6offset $offset input and a
// GPR $wb output tied to the address ("$addr.addr = $wb").
class VSTQLNWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QPR:$src,
nohash_imm:$lane), itin, "$addr.addr = $wb">;
// Lane-store pseudo with a QQPR source: address, source register and lane
// number in, nothing out, no constraint string.
//   itin - instruction itinerary class.
class VSTQQLNPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
itin, "">;
// Writeback flavour of VSTQQLNPseudo: adds an am6offset $offset input and a
// GPR $wb output tied to the address ("$addr.addr = $wb").
class VSTQQLNWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
nohash_imm:$lane), itin, "$addr.addr = $wb">;
// Lane-store pseudo with a QQQQPR source: address, source register and lane
// number in, nothing out, no constraint string.
//   itin - instruction itinerary class.
class VSTQQQQLNPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
itin, "">;
// Writeback flavour of VSTQQQQLNPseudo: adds an am6offset $offset input and
// a GPR $wb output tied to the address ("$addr.addr = $wb").
class VSTQQQQLNWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
nohash_imm:$lane), itin, "$addr.addr = $wb">;
// VST1LN : Vector Store (single element from one lane)
// Single-lane store from a DPR, printed as "{$Vd[$lane]}, $Rn".
//   op11_8, op7_4 - encoding fields forwarded to NLdStLn.
//   Dt            - data-type suffix string.
//   Ty            - vector type given to $Vd in the selection pattern.
//   StoreOp       - store fragment at the root of the pattern.
//   ExtractOp     - node that extracts lane $lane from $Vd.
//   AddrMode      - operand class used for the address $Rn.
// Selection pattern: StoreOp(ExtractOp(Ty $Vd, $lane), $Rn).
class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs),
(ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
[(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> {
let Rm = 0b1111; // Constant: this form has no $Rm operand.
let DecoderMethod = "DecodeVST1LN";
}
// Q-register VST1LN pseudo.  Derives from VSTQLNPseudo with the IIC_VST1ln
// itinerary and attaches the selection pattern
// StoreOp(ExtractOp(Ty $src, $lane), $addr).
//   Ty        - vector type given to the QPR source in the pattern.
//   StoreOp   - store fragment at the root of the pattern.
//   ExtractOp - node that extracts lane $lane from $src.
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
: VSTQLNPseudo<IIC_VST1ln> {
let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
addrmode6:$addr)];
}
// 8-bit lane store from a v8i8 D register, selected for truncstorei8 of a
// NEONvgetlaneu result.  The 3-bit lane number fills Inst{7-5}; the last
// op7_4 bit is fixed to 0.
def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
NEONvgetlaneu, addrmode6> {
let Inst{7-5} = lane{2-0};
}
// 16-bit lane store from a v4i16 D register, selected for truncstorei16 of
// a NEONvgetlaneu result.  The 2-bit lane number fills Inst{7-6} and op7_4
// fixes Inst{5} to 0, leaving Inst{4} as the only alignment bit.  It is
// taken from Rn{4}, the same addrmode6 bit the other single-lane stores in
// this file copy into Inst{4}.
// NOTE(review): assumes the addrmode6 encoder reports the element-sized
// alignment in Rn{4} (Rn{5} being used only for larger alignments) --
// confirm against the MC code emitter.
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
                       NEONvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
// 32-bit lane store from a v2i32 D register, selected for a plain store of
// an extractelt result.  Uses the addrmode6oneL32 address operand.  The
// 1-bit lane number fills Inst{7}; Inst{5-4} come from the address operand.
def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
addrmode6oneL32> {
let Inst{7} = lane{0};
let Inst{5-4} = Rn{5-4};
}
// Q-register single-lane store pseudos for 8-, 16- and 32-bit elements.
def VST1LNq8Pseudo : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
// Floating-point lane stores reuse the 32-bit integer instructions: v2f32
// selects VST1LNd32 and v4f32 selects VST1LNq32Pseudo.
def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
(VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
(VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
// ...with address register writeback:
// Post-incrementing single-lane store from a DPR, printed as
// "{$Vd[$lane]}, $Rn$Rm".  The updated base address is produced in $wb and
// tied to $Rn.
//   op11_8, op7_4 - encoding fields forwarded to NLdStLn.
//   Dt            - data-type suffix string.
//   Ty            - vector type given to $Vd in the selection pattern.
//   StoreOp       - post-indexed store fragment at the root of the pattern.
//   ExtractOp     - node that extracts lane $lane from $Vd.
//   AddrMode      - operand class used for the address $Rn.
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4,
            (outs GPR:$wb),
            (ins AddrMode:$Rn, am6offset:$Rm, DPR:$Vd, nohash_imm:$lane),
            IIC_VST1lnu, "vst1", Dt,
            "\\{$Vd[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb",
            [(set GPR:$wb,
                  (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                           AddrMode:$Rn, am6offset:$Rm))]> {
  let DecoderMethod = "DecodeVST1LN";
}
// Q-register writeback VST1LN pseudo.  Derives from VSTQLNWBPseudo with the
// IIC_VST1lnu itinerary and attaches the selection pattern
// $wb = StoreOp(ExtractOp(Ty $src, $lane), $addr, $offset).
//   Ty        - vector type given to the QPR source in the pattern.
//   StoreOp   - post-indexed store fragment at the root of the pattern.
//   ExtractOp - node that extracts lane $lane from $src.
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
: VSTQLNWBPseudo<IIC_VST1lnu> {
let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
addrmode6:$addr, am6offset:$offset))];
}
// Writeback 8-bit lane store from a v8i8 D register, selected for
// post_truncsti8 of a NEONvgetlaneu result.  The 3-bit lane number fills
// Inst{7-5}; the last op7_4 bit is fixed to 0.
def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
NEONvgetlaneu, addrmode6> {
let Inst{7-5} = lane{2-0};
}
// Writeback 16-bit lane store from a v4i16 D register, selected for
// post_truncsti16 of a NEONvgetlaneu result.  The 2-bit lane number fills
// Inst{7-6} and op7_4 fixes Inst{5} to 0, leaving Inst{4} as the only
// alignment bit.  It is taken from Rn{4}, the same addrmode6 bit the other
// single-lane stores in this file copy into Inst{4}.
// NOTE(review): assumes the addrmode6 encoder reports the element-sized
// alignment in Rn{4} (Rn{5} being used only for larger alignments) --
// confirm against the MC code emitter.
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
                             NEONvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
// Writeback 32-bit lane store from a v2i32 D register, selected for
// post_store of an extractelt result.  Uses the addrmode6oneL32 address
// operand.  The 1-bit lane number fills Inst{7}; Inst{5-4} come from the
// address operand.
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
extractelt, addrmode6oneL32> {
let Inst{7} = lane{0};
let Inst{5-4} = Rn{5-4};
}
// Q-register writeback single-lane store pseudos for 8-, 16- and 32-bit
// elements, using the post-indexed store fragments.
def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
// VST2LN : Vector Store (single 2-element structure from one lane)
// Non-writeback lane store from two DPRs, printed as
// "{$Vd[$lane], $src2[$lane]}, $Rn".  No selection pattern.
//   op11_8, op7_4 - encoding fields forwarded to NLdStLn.
//   Dt            - data-type suffix string.
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
"", []> {
let Rm = 0b1111; // Constant: this form has no $Rm operand.
let Inst{4} = Rn{4}; // Inst{4} is taken from bit 4 of the addrmode6 operand.
let DecoderMethod = "DecodeVST2LN";
}
// The lane number is placed in the top op7_4 bits; its width shrinks as the
// element size grows (3 bits for "8", 2 for "16", 1 for "32").
def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
let Inst{7-5} = lane{2-0};
}
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
let Inst{7} = lane{0};
}
// Matching pseudo-instructions with a single QPR source.
def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;
// ...with double-spaced registers:
// Same as VST2LNd16 / VST2LNd32 except that one additional op7_4 bit is set
// to 1.  Inst{4} = Rn{4} is inherited from the VST2LN class, so it is not
// repeated here; this keeps the definitions in line with VST2LNq*_UPD.
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}
// Matching pseudo-instructions with a QQPR source.
def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
// ...with address register writeback:
// Same operands as VST2LN plus an am6offset $Rm increment; the updated base
// address is returned in $wb and tied to $Rn ("$Rn.addr = $wb").  Printed as
// "{$Vd[$lane], $src2[$lane]}, $Rn$Rm".  Uses the IIC_VST2lnu itinerary.
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
"\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4}; // Inst{4} is taken from bit 4 of the addrmode6 operand.
let DecoderMethod = "DecodeVST2LN";
}
// Writeback counterparts of VST2LNd8/16/32, with the same op11_8 / op7_4
// values and lane-bit placement.
def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
let Inst{7-5} = lane{2-0};
}
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
let Inst{7} = lane{0};
}
// Writeback pseudo-instructions with a single QPR source.
def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
// Writeback counterparts of VST2LNq16/32 (one additional op7_4 bit set).
def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
let Inst{7} = lane{0};
}
// Writeback pseudo-instructions with a QQPR source.
def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
// VST3LN : Vector Store (single 3-element structure from one lane)
// Non-writeback lane store from three DPRs, printed as
// "{$Vd[$lane], $src2[$lane], $src3[$lane]}, $Rn".  No selection pattern.
// Unlike VST2LN and VST4LN, no Inst bit is taken from $Rn here; the
// instantiations below fix the last op7_4 bit to 0.
//   op11_8, op7_4 - encoding fields forwarded to NLdStLn.
//   Dt            - data-type suffix string.
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
"\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
let Rm = 0b1111; // Constant: this form has no $Rm operand.
let DecoderMethod = "DecodeVST3LN";
}
// The lane number is placed in the top op7_4 bits (3 bits for "8", 2 for
// "16", 1 for "32"); all remaining op7_4 bits are fixed to 0.
def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
let Inst{7-5} = lane{2-0};
}
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
let Inst{7} = lane{0};
}
// Matching pseudo-instructions with a QQPR source.
def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
// ...with double-spaced registers:
// Same as VST3LNd16 / VST3LNd32 except that one additional op7_4 bit is
// set to 1.
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
let Inst{7} = lane{0};
}
// Matching pseudo-instructions with a QQQQPR source.
def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
// ...with address register writeback:
// Same operands as VST3LN plus an am6offset $Rm increment; the updated base
// address is returned in $wb and tied to $Rn ("$Rn.addr = $wb").  Printed as
// "{$Vd[$lane], $src2[$lane], $src3[$lane]}, $Rn$Rm".  Uses the IIC_VST3lnu
// itinerary.
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
IIC_VST3lnu, "vst3", Dt,
"\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let DecoderMethod = "DecodeVST3LN";
}
// Writeback counterparts of VST3LNd8/16/32, with the same op11_8 / op7_4
// values and lane-bit placement.
def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
let Inst{7-5} = lane{2-0};
}
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
let Inst{7} = lane{0};
}
// Writeback pseudo-instructions with a QQPR source.
def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
// Writeback counterparts of VST3LNq16/32 (one additional op7_4 bit set).
def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
let Inst{7} = lane{0};
}
// Writeback pseudo-instructions with a QQQQPR source.
def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
// VST4LN : Vector Store (single 4-element structure from one lane)
// Non-writeback lane store from four DPRs, printed as
// "{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]}, $Rn".
// No selection pattern.
//   op11_8, op7_4 - encoding fields forwarded to NLdStLn.
//   Dt            - data-type suffix string.
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
"\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
"", []> {
let Rm = 0b1111; // Constant: this form has no $Rm operand.
let Inst{4} = Rn{4}; // Inst{4} is taken from bit 4 of the addrmode6 operand.
let DecoderMethod = "DecodeVST4LN";
}
// The lane number is placed in the top op7_4 bits (3 bits for "8", 2 for
// "16", 1 for "32").  The 32-bit form additionally takes Inst{5} from the
// address operand, on top of the Inst{4} set by the VST4LN class.
def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> {
let Inst{7-5} = lane{2-0};
}
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
let Inst{7} = lane{0};
let Inst{5} = Rn{5};
}
// Matching pseudo-instructions with a QQPR source.
def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
// ...with double-spaced registers:
// Same as VST4LNd16 / VST4LNd32 except that one additional op7_4 bit is
// set to 1.
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
let Inst{7} = lane{0};
let Inst{5} = Rn{5};
}
// Matching pseudo-instructions with a QQQQPR source.
def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
// ...with address register writeback:
// Same operands as VST4LN plus an am6offset $Rm increment; the updated base
// address is returned in $wb and tied to $Rn ("$Rn.addr = $wb").  Printed as
// "{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]}, $Rn$Rm".  Uses
// the IIC_VST4lnu itinerary.
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
IIC_VST4lnu, "vst4", Dt,
"\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4}; // Inst{4} is taken from bit 4 of the addrmode6 operand.
let DecoderMethod = "DecodeVST4LN";
}
// Writeback counterparts of VST4LNd8/16/32, with the same op11_8 / op7_4
// values and lane-bit placement.
def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
let Inst{7-5} = lane{2-0};
}
def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
let Inst{7} = lane{0};
let Inst{5} = Rn{5};
}
// Writeback pseudo-instructions with a QQPR source.
def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
// Writeback counterparts of VST4LNq16/32 (one additional op7_4 bit set).
def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
let Inst{7} = lane{0};
let Inst{5} = Rn{5};
}
// Writeback pseudo-instructions with a QQQQPR source.
def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
//===----------------------------------------------------------------------===//
// NEON pattern fragments
//===----------------------------------------------------------------------===//
// Extract D sub-registers of Q registers.
// Maps an immediate N to the target constant dsub_0 + N/8 (i32).
// NOTE(review): N is presumably an i8 lane index within a Q register, with
// eight such lanes per D sub-register -- confirm against the users.
def DSubReg_i8_reg : SDNodeXForm<imm, [{
  // The arithmetic below relies on the dsub_* indices being consecutive.
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  uint64_t DOffset = N->getZExtValue() / 8;
  return CurDAG->getTargetConstant(ARM::dsub_0 + DOffset, MVT::i32);
}]>;
// Maps an immediate N to the target constant dsub_0 + N/4 (i32).
// NOTE(review): N is presumably an i16 lane index within a Q register, with
// four such lanes per D sub-register -- confirm against the users.
def DSubReg_i16_reg : SDNodeXForm<imm, [{
  // The arithmetic below relies on the dsub_* indices being consecutive.
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  uint64_t DOffset = N->getZExtValue() / 4;
  return CurDAG->getTargetConstant(ARM::dsub_0 + DOffset, MVT::i32);
}]>;
// Maps an immediate N to the target constant dsub_0 + N/2 (i32).
// NOTE(review): N is presumably an i32 lane index within a Q register, with
// two such lanes per D sub-register -- confirm against the users.
def DSubReg_i32_reg : SDNodeXForm<imm, [{
  // The arithmetic below relies on the dsub_* indices being consecutive.
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  uint64_t DOffset = N->getZExtValue() / 2;
  return CurDAG->getTargetConstant(ARM::dsub_0 + DOffset, MVT::i32);
}]>;
// Maps an immediate N to the target constant dsub_0 + N (i32); the
// immediate is used directly as the D sub-register offset.
def DSubReg_f64_reg : SDNodeXForm<imm, [{
  // The arithmetic below relies on the dsub_* indices being consecutive.
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  uint64_t DOffset = N->getZExtValue();
  return CurDAG->getTargetConstant(ARM::dsub_0 + DOffset, MVT::i32);
}]>;
// Extract S sub-registers of Q/D registers.
// Maps an immediate N to the target constant ssub_0 + N (i32); the
// immediate is used directly as the S sub-register offset.
def SSubReg_f32_reg : SDNodeXForm<imm, [{
  // The arithmetic below relies on the ssub_* indices being consecutive.
  assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
  uint64_t SOffset = N->getZExtValue();
  return CurDAG->getTargetConstant(ARM::ssub_0 + SOffset, MVT::i32);
}]>;
// Translate lane numbers from Q registers to D subregs.
def SubReg_i8_lane : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N-