| //=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // AArch64 Instruction definitions. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| //===----------------------------------------------------------------------===// |
// AArch64 Instruction Predicate Definitions.
| // |
| def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">, |
| AssemblerPredicate<"HasV8_1aOps", "armv8.1a">; |
| def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">, |
| AssemblerPredicate<"HasV8_2aOps", "armv8.2a">; |
| def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">, |
| AssemblerPredicate<"HasV8_3aOps", "armv8.3a">; |
| def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, |
| AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">; |
| def HasNEON : Predicate<"Subtarget->hasNEON()">, |
| AssemblerPredicate<"FeatureNEON", "neon">; |
| def HasCrypto : Predicate<"Subtarget->hasCrypto()">, |
| AssemblerPredicate<"FeatureCrypto", "crypto">; |
| def HasDotProd : Predicate<"Subtarget->hasDotProd()">, |
| AssemblerPredicate<"FeatureDotProd", "dotprod">; |
| def HasCRC : Predicate<"Subtarget->hasCRC()">, |
| AssemblerPredicate<"FeatureCRC", "crc">; |
| def HasLSE : Predicate<"Subtarget->hasLSE()">, |
| AssemblerPredicate<"FeatureLSE", "lse">; |
| def HasRAS : Predicate<"Subtarget->hasRAS()">, |
| AssemblerPredicate<"FeatureRAS", "ras">; |
| def HasRDM : Predicate<"Subtarget->hasRDM()">, |
| AssemblerPredicate<"FeatureRDM", "rdm">; |
| def HasPerfMon : Predicate<"Subtarget->hasPerfMon()">; |
| def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">, |
| AssemblerPredicate<"FeatureFullFP16", "fullfp16">; |
| def HasSPE : Predicate<"Subtarget->hasSPE()">, |
| AssemblerPredicate<"FeatureSPE", "spe">; |
| def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">, |
| AssemblerPredicate<"FeatureFuseAES", |
| "fuse-aes">; |
| def HasSVE : Predicate<"Subtarget->hasSVE()">, |
| AssemblerPredicate<"FeatureSVE", "sve">; |
| def HasRCPC : Predicate<"Subtarget->hasRCPC()">, |
| AssemblerPredicate<"FeatureRCPC", "rcpc">; |
| |
| def IsLE : Predicate<"Subtarget->isLittleEndian()">; |
| def IsBE : Predicate<"!Subtarget->isLittleEndian()">; |
| def UseAlternateSExtLoadCVTF32 |
| : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">; |
| |
| def UseNegativeImmediates |
| : Predicate<"false">, AssemblerPredicate<"!FeatureNoNegativeImmediates", |
| "NegativeImmediates">; |
| |
| |
| //===----------------------------------------------------------------------===// |
| // AArch64-specific DAG Nodes. |
| // |
| |
// SDTBinaryArithWithFlagsOut - RES, FLAGS = op LHS, RHS
| def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2, |
| [SDTCisSameAs<0, 2>, |
| SDTCisSameAs<0, 3>, |
| SDTCisInt<0>, SDTCisVT<1, i32>]>; |
| |
// SDTBinaryArithWithFlagsIn - RES = op LHS, RHS, FLAGS_IN
| def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3, |
| [SDTCisSameAs<0, 1>, |
| SDTCisSameAs<0, 2>, |
| SDTCisInt<0>, |
| SDTCisVT<3, i32>]>; |
| |
// SDTBinaryArithWithFlagsInOut - RES, FLAGS_OUT = op LHS, RHS, FLAGS_IN
| def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, |
| [SDTCisSameAs<0, 2>, |
| SDTCisSameAs<0, 3>, |
| SDTCisInt<0>, |
| SDTCisVT<1, i32>, |
| SDTCisVT<4, i32>]>; |
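
// For reference, the nodes defined below pair with these profiles as follows:
//   ADDS/SUBS/ANDS use SDTBinaryArithWithFlagsOut (NZCV produced),
//   ADC/SBC use SDTBinaryArithWithFlagsIn (NZCV consumed), and
//   ADCS/SBCS use SDTBinaryArithWithFlagsInOut (NZCV consumed and produced).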
| |
| def SDT_AArch64Brcond : SDTypeProfile<0, 3, |
| [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>, |
| SDTCisVT<2, i32>]>; |
| def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>; |
| def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, |
| SDTCisVT<2, OtherVT>]>; |
| |
| |
| def SDT_AArch64CSel : SDTypeProfile<1, 4, |
| [SDTCisSameAs<0, 1>, |
| SDTCisSameAs<0, 2>, |
| SDTCisInt<3>, |
| SDTCisVT<4, i32>]>; |
| def SDT_AArch64CCMP : SDTypeProfile<1, 5, |
| [SDTCisVT<0, i32>, |
| SDTCisInt<1>, |
| SDTCisSameAs<1, 2>, |
| SDTCisInt<3>, |
| SDTCisInt<4>, |
| SDTCisVT<5, i32>]>; |
| def SDT_AArch64FCCMP : SDTypeProfile<1, 5, |
| [SDTCisVT<0, i32>, |
| SDTCisFP<1>, |
| SDTCisSameAs<1, 2>, |
| SDTCisInt<3>, |
| SDTCisInt<4>, |
| SDTCisVT<5, i32>]>; |
| def SDT_AArch64FCmp : SDTypeProfile<0, 2, |
| [SDTCisFP<0>, |
| SDTCisSameAs<0, 1>]>; |
| def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>; |
| def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>; |
| def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>, |
| SDTCisSameAs<0, 1>, |
| SDTCisSameAs<0, 2>]>; |
| def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>; |
| def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>; |
| def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, |
| SDTCisInt<2>, SDTCisInt<3>]>; |
| def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; |
| def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, |
| SDTCisSameAs<0,2>, SDTCisInt<3>]>; |
| def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>; |
| |
| def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; |
| def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>; |
| def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>; |
| def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, |
| SDTCisSameAs<0,2>]>; |
| def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, |
| SDTCisSameAs<0,2>, |
| SDTCisSameAs<0,3>]>; |
| def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>; |
| def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>; |
| |
| def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>; |
| |
| def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, |
| SDTCisPtrTy<1>]>; |
| |
| // Generates the general dynamic sequences, i.e. |
| // adrp x0, :tlsdesc:var |
| // ldr x1, [x0, #:tlsdesc_lo12:var] |
| // add x0, x0, #:tlsdesc_lo12:var |
| // .tlsdesccall var |
| // blr x1 |
| |
// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here).
// The profile's single operand is the variable being accessed.
| def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1, |
| [SDTCisPtrTy<0>]>; |
| |
| def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4, |
| [SDTCisVT<0, i64>, SDTCisVT<1, i32>, |
| SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>, |
| SDTCisSameAs<1, 4>]>; |
| |
| |
| // Node definitions. |
| def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>; |
| def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>; |
| def AArch64LOADgot : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>; |
| def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", |
| SDCallSeqStart<[ SDTCisVT<0, i32>, |
| SDTCisVT<1, i32> ]>, |
| [SDNPHasChain, SDNPOutGlue]>; |
| def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", |
| SDCallSeqEnd<[ SDTCisVT<0, i32>, |
| SDTCisVT<1, i32> ]>, |
| [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; |
| def AArch64call : SDNode<"AArch64ISD::CALL", |
| SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, |
| [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, |
| SDNPVariadic]>; |
| def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond, |
| [SDNPHasChain]>; |
| def AArch64cbz : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz, |
| [SDNPHasChain]>; |
| def AArch64cbnz : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz, |
| [SDNPHasChain]>; |
| def AArch64tbz : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz, |
| [SDNPHasChain]>; |
| def AArch64tbnz : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz, |
| [SDNPHasChain]>; |
| |
| |
| def AArch64csel : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>; |
| def AArch64csinv : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>; |
| def AArch64csneg : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>; |
| def AArch64csinc : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>; |
| def AArch64retflag : SDNode<"AArch64ISD::RET_FLAG", SDTNone, |
| [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; |
| def AArch64adc : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn >; |
| def AArch64sbc : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>; |
| def AArch64add_flag : SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut, |
| [SDNPCommutative]>; |
| def AArch64sub_flag : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>; |
| def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut, |
| [SDNPCommutative]>; |
| def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>; |
| def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>; |
| |
| def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>; |
| def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>; |
| def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>; |
| |
| def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; |
| |
| def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>; |
| |
| def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>; |
| def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>; |
| def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>; |
| def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>; |
| def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>; |
| |
| def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>; |
| def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>; |
| def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>; |
| def AArch64uzp2 : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>; |
| def AArch64trn1 : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>; |
| def AArch64trn2 : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>; |
| |
| def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>; |
| def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>; |
| def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>; |
| def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>; |
| def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>; |
| def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>; |
| def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>; |
| |
| def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>; |
| def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>; |
| def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>; |
| def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>; |
| |
| def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>; |
| def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>; |
| def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>; |
| def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>; |
| def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>; |
| def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>; |
| def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>; |
| def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>; |
| |
| def AArch64not: SDNode<"AArch64ISD::NOT", SDT_AArch64unvec>; |
| def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>; |
| def AArch64bsl: SDNode<"AArch64ISD::BSL", SDT_AArch64trivec>; |
| |
| def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>; |
| def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>; |
| def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>; |
| def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>; |
| def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>; |
| |
| def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>; |
| def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>; |
| def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>; |
| |
| def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>; |
| def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>; |
| def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>; |
| def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>; |
| def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>; |
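
// There is no dedicated CMTST node; the operation is built from the pieces
// above as NOT(CMEQz(a & b)), i.e. each lane is all-ones exactly when
// (a & b) is non-zero.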
| def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS), |
| (AArch64not (AArch64cmeqz (and node:$LHS, node:$RHS)))>; |
| |
| def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>; |
| def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>; |
| def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>; |
| def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>; |
| def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>; |
| |
| def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>; |
| def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>; |
| |
| def AArch64neg : SDNode<"AArch64ISD::NEG", SDT_AArch64unvec>; |
| |
| def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET, |
| [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; |
| |
| def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH, |
| [SDNPHasChain, SDNPSideEffect]>; |
| |
| def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>; |
| def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>; |
| |
| def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ", |
| SDT_AArch64TLSDescCallSeq, |
| [SDNPInGlue, SDNPOutGlue, SDNPHasChain, |
| SDNPVariadic]>; |
| |
| |
| def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge", |
| SDT_AArch64WrapperLarge>; |
| |
| def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>; |
| |
| def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, |
| SDTCisSameAs<1, 2>]>; |
| def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>; |
| def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>; |
| |
| def AArch64frecpe : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>; |
| def AArch64frecps : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>; |
| def AArch64frsqrte : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>; |
| def AArch64frsqrts : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>; |
| |
| def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>; |
| def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>; |
| def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>; |
| def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>; |
| def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>; |
| def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>; |
| |
| //===----------------------------------------------------------------------===// |
| |
| //===----------------------------------------------------------------------===// |
| |
| // AArch64 Instruction Predicate Definitions. |
| // We could compute these on a per-module basis but doing so requires accessing |
| // the Function object through the <Target>Subtarget and objections were raised |
| // to that (see post-commit review comments for r301750). |
| let RecomputePerFunction = 1 in { |
| def ForCodeSize : Predicate<"MF->getFunction()->optForSize()">; |
| def NotForCodeSize : Predicate<"!MF->getFunction()->optForSize()">; |
| // Avoid generating STRQro if it is slow, unless we're optimizing for code size. |
| def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || MF->getFunction()->optForSize()">; |
| } |
| |
| include "AArch64InstrFormats.td" |
| |
| //===----------------------------------------------------------------------===// |
| |
| //===----------------------------------------------------------------------===// |
| // Miscellaneous instructions. |
| //===----------------------------------------------------------------------===// |
| |
| let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in { |
// We set Sched to the empty list because we expect these instructions to be
// removed in most cases.
| def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), |
| [(AArch64callseq_start timm:$amt1, timm:$amt2)]>, |
| Sched<[]>; |
| def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), |
| [(AArch64callseq_end timm:$amt1, timm:$amt2)]>, |
| Sched<[]>; |
| } // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 |
| |
| let isReMaterializable = 1, isCodeGenOnly = 1 in { |
| // FIXME: The following pseudo instructions are only needed because remat |
| // cannot handle multiple instructions. When that changes, they can be |
| // removed, along with the AArch64Wrapper node. |
| |
| let AddedComplexity = 10 in |
| def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr), |
| [(set GPR64:$dst, (AArch64LOADgot tglobaladdr:$addr))]>, |
| Sched<[WriteLDAdr]>; |
| |
| // The MOVaddr instruction should match only when the add is not folded |
| // into a load or store address. |
| def MOVaddr |
| : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), |
| [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi), |
| tglobaladdr:$low))]>, |
| Sched<[WriteAdrAdr]>; |
| def MOVaddrJT |
| : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), |
| [(set GPR64:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi), |
| tjumptable:$low))]>, |
| Sched<[WriteAdrAdr]>; |
| def MOVaddrCP |
| : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), |
| [(set GPR64:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi), |
| tconstpool:$low))]>, |
| Sched<[WriteAdrAdr]>; |
| def MOVaddrBA |
| : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), |
| [(set GPR64:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi), |
| tblockaddress:$low))]>, |
| Sched<[WriteAdrAdr]>; |
| def MOVaddrTLS |
| : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), |
| [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi), |
| tglobaltlsaddr:$low))]>, |
| Sched<[WriteAdrAdr]>; |
| def MOVaddrEXT |
| : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), |
| [(set GPR64:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi), |
| texternalsym:$low))]>, |
| Sched<[WriteAdrAdr]>; |
| |
| } // isReMaterializable, isCodeGenOnly |
| |
| def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr), |
| (LOADgot tglobaltlsaddr:$addr)>; |
| |
| def : Pat<(AArch64LOADgot texternalsym:$addr), |
| (LOADgot texternalsym:$addr)>; |
| |
| def : Pat<(AArch64LOADgot tconstpool:$addr), |
| (LOADgot tconstpool:$addr)>; |
| |
| //===----------------------------------------------------------------------===// |
| // System instructions. |
| //===----------------------------------------------------------------------===// |
| |
| def HINT : HintI<"hint">; |
| def : InstAlias<"nop", (HINT 0b000)>; |
| def : InstAlias<"yield",(HINT 0b001)>; |
| def : InstAlias<"wfe", (HINT 0b010)>; |
| def : InstAlias<"wfi", (HINT 0b011)>; |
| def : InstAlias<"sev", (HINT 0b100)>; |
| def : InstAlias<"sevl", (HINT 0b101)>; |
| def : InstAlias<"esb", (HINT 0b10000)>, Requires<[HasRAS]>; |
| |
| // v8.2a Statistical Profiling extension |
| def : InstAlias<"psb $op", (HINT psbhint_op:$op)>, Requires<[HasSPE]>; |
| |
// As far as LLVM is concerned, this writes to the system's exclusive monitors.
| let mayLoad = 1, mayStore = 1 in |
| def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">; |
| |
| // NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot |
| // model patterns with sufficiently fine granularity. |
| let mayLoad = ?, mayStore = ? in { |
| def DMB : CRmSystemI<barrier_op, 0b101, "dmb", |
| [(int_aarch64_dmb (i32 imm32_0_15:$CRm))]>; |
| |
| def DSB : CRmSystemI<barrier_op, 0b100, "dsb", |
| [(int_aarch64_dsb (i32 imm32_0_15:$CRm))]>; |
| |
| def ISB : CRmSystemI<barrier_op, 0b110, "isb", |
| [(int_aarch64_isb (i32 imm32_0_15:$CRm))]>; |
| } |
| |
| // ARMv8.2 Dot Product |
| let Predicates = [HasDotProd] in { |
| def UDOT2S : BaseSIMDThreeSameVectorDot<0, 1, "udot", ".2s", ".8b">; |
| def SDOT2S : BaseSIMDThreeSameVectorDot<0, 0, "sdot", ".2s", ".8b">; |
| def UDOT4S : BaseSIMDThreeSameVectorDot<1, 1, "udot", ".4s", ".16b">; |
| def SDOT4S : BaseSIMDThreeSameVectorDot<1, 0, "sdot", ".4s", ".16b">; |
| def UDOTIDX2S : BaseSIMDThreeSameVectorDotIndex<0, 1, "udot", ".2s", ".8b", ".4b">; |
| def SDOTIDX2S : BaseSIMDThreeSameVectorDotIndex<0, 0, "sdot", ".2s", ".8b", ".4b">; |
| def UDOTIDX4S : BaseSIMDThreeSameVectorDotIndex<1, 1, "udot", ".4s", ".16b", ".4b">; |
| def SDOTIDX4S : BaseSIMDThreeSameVectorDotIndex<1, 0, "sdot", ".4s", ".16b", ".4b">; |
| } |
| |
| let Predicates = [HasRCPC] in { |
// v8.3 RCpc (Release Consistent processor consistent) loads, optional in v8.2.
| def LDAPRB : RCPCLoad<0b00, "ldaprb", GPR32>; |
| def LDAPRH : RCPCLoad<0b01, "ldaprh", GPR32>; |
| def LDAPRW : RCPCLoad<0b10, "ldapr", GPR32>; |
| def LDAPRX : RCPCLoad<0b11, "ldapr", GPR64>; |
| } |
| |
// v8.3a complex add and multiply-accumulate. No predicate here; that is done
// inside the multiclass, as the FP16 versions need different predicates.
| defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop, |
| "fcmla", null_frag>; |
| defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd, |
| "fcadd", null_frag>; |
| defm FCMLA : SIMDIndexedTiedComplexHSD<1, 0, 1, complexrotateop, "fcmla", |
| null_frag>; |
| |
| let Predicates = [HasV8_3a] in { |
| // v8.3a Pointer Authentication |
| let Uses = [LR], Defs = [LR] in { |
| def PACIAZ : SystemNoOperands<0b000, "paciaz">; |
| def PACIBZ : SystemNoOperands<0b010, "pacibz">; |
| def AUTIAZ : SystemNoOperands<0b100, "autiaz">; |
| def AUTIBZ : SystemNoOperands<0b110, "autibz">; |
| } |
| let Uses = [LR, SP], Defs = [LR] in { |
| def PACIASP : SystemNoOperands<0b001, "paciasp">; |
| def PACIBSP : SystemNoOperands<0b011, "pacibsp">; |
| def AUTIASP : SystemNoOperands<0b101, "autiasp">; |
| def AUTIBSP : SystemNoOperands<0b111, "autibsp">; |
| } |
| let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in { |
| def PACIA1716 : SystemNoOperands<0b000, "pacia1716">; |
| def PACIB1716 : SystemNoOperands<0b010, "pacib1716">; |
| def AUTIA1716 : SystemNoOperands<0b100, "autia1716">; |
| def AUTIB1716 : SystemNoOperands<0b110, "autib1716">; |
| } |
| |
| let Uses = [LR], Defs = [LR], CRm = 0b0000 in { |
| def XPACLRI : SystemNoOperands<0b111, "xpaclri">; |
| } |
| |
| multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm> { |
| def IA : SignAuthOneData<prefix, 0b00, !strconcat(asm, "ia")>; |
| def IB : SignAuthOneData<prefix, 0b01, !strconcat(asm, "ib")>; |
| def DA : SignAuthOneData<prefix, 0b10, !strconcat(asm, "da")>; |
| def DB : SignAuthOneData<prefix, 0b11, !strconcat(asm, "db")>; |
| def IZA : SignAuthZero<prefix_z, 0b00, !strconcat(asm, "iza")>; |
| def DZA : SignAuthZero<prefix_z, 0b10, !strconcat(asm, "dza")>; |
| def IZB : SignAuthZero<prefix_z, 0b01, !strconcat(asm, "izb")>; |
| def DZB : SignAuthZero<prefix_z, 0b11, !strconcat(asm, "dzb")>; |
| } |
| |
| defm PAC : SignAuth<0b000, 0b010, "pac">; |
| defm AUT : SignAuth<0b001, 0b011, "aut">; |
| |
| def XPACI : SignAuthZero<0b100, 0b00, "xpaci">; |
| def XPACD : SignAuthZero<0b100, 0b01, "xpacd">; |
| def PACGA : SignAuthTwoOperand<0b1100, "pacga", null_frag>; |
| |
| // Combined Instructions |
| def BRAA : AuthBranchTwoOperands<0, 0, "braa">; |
| def BRAB : AuthBranchTwoOperands<0, 1, "brab">; |
| def BLRAA : AuthBranchTwoOperands<1, 0, "blraa">; |
| def BLRAB : AuthBranchTwoOperands<1, 1, "blrab">; |
| |
| def BRAAZ : AuthOneOperand<0b000, 0, "braaz">; |
| def BRABZ : AuthOneOperand<0b000, 1, "brabz">; |
| def BLRAAZ : AuthOneOperand<0b001, 0, "blraaz">; |
| def BLRABZ : AuthOneOperand<0b001, 1, "blrabz">; |
| |
| let isReturn = 1 in { |
| def RETAA : AuthReturn<0b010, 0, "retaa">; |
| def RETAB : AuthReturn<0b010, 1, "retab">; |
| def ERETAA : AuthReturn<0b100, 0, "eretaa">; |
| def ERETAB : AuthReturn<0b100, 1, "eretab">; |
| } |
| |
| defm LDRAA : AuthLoad<0, "ldraa", simm10Scaled>; |
| defm LDRAB : AuthLoad<1, "ldrab", simm10Scaled>; |
| |
| // v8.3a floating point conversion for javascript |
| let Predicates = [HasV8_3a, HasFPARMv8] in |
| def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32, |
| "fjcvtzs", []> { |
| let Inst{31} = 0; |
| } |
| |
} // HasV8_3a
| |
| def : InstAlias<"clrex", (CLREX 0xf)>; |
| def : InstAlias<"isb", (ISB 0xf)>; |
| |
| def MRS : MRSI; |
| def MSR : MSRI; |
| def MSRpstateImm1 : MSRpstateImm0_1; |
| def MSRpstateImm4 : MSRpstateImm0_15; |
| |
| // The thread pointer (on Linux, at least, where this has been implemented) is |
// TPIDR_EL0. Add a pseudo so we can mark it as not having any side effects.
| let hasSideEffects = 0 in |
| def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins), |
| [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>; |
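
// After selection the pseudo is expanded to a plain system-register read of
// TPIDR_EL0, e.g. "mrs x0, tpidr_el0" (register illustrative).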
| |
| // The cycle counter PMC register is PMCCNTR_EL0. |
| let Predicates = [HasPerfMon] in |
| def : Pat<(readcyclecounter), (MRS 0xdce8)>; |
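
// 0xdce8 packs the PMCCNTR_EL0 operands op0=3, op1=3, CRn=9, CRm=13, op2=0
// as (op0<<14)|(op1<<11)|(CRn<<7)|(CRm<<3)|op2
//   = 0xc000 + 0x1800 + 0x480 + 0x68 = 0xdce8.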
| |
| // Generic system instructions |
| def SYSxt : SystemXtI<0, "sys">; |
| def SYSLxt : SystemLXtI<1, "sysl">; |
| |
| def : InstAlias<"sys $op1, $Cn, $Cm, $op2", |
| (SYSxt imm0_7:$op1, sys_cr_op:$Cn, |
| sys_cr_op:$Cm, imm0_7:$op2, XZR)>; |
| |
| //===----------------------------------------------------------------------===// |
| // Move immediate instructions. |
| //===----------------------------------------------------------------------===// |
| |
| defm MOVK : InsertImmediate<0b11, "movk">; |
| defm MOVN : MoveImmediate<0b00, "movn">; |
| |
| let PostEncoderMethod = "fixMOVZ" in |
| defm MOVZ : MoveImmediate<0b10, "movz">; |
| |
| // First group of aliases covers an implicit "lsl #0". |
| def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, imm0_65535:$imm, 0)>; |
| def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, imm0_65535:$imm, 0)>; |
| def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, imm0_65535:$imm, 0)>; |
| def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, imm0_65535:$imm, 0)>; |
| def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, imm0_65535:$imm, 0)>; |
| def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, imm0_65535:$imm, 0)>; |
| |
| // Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax. |
| def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>; |
| def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>; |
| def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>; |
| def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>; |
| |
| def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>; |
| def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>; |
| def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>; |
| def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>; |
| |
| def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g3:$sym, 48)>; |
| def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g2:$sym, 32)>; |
| def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g1:$sym, 16)>; |
| def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g0:$sym, 0)>; |
| |
| def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>; |
| def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>; |
| |
| def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>; |
| def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>; |
| |
| def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g1:$sym, 16)>; |
| def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g0:$sym, 0)>; |
| |
| // Final group of aliases covers true "mov $Rd, $imm" cases. |
multiclass movw_mov_alias<string basename, Instruction INST, RegisterClass GPR,
| int width, int shift> { |
| def _asmoperand : AsmOperandClass { |
| let Name = basename # width # "_lsl" # shift # "MovAlias"; |
| let PredicateMethod = "is" # basename # "MovAlias<" # width # ", " |
| # shift # ">"; |
| let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">"; |
| } |
| |
| def _movimm : Operand<i32> { |
| let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand"); |
| } |
| |
| def : InstAlias<"mov $Rd, $imm", |
| (INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>; |
| } |
| |
| defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>; |
| defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>; |
| |
| defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>; |
| defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>; |
| defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>; |
| defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>; |
| |
| defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>; |
| defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>; |
| |
| defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>; |
| defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>; |
| defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>; |
| defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>; |
| |
| let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1, |
| isAsCheapAsAMove = 1 in { |
| // FIXME: The following pseudo instructions are only needed because remat |
| // cannot handle multiple instructions. When that changes, we can select |
| // directly to the real instructions and get rid of these pseudos. |
| |
| def MOVi32imm |
| : Pseudo<(outs GPR32:$dst), (ins i32imm:$src), |
| [(set GPR32:$dst, imm:$src)]>, |
| Sched<[WriteImm]>; |
| def MOVi64imm |
| : Pseudo<(outs GPR64:$dst), (ins i64imm:$src), |
| [(set GPR64:$dst, imm:$src)]>, |
| Sched<[WriteImm]>; |
| } // isReMaterializable, isCodeGenOnly |
| |
| // If possible, we want to use MOVi32imm even for 64-bit moves. This gives the |
| // eventual expansion code fewer bits to worry about getting right. Marshalling |
| // the types is a little tricky though: |
| def i64imm_32bit : ImmLeaf<i64, [{ |
| return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm); |
| }]>; |
| |
| def s64imm_32bit : ImmLeaf<i64, [{ |
| int64_t Imm64 = static_cast<int64_t>(Imm); |
| return Imm64 >= std::numeric_limits<int32_t>::min() && |
| Imm64 <= std::numeric_limits<int32_t>::max(); |
| }]>; |
| |
| def trunc_imm : SDNodeXForm<imm, [{ |
| return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32); |
| }]>; |
| |
| def : Pat<(i64 i64imm_32bit:$src), |
| (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>; |
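
// For example, the 64-bit constant 0xdeadbeef has all upper bits clear, so it
// is materialized as a 32-bit MOVi32imm of 0xdeadbeef and widened with
// SUBREG_TO_REG (writing a W register implicitly zeroes the top 32 bits).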
| |
| // Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model). |
| def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{ |
| return CurDAG->getTargetConstant( |
| N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32); |
| }]>; |
| |
| def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{ |
| return CurDAG->getTargetConstant( |
| N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64); |
| }]>; |
| |
| |
| def : Pat<(f32 fpimm:$in), |
| (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>; |
| def : Pat<(f64 fpimm:$in), |
| (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>; |
| |
| |
| // Deal with the various forms of (ELF) large addressing with MOVZ/MOVK |
| // sequences. |
| def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2, |
| tglobaladdr:$g1, tglobaladdr:$g0), |
| (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g0, 0), |
| tglobaladdr:$g1, 16), |
| tglobaladdr:$g2, 32), |
| tglobaladdr:$g3, 48)>; |
| |
| def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2, |
| tblockaddress:$g1, tblockaddress:$g0), |
| (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g0, 0), |
| tblockaddress:$g1, 16), |
| tblockaddress:$g2, 32), |
| tblockaddress:$g3, 48)>; |
| |
| def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2, |
| tconstpool:$g1, tconstpool:$g0), |
| (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g0, 0), |
| tconstpool:$g1, 16), |
| tconstpool:$g2, 32), |
| tconstpool:$g3, 48)>; |
| |
| def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2, |
| tjumptable:$g1, tjumptable:$g0), |
| (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g0, 0), |
| tjumptable:$g1, 16), |
| tjumptable:$g2, 32), |
| tjumptable:$g3, 48)>; |
| |
| |
| //===----------------------------------------------------------------------===// |
| // Arithmetic instructions. |
| //===----------------------------------------------------------------------===// |
| |
| // Add/subtract with carry. |
| defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>; |
| defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>; |
| |
| def : InstAlias<"ngc $dst, $src", (SBCWr GPR32:$dst, WZR, GPR32:$src)>; |
| def : InstAlias<"ngc $dst, $src", (SBCXr GPR64:$dst, XZR, GPR64:$src)>; |
| def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>; |
| def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>; |
| |
| // Add/subtract |
| defm ADD : AddSub<0, "add", "sub", add>; |
| defm SUB : AddSub<1, "sub", "add">; |
| |
| def : InstAlias<"mov $dst, $src", |
| (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>; |
| def : InstAlias<"mov $dst, $src", |
| (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>; |
| def : InstAlias<"mov $dst, $src", |
| (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>; |
| def : InstAlias<"mov $dst, $src", |
| (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>; |
| |
| defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">; |
| defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">; |
| |
| // Use SUBS instead of SUB to enable CSE between SUBS and SUB. |
| def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm), |
| (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>; |
| def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm), |
| (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>; |
| def : Pat<(sub GPR32:$Rn, GPR32:$Rm), |
| (SUBSWrr GPR32:$Rn, GPR32:$Rm)>; |
| def : Pat<(sub GPR64:$Rn, GPR64:$Rm), |
| (SUBSXrr GPR64:$Rn, GPR64:$Rm)>; |
| def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm), |
| (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>; |
| def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm), |
| (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>; |
| let AddedComplexity = 1 in { |
| def : Pat<(sub GPR32sp:$R2, arith_extended_reg32<i32>:$R3), |
| (SUBSWrx GPR32sp:$R2, arith_extended_reg32<i32>:$R3)>; |
| def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3), |
| (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3)>; |
| } |
| |
// Because of the immediate format for add/sub-imm instructions, the
// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
// These patterns capture that transformation; note that the add case selects
// SUBS so that it can CSE with an explicit compare (see above).
| let AddedComplexity = 1 in { |
| def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm), |
| (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; |
| def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm), |
| (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; |
| def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm), |
| (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; |
| def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm), |
| (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; |
| } |
| |
// The same transformation applies to the flag-setting forms: for example,
// (AArch64add_flag x, -1) must become (SUBS{W,X}ri x, 1), and
// (AArch64sub_flag x, -1) must become (ADDS{W,X}ri x, 1).
| let AddedComplexity = 1 in { |
| def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), |
| (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; |
| def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), |
| (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; |
| def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), |
| (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; |
| def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), |
| (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; |
| } |
| |
| def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; |
| def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; |
| def : InstAlias<"neg $dst, $src$shift", |
| (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; |
| def : InstAlias<"neg $dst, $src$shift", |
| (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; |
| |
| def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; |
| def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; |
| def : InstAlias<"negs $dst, $src$shift", |
| (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; |
| def : InstAlias<"negs $dst, $src$shift", |
| (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; |
| |
| |
| // Unsigned/Signed divide |
| defm UDIV : Div<0, "udiv", udiv>; |
| defm SDIV : Div<1, "sdiv", sdiv>; |
| |
| def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>; |
| def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>; |
| def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>; |
| def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>; |
| |
| // Variable shift |
| defm ASRV : Shift<0b10, "asr", sra>; |
| defm LSLV : Shift<0b00, "lsl", shl>; |
| defm LSRV : Shift<0b01, "lsr", srl>; |
| defm RORV : Shift<0b11, "ror", rotr>; |
| |
| def : ShiftAlias<"asrv", ASRVWr, GPR32>; |
| def : ShiftAlias<"asrv", ASRVXr, GPR64>; |
| def : ShiftAlias<"lslv", LSLVWr, GPR32>; |
| def : ShiftAlias<"lslv", LSLVXr, GPR64>; |
| def : ShiftAlias<"lsrv", LSRVWr, GPR32>; |
| def : ShiftAlias<"lsrv", LSRVXr, GPR64>; |
| def : ShiftAlias<"rorv", RORVWr, GPR32>; |
| def : ShiftAlias<"rorv", RORVXr, GPR64>; |
| |
| // Multiply-add |
| let AddedComplexity = 5 in { |
| defm MADD : MulAccum<0, "madd", add>; |
| defm MSUB : MulAccum<1, "msub", sub>; |
| |
| def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)), |
| (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; |
| def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)), |
| (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; |
| |
| def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))), |
| (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; |
| def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))), |
| (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; |
| def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)), |
| (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; |
| def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)), |
| (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; |
| } // AddedComplexity = 5 |
| |
| let AddedComplexity = 5 in { |
| def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>; |
| def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>; |
| def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>; |
| def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>; |
| |
| def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))), |
| (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; |
| def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))), |
| (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; |
| |
| def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))), |
| (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; |
| def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))), |
| (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; |
| |
| def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))), |
| (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; |
| def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))), |
| (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; |
| def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))), |
| (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), |
| (MOVi32imm (trunc_imm imm:$C)), XZR)>; |
| |
| def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))), |
| (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; |
| def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))), |
| (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; |
| def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))), |
| (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), |
| (MOVi32imm (trunc_imm imm:$C)), XZR)>; |
| |
| def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)), |
| (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; |
| def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)), |
| (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; |
| def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)), |
| GPR64:$Ra)), |
| (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), |
| (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; |
| |
| def : Pat<(i64 (sub GPR64:$Ra, (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))), |
| (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; |
| def : Pat<(i64 (sub GPR64:$Ra, (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))), |
| (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; |
| def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32), |
| (s64imm_32bit:$C)))), |
| (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), |
| (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; |
| } // AddedComplexity = 5 |
| |
| def : MulAccumWAlias<"mul", MADDWrrr>; |
| def : MulAccumXAlias<"mul", MADDXrrr>; |
| def : MulAccumWAlias<"mneg", MSUBWrrr>; |
| def : MulAccumXAlias<"mneg", MSUBXrrr>; |
| def : WideMulAccumAlias<"smull", SMADDLrrr>; |
| def : WideMulAccumAlias<"smnegl", SMSUBLrrr>; |
| def : WideMulAccumAlias<"umull", UMADDLrrr>; |
| def : WideMulAccumAlias<"umnegl", UMSUBLrrr>; |
| |
| // Multiply-high |
| def SMULHrr : MulHi<0b010, "smulh", mulhs>; |
| def UMULHrr : MulHi<0b110, "umulh", mulhu>; |
| |
| // CRC32 |
| def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">; |
| def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">; |
| def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">; |
| def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">; |
| |
| def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">; |
| def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">; |
| def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">; |
| def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">; |
| |
| // v8.1 atomic CAS |
| defm CAS : CompareAndSwap<0, 0, "">; |
| defm CASA : CompareAndSwap<1, 0, "a">; |
| defm CASL : CompareAndSwap<0, 1, "l">; |
| defm CASAL : CompareAndSwap<1, 1, "al">; |
| |
| // v8.1 atomic CASP |
| defm CASP : CompareAndSwapPair<0, 0, "">; |
| defm CASPA : CompareAndSwapPair<1, 0, "a">; |
| defm CASPL : CompareAndSwapPair<0, 1, "l">; |
| defm CASPAL : CompareAndSwapPair<1, 1, "al">; |
| |
| // v8.1 atomic SWP |
| defm SWP : Swap<0, 0, "">; |
| defm SWPA : Swap<1, 0, "a">; |
| defm SWPL : Swap<0, 1, "l">; |
| defm SWPAL : Swap<1, 1, "al">; |
| |
// v8.1 atomic LD<OP>(register): performs a load and then an ST<OP>(register).
| defm LDADD : LDOPregister<0b000, "add", 0, 0, "">; |
| defm LDADDA : LDOPregister<0b000, "add", 1, 0, "a">; |
| defm LDADDL : LDOPregister<0b000, "add", 0, 1, "l">; |
| defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">; |
| |
| defm LDCLR : LDOPregister<0b001, "clr", 0, 0, "">; |
| defm LDCLRA : LDOPregister<0b001, "clr", 1, 0, "a">; |
| defm LDCLRL : LDOPregister<0b001, "clr", 0, 1, "l">; |
| defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">; |
| |
| defm LDEOR : LDOPregister<0b010, "eor", 0, 0, "">; |
| defm LDEORA : LDOPregister<0b010, "eor", 1, 0, "a">; |
| defm LDEORL : LDOPregister<0b010, "eor", 0, 1, "l">; |
| defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">; |
| |
| defm LDSET : LDOPregister<0b011, "set", 0, 0, "">; |
| defm LDSETA : LDOPregister<0b011, "set", 1, 0, "a">; |
| defm LDSETL : LDOPregister<0b011, "set", 0, 1, "l">; |
| defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">; |
| |
| defm LDSMAX : LDOPregister<0b100, "smax", 0, 0, "">; |
| defm LDSMAXA : LDOPregister<0b100, "smax", 1, 0, "a">; |
| defm LDSMAXL : LDOPregister<0b100, "smax", 0, 1, "l">; |
| defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">; |
| |
| defm LDSMIN : LDOPregister<0b101, "smin", 0, 0, "">; |
| defm LDSMINA : LDOPregister<0b101, "smin", 1, 0, "a">; |
| defm LDSMINL : LDOPregister<0b101, "smin", 0, 1, "l">; |
| defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">; |
| |
| defm LDUMAX : LDOPregister<0b110, "umax", 0, 0, "">; |
| defm LDUMAXA : LDOPregister<0b110, "umax", 1, 0, "a">; |
| defm LDUMAXL : LDOPregister<0b110, "umax", 0, 1, "l">; |
| defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">; |
| |
| defm LDUMIN : LDOPregister<0b111, "umin", 0, 0, "">; |
| defm LDUMINA : LDOPregister<0b111, "umin", 1, 0, "a">; |
| defm LDUMINL : LDOPregister<0b111, "umin", 0, 1, "l">; |
| defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">; |
| |
// v8.1 atomic ST<OP>(register): aliases of LD<OP>(register) with Rt=XZR.
| defm : STOPregister<"stadd","LDADD">; // STADDx |
| defm : STOPregister<"stclr","LDCLR">; // STCLRx |
| defm : STOPregister<"steor","LDEOR">; // STEORx |
| defm : STOPregister<"stset","LDSET">; // STSETx |
| defm : STOPregister<"stsmax","LDSMAX">;// STSMAXx |
| defm : STOPregister<"stsmin","LDSMIN">;// STSMINx |
| defm : STOPregister<"stumax","LDUMAX">;// STUMAXx |
| defm : STOPregister<"stumin","LDUMIN">;// STUMINx |
| |
| //===----------------------------------------------------------------------===// |
| // Logical instructions. |
| //===----------------------------------------------------------------------===// |
| |
| // (immediate) |
| defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">; |
| defm AND : LogicalImm<0b00, "and", and, "bic">; |
| defm EOR : LogicalImm<0b10, "eor", xor, "eon">; |
| defm ORR : LogicalImm<0b01, "orr", or, "orn">; |
| |
| // FIXME: these aliases *are* canonical sometimes (when movz can't be |
| // used). Actually, it seems to be working right now, but putting logical_immXX |
| // here is a bit dodgy on the AsmParser side too. |
| def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR, |
| logical_imm32:$imm), 0>; |
| def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR, |
| logical_imm64:$imm), 0>; |
| |
| |
| // (register) |
| defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>; |
| defm BICS : LogicalRegS<0b11, 1, "bics", |
| BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>; |
| defm AND : LogicalReg<0b00, 0, "and", and>; |
| defm BIC : LogicalReg<0b00, 1, "bic", |
| BinOpFrag<(and node:$LHS, (not node:$RHS))>>; |
| defm EON : LogicalReg<0b10, 1, "eon", |
| BinOpFrag<(not (xor node:$LHS, node:$RHS))>>; |
| defm EOR : LogicalReg<0b10, 0, "eor", xor>; |
| defm ORN : LogicalReg<0b01, 1, "orn", |
| BinOpFrag<(or node:$LHS, (not node:$RHS))>>; |
| defm ORR : LogicalReg<0b01, 0, "orr", or>; |
| |
| def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>; |
| def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>; |
| |
| def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>; |
| def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>; |
| |
| def : InstAlias<"mvn $Wd, $Wm$sh", |
| (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>; |
| def : InstAlias<"mvn $Xd, $Xm$sh", |
| (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>; |
| |
| def : InstAlias<"tst $src1, $src2", |
| (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>; |
| def : InstAlias<"tst $src1, $src2", |
| (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>; |
| |
| def : InstAlias<"tst $src1, $src2", |
| (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>; |
| def : InstAlias<"tst $src1, $src2", |
| (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>; |
| |
| def : InstAlias<"tst $src1, $src2$sh", |
| (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>; |
| def : InstAlias<"tst $src1, $src2$sh", |
| (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>; |
| |
| |
| def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>; |
| def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>; |
| |
| |
| //===----------------------------------------------------------------------===// |
| // One operand data processing instructions. |
| //===----------------------------------------------------------------------===// |
| |
| defm CLS : OneOperandData<0b101, "cls">; |
| defm CLZ : OneOperandData<0b100, "clz", ctlz>; |
| defm RBIT : OneOperandData<0b000, "rbit", bitreverse>; |
| |
| def REV16Wr : OneWRegData<0b001, "rev16", |
| UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>; |
| def REV16Xr : OneXRegData<0b001, "rev16", null_frag>; |
| |
| def : Pat<(cttz GPR32:$Rn), |
| (CLZWr (RBITWr GPR32:$Rn))>; |
| def : Pat<(cttz GPR64:$Rn), |
| (CLZXr (RBITXr GPR64:$Rn))>; |
| def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)), |
| (i32 1))), |
| (CLSWr GPR32:$Rn)>; |
| def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)), |
| (i64 1))), |
| (CLSXr GPR64:$Rn)>; |
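
// So a 32-bit cttz lowers to a two-instruction sequence, e.g.:
//   rbit w8, w0
//   clz  w0, w8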
| |
// Unlike the other one operand instructions, the instructions with the "rev"
// mnemonic do *not* just differ in the size bit; they actually use different
// opcode bits for the different sizes.
| def REVWr : OneWRegData<0b010, "rev", bswap>; |
| def REVXr : OneXRegData<0b011, "rev", bswap>; |
| def REV32Xr : OneXRegData<0b010, "rev32", |
| UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>; |
| |
| def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>; |
| |
| // The bswap commutes with the rotr so we want a pattern for both possible |
| // orders. |
| def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>; |
| def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>; |
| |
| //===----------------------------------------------------------------------===// |
| // Bitfield immediate extraction instruction. |
| //===----------------------------------------------------------------------===// |
| let hasSideEffects = 0 in |
| defm EXTR : ExtractImm<"extr">; |
| def : InstAlias<"ror $dst, $src, $shift", |
| (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>; |
| def : InstAlias<"ror $dst, $src, $shift", |
| (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>; |
| |
| def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)), |
| (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>; |
| def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)), |
| (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>; |
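
// A rotate right by immediate is EXTR with both source registers equal, e.g.
// "ror w0, w1, #8" assembles to "extr w0, w1, w1, #8".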
| |
| //===----------------------------------------------------------------------===// |
| // Other bitfield immediate instructions. |
| //===----------------------------------------------------------------------===// |
| let hasSideEffects = 0 in { |
| defm BFM : BitfieldImmWith2RegArgs<0b01, "bfm">; |
| defm SBFM : BitfieldImm<0b00, "sbfm">; |
| defm UBFM : BitfieldImm<0b10, "ubfm">; |
| } |
| |
| def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{ |
| uint64_t enc = (32 - N->getZExtValue()) & 0x1f; |
| return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); |
| }]>; |
| |
| def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{ |
| uint64_t enc = 31 - N->getZExtValue(); |
| return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); |
| }]>; |
| |
| // min(7, 31 - shift_amt) |
| def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{ |
| uint64_t enc = 31 - N->getZExtValue(); |
| enc = enc > 7 ? 7 : enc; |
| return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); |
| }]>; |
| |
| // min(15, 31 - shift_amt) |
| def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{ |
| uint64_t enc = 31 - N->getZExtValue(); |
| enc = enc > 15 ? 15 : enc; |
| return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); |
| }]>; |
| |
| def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{ |
| uint64_t enc = (64 - N->getZExtValue()) & 0x3f; |
| return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); |
| }]>; |
| |
| def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{ |
| uint64_t enc = 63 - N->getZExtValue(); |
| return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); |
| }]>; |
| |
| // min(7, 63 - shift_amt) |
| def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{ |
| uint64_t enc = 63 - N->getZExtValue(); |
| enc = enc > 7 ? 7 : enc; |
| return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); |
| }]>; |
| |
| // min(15, 63 - shift_amt) |
| def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{ |
| uint64_t enc = 63 - N->getZExtValue(); |
| enc = enc > 15 ? 15 : enc; |
| return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); |
| }]>; |
| |
| // min(31, 63 - shift_amt) |
| def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{ |
| uint64_t enc = 63 - N->getZExtValue(); |
| enc = enc > 31 ? 31 : enc; |
| return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); |
| }]>; |
| |
| def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)), |
| (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), |
| (i64 (i32shift_b imm0_31:$imm)))>; |
| def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)), |
| (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), |
| (i64 (i64shift_b imm0_63:$imm)))>; |
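
// Worked example for the 32-bit shift-left encoding: for (shl x, #3),
// i32shift_a gives immr = (32 - 3) & 0x1f = 29 and i32shift_b gives
// imms = 31 - 3 = 28, so "lsl w0, w1, #3" is "ubfm w0, w1, #29, #28".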
| |
| let AddedComplexity = 10 in { |
| def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)), |
| (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; |
| def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)), |
| (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; |
| } |
| |
| def : InstAlias<"asr $dst, $src, $shift", |
| (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; |
| def : InstAlias<"asr $dst, $src, $shift", |
| (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; |
| def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; |
| def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; |
| def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; |
| def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; |
| def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; |
| |
| def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)), |
| (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; |
| def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)), |
| (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; |
| |
| def : InstAlias<"lsr $dst, $src, $shift", |
| (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; |
| def : InstAlias<"lsr $dst, $src, $shift", |
| (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; |
| def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; |
| def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; |
| def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; |
| def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; |
| def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; |
| |
| //===----------------------------------------------------------------------===// |
| // Conditional comparison instructions. |
| //===----------------------------------------------------------------------===// |
| defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>; |
| defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>; |
| |
| //===----------------------------------------------------------------------===// |
| // Conditional select instructions. |
| //===----------------------------------------------------------------------===// |
| defm CSEL : CondSelect<0, 0b00, "csel">; |
| |
| def inc : PatFrag<(ops node:$in), (add node:$in, 1)>; |
| defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>; |
| defm CSINV : CondSelectOp<1, 0b00, "csinv", not>; |
| defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>; |
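
// Semantics, with cc the tested condition:
//   csinc d, n, m, cc : d = cc ? n : m + 1
//   csinv d, n, m, cc : d = cc ? n : ~m
//   csneg d, n, m, cc : d = cc ? n : -m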
| |
| def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), |
| (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; |
| def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), |
| (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; |
| def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), |
| (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; |
| def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), |
| (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; |
| def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), |
| (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; |
| def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), |
| (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; |
| |
| def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV), |
| (CSINCWr WZR, WZR, (i32 imm:$cc))>; |
| def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV), |
| (CSINCXr XZR, XZR, (i32 imm:$cc))>; |
| def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV), |
| (CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>; |
| def : Pat<(AArch64csel GPR64:$tval, (i64 1), (i32 imm:$cc), NZCV), |
| (CSINCXr GPR64:$tval, XZR, (i32 imm:$cc))>; |
| def : Pat<(AArch64csel (i32 1), GPR32:$fval, (i32 imm:$cc), NZCV), |
| (CSINCWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>; |
| def : Pat<(AArch64csel (i64 1), GPR64:$fval, (i32 imm:$cc), NZCV), |
| (CSINCXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>; |
| def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV), |
| (CSINVWr WZR, WZR, (i32 imm:$cc))>; |
| def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV), |
| (CSINVXr XZR, XZR, (i32 imm:$cc))>; |
| def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV), |
| (CSINVWr GPR32:$tval, WZR, (i32 imm:$cc))>; |
| def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV), |
| (CSINVXr GPR64:$tval, XZR, (i32 imm:$cc))>; |
| def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV), |
| (CSINVWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>; |
| def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV), |
| (CSINVXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>; |
| |
| // The aliased instruction uses the inverse of the condition code from the |
| // alias. The parser already inverts the condition code for these aliases. |
| def : InstAlias<"cset $dst, $cc", |
| (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>; |
| def : InstAlias<"cset $dst, $cc", |
| (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>; |
| |
| def : InstAlias<"csetm $dst, $cc", |
| (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>; |
| def : InstAlias<"csetm $dst, $cc", |
| (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>; |
| |
| def : InstAlias<"cinc $dst, $src, $cc", |
| (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>; |
| def : InstAlias<"cinc $dst, $src, $cc", |
| (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>; |
| |
| def : InstAlias<"cinv $dst, $src, $cc", |
| (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>; |
| def : InstAlias<"cinv $dst, $src, $cc", |
| (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>; |
| |
| def : InstAlias<"cneg $dst, $src, $cc", |
| (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>; |
| def : InstAlias<"cneg $dst, $src, $cc", |
| (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>; |
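| // For example, "cset w0, eq" assembles to "csinc w0, wzr, wzr, ne" and |
| // "cinc w0, w1, eq" to "csinc w0, w1, w1, ne": each alias produces its |
| // result when the alias's own condition holds, hence the inversion. |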
| |
| //===----------------------------------------------------------------------===// |
| // PC-relative instructions. |
| //===----------------------------------------------------------------------===// |
| let isReMaterializable = 1 in { |
| let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in { |
| def ADR : ADRI<0, "adr", adrlabel, []>; |
| } // hasSideEffects = 0 |
| |
| def ADRP : ADRI<1, "adrp", adrplabel, |
| [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>; |
| } // isReMaterializable = 1 |
| |
| // Page address of a constant pool entry or of a block address. |
| def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>; |
| def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>; |
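| // A global address is typically materialized as the 4KiB page address (ADRP |
| // reaches +/-4GiB from the PC) plus the low 12 bits, e.g.: |
| //   adrp x0, sym |
| //   add  x0, x0, :lo12:sym |
| // ADR itself forms a +/-1MiB PC-relative address in a single instruction. |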
| |
| //===----------------------------------------------------------------------===// |
| // Unconditional branch (register) instructions. |
| //===----------------------------------------------------------------------===// |
| |
| let isReturn = 1, isTerminator = 1, isBarrier = 1 in { |
| def RET : BranchReg<0b0010, "ret", []>; |
| def DRPS : SpecialReturn<0b0101, "drps">; |
| def ERET : SpecialReturn<0b0100, "eret">; |
| } // isReturn = 1, isTerminator = 1, isBarrier = 1 |
| |
| // Default to the LR register. |
| def : InstAlias<"ret", (RET LR)>; |
| |
| let isCall = 1, Defs = [LR], Uses = [SP] in { |
| def BLR : BranchReg<0b0001, "blr", [(AArch64call GPR64:$Rn)]>; |
| } // isCall |
| |
| let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { |
| def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>; |
| } // isBranch, isTerminator, isBarrier, isIndirectBranch |
| |
| // Create a separate pseudo-instruction for codegen to use so that we don't |
| // flag LR as used in every function. It'll be restored before the RET by the |
| // epilogue if it's legitimately used. |
| def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]>, |
| Sched<[WriteBrReg]> { |
| let isTerminator = 1; |
| let isBarrier = 1; |
| let isReturn = 1; |
| } |
| |
| // This is a directive-like pseudo-instruction. The purpose is to insert an |
| // R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction |
| // (which in the usual case is a BLR). |
| let hasSideEffects = 1 in |
| def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> { |
| let AsmString = ".tlsdesccall $sym"; |
| } |
| |
| // FIXME: maybe the scratch register used shouldn't be fixed to X1? |
| // FIXME: can "hasSideEffects be dropped? |
| let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1, |
| isCodeGenOnly = 1 in |
| def TLSDESC_CALLSEQ |
| : Pseudo<(outs), (ins i64imm:$sym), |
| [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>, |
| Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>; |
| def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym), |
| (TLSDESC_CALLSEQ texternalsym:$sym)>; |
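| // On ELF targets the call sequence typically expands to the four |
| // instructions accounted for by the Sched list above, e.g. for "var": |
| //   adrp x0, :tlsdesc:var |
| //   ldr  x1, [x0, :tlsdesc_lo12:var] |
| //   add  x0, x0, :tlsdesc_lo12:var |
| //   .tlsdesccall var |
| //   blr  x1 |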
| |
| //===----------------------------------------------------------------------===// |
| // Conditional branch (immediate) instruction. |
| //===----------------------------------------------------------------------===// |
| def Bcc : BranchCond; |
| |
| //===----------------------------------------------------------------------===// |
| // Compare-and-branch instructions. |
| //===----------------------------------------------------------------------===// |
| defm CBZ : CmpBranch<0, "cbz", AArch64cbz>; |
| defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>; |
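| // e.g. "cbz w0, target" branches when w0 is zero; the 19-bit scaled offset |
| // gives a +/-1MiB branch range. |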
| |
| //===----------------------------------------------------------------------===// |
| // Test-bit-and-branch instructions. |
| //===----------------------------------------------------------------------===// |
| defm TBZ : TestBranch<0, "tbz", AArch64tbz>; |
| defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>; |
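| // e.g. "tbz x0, #63, target" branches when bit 63 of x0 is clear; the 14-bit |
| // scaled offset gives a +/-32KiB branch range. |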
| |
| //===----------------------------------------------------------------------===// |
| // Unconditional branch (immediate) instructions. |
| //===----------------------------------------------------------------------===// |
| let isBranch = 1, isTerminator = 1, isBarrier = 1 in { |
| def B : BranchImm<0, "b", [(br bb:$addr)]>; |
| } // isBranch, isTerminator, isBarrier |
| |
| let isCall = 1, Defs = [LR], Uses = [SP] in { |
| def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>; |
| } // isCall |
| def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>; |
| |
| //===----------------------------------------------------------------------===// |
| // Exception generation instructions. |
| //===----------------------------------------------------------------------===// |
| def BRK : ExceptionGeneration<0b001, 0b00, "brk">; |
| def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">; |
| def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">; |
| def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">; |
| def HLT : ExceptionGeneration<0b010, 0b00, "hlt">; |
| def HVC : ExceptionGeneration<0b000, 0b10, "hvc">; |
| def SMC : ExceptionGeneration<0b000, 0b11, "smc">; |
| def SVC : ExceptionGeneration<0b000, 0b01, "svc">; |
| |
| // DCPSn defaults to an immediate operand of zero if unspecified. |
| def : InstAlias<"dcps1", (DCPS1 0)>; |
| def : InstAlias<"dcps2", (DCPS2 0)>; |
| def : InstAlias<"dcps3", (DCPS3 0)>; |
| |
| //===----------------------------------------------------------------------===// |
| // Load instructions. |
| //===----------------------------------------------------------------------===// |
| |
| // Pair (indexed, offset) |
| defm LDPW : LoadPairOffset<0b00, 0, GPR32, simm7s4, "ldp">; |
| defm LDPX : LoadPairOffset<0b10, 0, GPR64, simm7s8, "ldp">; |
| defm LDPS : LoadPairOffset<0b00, 1, FPR32, simm7s4, "ldp">; |
| defm LDPD : LoadPairOffset<0b01, 1, FPR64, simm7s8, "ldp">; |
| defm LDPQ : LoadPairOffset<0b10, 1, FPR128, simm7s16, "ldp">; |
| |
| defm LDPSW : LoadPairOffset<0b01, 0, GPR64, simm7s4, "ldpsw">; |
| |
| // Pair (pre-indexed) |
| def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32, simm7s4, "ldp">; |
| def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64, simm7s8, "ldp">; |
| def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32, simm7s4, "ldp">; |
| def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64, simm7s8, "ldp">; |
| def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128, simm7s16, "ldp">; |
| |
| def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64, simm7s4, "ldpsw">; |
| |
| // Pair (post-indexed) |
| def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32, simm7s4, "ldp">; |
| def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64, simm7s8, "ldp">; |
| def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32, simm7s4, "ldp">; |
| def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64, simm7s8, "ldp">; |
| def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128, simm7s16, "ldp">; |
| |
| def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64, simm7s4, "ldpsw">; |
| |
| |
| // Pair (no allocate) |
| defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32, simm7s4, "ldnp">; |
| defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64, simm7s8, "ldnp">; |
| defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32, simm7s4, "ldnp">; |
| defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64, simm7s8, "ldnp">; |
| defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128, simm7s16, "ldnp">; |
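| // For example, "ldp x0, x1, [sp, #16]" loads x0 from [sp, #16] and x1 from |
| // [sp, #24]; the simm7s8 operand encodes offsets of -512..504 in steps of 8. |
| // LDNP uses the same addressing but hints that the data is non-temporal. |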
| |
| //--- |
| // (register offset) |
| //--- |
| |
| // Integer |
| defm LDRBB : Load8RO<0b00, 0, 0b01, GPR32, "ldrb", i32, zextloadi8>; |
| defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>; |
| defm LDRW : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>; |
| defm LDRX : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>; |
| |
| // Floating-point |
| defm LDRB : Load8RO<0b00, 1, 0b01, FPR8, "ldr", untyped, load>; |
| defm LDRH : Load16RO<0b01, 1, 0b01, FPR16, "ldr", f16, load>; |
| defm LDRS : Load32RO<0b10, 1, 0b01, FPR32, "ldr", f32, load>; |
| defm LDRD : Load64RO<0b11, 1, 0b01, FPR64, "ldr", f64, load>; |
| defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128, "ldr", f128, load>; |
| |
| // Load sign-extended half-word |
| defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>; |
| defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>; |
| |
| // Load sign-extended byte |
| defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>; |
| defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>; |
| |
| // Load sign-extended word |
| defm LDRSW : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>; |
| |
| // Pre-fetch. |
| defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">; |
| |
| // Regular loads have no alignment requirement, so it is safe to map the |
| // vector loads directly onto these "interesting" addressing modes. |
| // FIXME: We could do the same for bitconvert to floating point vectors. |
| multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop, |
| ValueType ScalTy, ValueType VecTy, |
| Instruction LOADW, Instruction LOADX, |
| SubRegIndex sub> { |
| def : Pat<(VecTy (scalar_to_vector (ScalTy |
| (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))), |
| (INSERT_SUBREG (VecTy (IMPLICIT_DEF)), |
| (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset), |
| sub)>; |
| |
| def : Pat<(VecTy (scalar_to_vector (ScalTy |
| (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))), |
| (INSERT_SUBREG (VecTy (IMPLICIT_DEF)), |
| (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset), |
| sub)>; |
| } |
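| // For instance, the <ro8, extloadi8, i32, v8i8, LDRBroW, LDRBroX, bsub> |
| // instantiation below selects a byte extload feeding scalar_to_vector into |
| // a single FPR byte load whose result lands in lane 0 (the bsub |
| // sub-register) of an otherwise undefined vector, avoiding a GPR->FPR copy. |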
| |
| let AddedComplexity = 10 in { |
| defm : ScalToVecROLoadPat<ro8, extloadi8, i32, v8i8, LDRBroW, LDRBroX, bsub>; |
| defm : ScalToVecROLoadPat<ro8, extloadi8, i32, v16i8, LDRBroW, LDRBroX, bsub>; |
| |
| defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>; |
| defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>; |
| |
| defm : ScalToVecROLoadPat<ro16, load, i32, v4f16, LDRHroW, LDRHroX, hsub>; |
| defm : ScalToVecROLoadPat<ro16, load, i32, v8f16, LDRHroW, LDRHroX, hsub>; |
| |
| defm : ScalToVecROLoadPat<ro32, load, i32, v2i32, LDRSroW, LDRSroX, ssub>; |
| defm : ScalToVecROLoadPat<ro32, load, i32, v4i32, LDRSroW, LDRSroX, ssub>; |
| |
| defm : ScalToVecROLoadPat<ro32, load, f32, v2f32, LDRSroW, LDRSroX, ssub>; |
| defm : ScalToVecROLoadPat<ro32, load, f32, v4f32, LDRSroW, LDRSroX, ssub>; |
| |
| defm : ScalToVecROLoadPat<ro64, load, i64, v2i64, LDRDroW, LDRDroX, dsub>; |
| |
| defm : ScalToVecROLoadPat<ro64, load, f64, v2f64, LDRDroW, LDRDroX, dsub>; |
| |
| |
| def : Pat <(v1i64 (scalar_to_vector (i64 |
| (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm, |
| ro_Wextend64:$extend))))), |
| (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>; |
| |
| def : Pat <(v1i64 (scalar_to_vector (i64 |
| (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm, |
| ro_Xextend64:$extend))))), |
| (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>; |
| } |
| |
| // Match all 64-bit-wide loads whose type is compatible with FPR64. |
| multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy, |
| Instruction LOADW, Instruction LOADX> { |
| |
| def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), |
| (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; |
| |
| def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), |
| (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; |
| } |
| |
| let AddedComplexity = 10 in { |
| let Predicates = [IsLE] in { |
| // Big-endian vector loads must use LD1, so restrict these to little-endian. |
| defm : VecROLoadPat<ro64, v2i32, LDRDroW, LDRDroX>; |
| defm : VecROLoadPat<ro64, v2f32, LDRDroW, LDRDroX>; |
| defm : VecROLoadPat<ro64, v8i8, LDRDroW, LDRDroX>; |
| defm : VecROLoadPat<ro64, v4i16, LDRDroW, LDRDroX>; |
| defm : VecROLoadPat<ro64, v4f16, LDRDroW, LDRDroX>; |
| } |
| |
| defm : VecROLoadPat<ro64, v1i64, LDRDroW, LDRDroX>; |
| defm : VecROLoadPat<ro64, v1f64, LDRDroW, LDRDroX>; |
| |
| // Match all 128-bit-wide loads whose type is compatible with FPR128. |
| let Predicates = [IsLE] in { |
| // Big-endian vector loads must use LD1, so restrict these to little-endian. |
| defm : VecROLoadPat<ro128, v2i64, LDRQroW, LDRQroX>; |
| defm : VecROLoadPat<ro128, v2f64, LDRQroW, LDRQroX>; |
| defm : VecROLoadPat<ro128, v4i32, LDRQroW, LDRQroX>; |
| defm : VecROLoadPat<ro128, v4f32, LDRQroW, LDRQroX>; |
| defm : VecROLoadPat<ro128, v8i16, LDRQroW, LDRQroX>; |
| defm : VecROLoadPat<ro128, v8f16, LDRQroW, LDRQroX>; |
| defm : VecROLoadPat<ro128, v16i8, LDRQroW, LDRQroX>; |
| } |
| } // AddedComplexity = 10 |
| |
| // zextload -> i64 |
| multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop, |
| Instruction INSTW, Instruction INSTX> { |
| def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), |
| (SUBREG_TO_REG (i64 0), |
| (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend), |
| sub_32)>; |
| |
| def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), |
| (SUBREG_TO_REG (i64 0), |
| (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend), |
| sub_32)>; |
| } |
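| // Writes to a W register implicitly zero bits [63:32] of the X register, so |
| // SUBREG_TO_REG with a zero immediate models the zero-extension for free; |
| // no extra instruction is needed. |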
| |
| let AddedComplexity = 10 in { |
| defm : ExtLoadTo64ROPat<ro8, zextloadi8, LDRBBroW, LDRBBroX>; |
| defm : ExtLoadTo64ROPat<ro16, zextloadi16, LDRHHroW, LDRHHroX>; |
| defm : ExtLoadTo64ROPat<ro32, zextloadi32, LDRWroW, LDRWroX>; |
| |
| // zextloadi1 -> zextloadi8 |
| defm : ExtLoadTo64ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>; |
| |
| // extload -> zextload |
| defm : ExtLoadTo64ROPat<ro8, extloadi8, LDRBBroW, LDRBBroX>; |
| defm : ExtLoadTo64ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>; |
| defm : ExtLoadTo64ROPat<ro32, extloadi32, LDRWroW, LDRWroX>; |
| |
| // extloadi1 -> zextloadi8 |
| defm : ExtLoadTo64ROPat<ro8, extloadi1, LDRBBroW, LDRBBroX>; |
| } |
| |
| |
| // extload/zextload -> i32 |
| multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop, |
| Instruction INSTW, Instruction INSTX> { |
| def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), |
| (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; |
| |
| def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), |
| (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; |
| } |
| |
| let AddedComplexity = 10 in { |
| // extload -> zextload |
| defm : ExtLoadTo32ROPat<ro8, extloadi8, LDRBBroW, LDRBBroX>; |
| defm : ExtLoadTo32ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>; |
| defm : ExtLoadTo32ROPat<ro32, extloadi32, LDRWroW, LDRWroX>; |
| |
| // zextloadi1 -> zextloadi8 |
| defm : ExtLoadTo32ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>; |
| } |
| |
| //--- |
| // (unsigned immediate) |
| //--- |
| defm LDRX : LoadUI<0b11, 0, 0b01, GPR64, uimm12s8, "ldr", |
| [(set GPR64:$Rt, |
| (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>; |
| defm LDRW : LoadUI<0b10, 0, 0b01, GPR32, uimm12s4, "ldr", |
| [(set GPR32:$Rt, |
| (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>; |
| defm LDRB : LoadUI<0b00, 1, 0b01, FPR8, uimm12s1, "ldr", |
| [(set FPR8:$Rt, |
| (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>; |
| defm LDRH : LoadUI<0b01, 1, 0b01, FPR16, uimm12s2, "ldr", |
| [(set (f16 FPR16:$Rt), |
| (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>; |
| defm LDRS : LoadUI<0b10, 1, 0b01, FPR32, uimm12s4, "ldr", |
| [(set (f32 FPR32:$Rt), |
| (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>; |
| defm LDRD : LoadUI<0b11, 1, 0b01, FPR64, uimm12s8, "ldr", |
| [(set (f64 FPR64:$Rt), |
| (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>; |
| defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128, uimm12s16, "ldr", |
| [(set (f128 FPR128:$Rt), |
| (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>; |
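| // The unsigned-immediate forms scale the written offset by the access size: |
| // e.g. "ldr x0, [x1, #32]" is the 64-bit LDR with imm12 = 32 / 8 = 4, giving |
| // 8-byte loads a reach of 0..32760 bytes. |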
| |
| // Regular loads have no alignment requirement, so it is safe to map the |
| // vector loads directly onto these "interesting" addressing modes. |
| // FIXME: We could do the same for bitconvert to floating point vectors. |
| def : Pat <(v8i8 (scalar_to_vector (i32 |
| (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), |
| (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), |
| (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; |
| def : Pat <(v16i8 (scalar_to_vector (i32 |
| (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), |
| (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), |
| (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; |
| def : Pat <(v4i16 (scalar_to_vector (i32 |
| (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), |
| (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), |
| (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; |
| def : Pat <(v8i16 (scalar_to_vector (i32 |
| (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), |
| (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), |
| (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; |
| def : Pat <(v2i32 (scalar_to_vector (i32 |
| (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), |
| (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), |
| (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; |
| def : Pat <(v4i32 (scalar_to_vector (i32 |
| (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), |
| (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), |
| (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; |
| def : Pat <(v1i64 (scalar_to_vector (i64 |
| (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))), |
| (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; |
| def : Pat <(v2i64 (scalar_to_vector (i64 |
| (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))), |
| (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), |
| (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>; |
| |
| // Match all 64-bit-wide loads whose type is compatible with FPR64. |
| let Predicates = [IsLE] in { |
| // Big-endian vector loads must use LD1, so restrict these to little-endian. |
| def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), |
| (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; |
| def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), |
| (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; |
| def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), |
| (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; |
| def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), |
| (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; |
| def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), |
| (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; |
| } |
| def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), |
| (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; |
| def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), |
| (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; |
| |
| // Match all 128-bit-wide loads whose type is compatible with FPR128. |
| let Predicates = [IsLE] in { |
| // Big-endian vector loads must use LD1, so restrict these to little-endian. |
| def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), |
| (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; |
| def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), |
| (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; |
| def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), |
| (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; |
| def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), |
| (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; |
| def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), |
| (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; |
| def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), |
| (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; |
| def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), |
| (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; |
| } |
| def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), |
| (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; |
| |
| defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh", |
| [(set GPR32:$Rt, |
| (zextloadi16 (am_indexed16 GPR64sp:$Rn, |
| uimm12s2:$offset)))]>; |
| defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb", |
| [(set GPR32:$Rt, |
| (zextloadi8 (am_indexed8 GPR64sp:$Rn, |
| uimm12s1:$offset)))]>; |
| // zextload -> i64 |
| def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), |
| (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; |
| def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), |
| (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>; |
| |
| // zextloadi1 -> zextloadi8 |
| def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), |
| (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; |
| def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), |
| (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; |
| |
| // extload -> zextload |
| def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), |
| (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>; |
| def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), |
| (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; |
| def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), |
| (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; |
| def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), |
| (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>; |
| def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), |
| (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>; |
| def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), |
| (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; |
| def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), |
| (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; |
| |
| // load sign-extended half-word |
| defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh", |
| [(set GPR32:$Rt, |
| (sextloadi16 (am_indexed16 GPR64sp:$Rn, |
| uimm12s2:$offset)))]>; |
| defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh", |
| [(set GPR64:$Rt, |
| (sextloadi16 (am_indexed16 GPR64sp:$Rn, |
| uimm12s2:$offset)))]>; |
| |
| // load sign-extended byte |
| defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb", |
| [(set GPR32:$Rt, |
| (sextloadi8 (am_indexed8 GPR64sp:$Rn, |
| uimm12s1:$offset)))]>; |
| defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb", |
| [(set GPR64:$Rt, |
| (sextloadi8 (am_indexed8 GPR64sp:$Rn, |
| uimm12s1:$offset)))]>; |
| |
| // load sign-extended word |
| defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw", |
| [(set GPR64:$Rt, |
| (sextloadi32 (am_indexed32 GPR64sp:$Rn, |
| uimm12s4:$offset)))]>; |
| |
| // load zero-extended word |
| def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), |
| (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>; |
| |
| // Pre-fetch. |
| def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm", |
| [(AArch64Prefetch imm:$Rt, |
| (am_indexed64 GPR64sp:$Rn, |
| uimm12s8:$offset))]>; |
| |
| def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>; |
| |
| //--- |
| // (literal) |
| def LDRWl : LoadLiteral<0b00, 0, GPR32, "ldr">; |
| def LDRXl : LoadLiteral<0b01, 0, GPR64, "ldr">; |
| def LDRSl : LoadLiteral<0b00, 1, FPR32, "ldr">; |
| def LDRDl : LoadLiteral<0b01, 1, FPR64, "ldr">; |
| def LDRQl : LoadLiteral<0b10, 1, FPR128, "ldr">; |
| |
| // load sign-extended word |
| def LDRSWl : LoadLiteral<0b10, 0, GPR64, "ldrsw">; |
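| // Literal loads are PC-relative with a 19-bit scaled offset, so e.g. |
| // "ldr x0, label" can reach +/-1MiB from the instruction. |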
| |
| // prefetch |
| def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>; |
| // [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>; |
| |
| //--- |
| // (unscaled immediate) |
| defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64, "ldur", |
| [(set GPR64:$Rt, |
| (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>; |
| defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32, "ldur", |
| [(set GPR32:$Rt, |
| (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; |
| defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8, "ldur", |
| [(set FPR8:$Rt, |
| (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; |
| defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16, "ldur", |
| [(set FPR16:$Rt, |
| (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; |
| defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32, "ldur", |
| [(set (f32 FPR32:$Rt), |
| (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; |
| defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64, "ldur", |
| [(set (f64 FPR64:$Rt), |
| (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>; |
| defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128, "ldur", |
| [(set (f128 FPR128:$Rt), |
| (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>; |
| |
| defm LDURHH |
| : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh", |
| [(set GPR32:$Rt, |
| (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; |
| defm LDURBB |
| : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb", |
| [(set GPR32:$Rt, |
|                          (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; |
| |
| // Match all 64-bit-wide loads whose type is compatible with FPR64. |
| let Predicates = [IsLE] in { |
| def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), |
| (LDURDi GPR64sp:$Rn, simm9:$offset)>; |
| def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), |
| (LDURDi GPR64sp:$Rn, simm9:$offset)>; |
| def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), |
| (LDURDi GPR64sp:$Rn, simm9:$offset)>; |
| def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), |
| (LDURDi GPR64sp:$Rn, simm9:$offset)>; |
| def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), |
| (LDURDi GPR64sp:$Rn, simm9:$offset)>; |
| } |
| def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), |
| (LDURDi GPR64sp:$Rn, simm9:$offset)>; |
| def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), |
| (LDURDi GPR64sp:$Rn, simm9:$offset)>; |
| |
| // Match all 128-bit-wide loads whose type is compatible with FPR128. |
| let Predicates = [IsLE] in { |
| def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), |
| (LDURQi GPR64sp:$Rn, simm9:$offset)>; |
| def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), |
| (LDURQi GPR64sp:$Rn, simm9:$offset)>; |
| def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), |
| (LDURQi GPR64sp:$Rn, simm9:$offset)>; |
| def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), |
| (LDURQi GPR64sp:$Rn, simm9:$offset)>; |
| def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), |
| (LDURQi GPR64sp:$Rn, simm9:$offset)>; |
| def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), |
| (LDURQi GPR64sp:$Rn, simm9:$offset)>; |
| def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), |
| (LDURQi GPR64sp:$Rn, simm9:$offset)>; |
| } |
| |
| // anyext -> zext |
| def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), |
| (LDURHHi GPR64sp:$Rn, simm9:$offset)>; |
| def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), |
| (LDURBBi GPR64sp:$Rn, simm9:$offset)>; |
| def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), |
| (LDURBBi GPR64sp:$Rn, simm9:$offset)>; |
| def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))), |
| (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>; |
| def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), |
| (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; |
| def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), |
| (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; |
| def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), |
| (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; |
| // unscaled zext |
| def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), |
| (LDURHHi GPR64sp:$Rn, simm9:$offset)>; |
| def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), |
| (LDURBBi GPR64sp:$Rn, simm9:$offset)>; |
| def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), |
| (LDURBBi GPR64sp:$Rn, simm9:$offset)>; |
| def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))), |
| (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>; |
| def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), |
| (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; |
| def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), |
| (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; |
| def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), |
| (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; |
| |
| |
| //--- |
| // LDR mnemonics fall back to LDUR for negative or unaligned offsets. |
| |
| // Define new assembler match classes as we want to only match these when |
| // they don't otherwise match the scaled addressing mode for LDR/STR. Don't |
| // associate a DiagnosticType either, as we want the diagnostic for the |
| // canonical form (the scaled operand) to take precedence. |
| class SImm9OffsetOperand<int Width> : AsmOperandClass { |
| let Name = "SImm9OffsetFB" # Width; |
| let PredicateMethod = "isSImm9OffsetFB<" # Width # ">"; |
| let RenderMethod = "addImmOperands"; |
| } |
| |
| def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>; |
| def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>; |
| def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>; |
| def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>; |
| def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>; |
| |
| def simm9_offset_fb8 : Operand<i64> { |
| let ParserMatchClass = SImm9OffsetFB8Operand; |
| } |
| def simm9_offset_fb16 : Operand<i64> { |
| let ParserMatchClass = SImm9OffsetFB16Operand; |
| } |
| def simm9_offset_fb32 : Operand<i64> { |
| let ParserMatchClass = SImm9OffsetFB32Operand; |
| } |
| def simm9_offset_fb64 : Operand<i64> { |
| let ParserMatchClass = SImm9OffsetFB64Operand; |
| } |
| def simm9_offset_fb128 : Operand<i64> { |
| let ParserMatchClass = SImm9OffsetFB128Operand; |
| } |
| |
| def : InstAlias<"ldr $Rt, [$Rn, $offset]", |
| (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; |
| def : InstAlias<"ldr $Rt, [$Rn, $offset]", |
| (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; |
| def : InstAlias<"ldr $Rt, [$Rn, $offset]", |
| (LDURBi FPR8:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; |
| def : InstAlias<"ldr $Rt, [$Rn, $offset]", |
| (LDURHi FPR16:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; |
| def : InstAlias<"ldr $Rt, [$Rn, $offset]", |
| (LDURSi FPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; |
| def : InstAlias<"ldr $Rt, [$Rn, $offset]", |
| (LDURDi FPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; |
| def : InstAlias<"ldr $Rt, [$Rn, $offset]", |
| (LDURQi FPR128:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>; |
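| // For example, "ldr x0, [x1, #1]" cannot be encoded as a scaled 64-bit LDR |
| // (the offset is not a multiple of 8), so it matches simm9_offset_fb64 and |
| // assembles to LDURXi; the trailing 0 keeps these aliases from being used |
| // when printing. |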
| |
| // zextload -> i64 |
| def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), |
| (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; |
| def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), |
| (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; |
| |
| // load sign-extended half-word |
| defm LDURSHW |
| : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh", |
| [(set GPR32:$Rt, |
| (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; |
| defm LDURSHX |
|