//===-- AMDGPURegBankLegalizeRules.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Definitions of RegBankLegalize rules for all opcodes.
/// Implements the container that holds all the rules and the rule search,
/// with a fast path for the most common case, where Rule.Predicate only
/// checks the LLT and uniformity of the register in operand 0.
//
//===----------------------------------------------------------------------===//

#include "AMDGPURegBankLegalizeRules.h"
#include "AMDGPUInstrInfo.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/AMDGPUAddrSpace.h"

#define DEBUG_TYPE "amdgpu-regbanklegalize"

using namespace llvm;
using namespace AMDGPU;

RegBankLLTMapping::RegBankLLTMapping(
    std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
    std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
    LoweringMethodID LoweringMethod)
    : DstOpMapping(DstOpMappingList), SrcOpMapping(SrcOpMappingList),
      LoweringMethod(LoweringMethod) {}

PredicateMapping::PredicateMapping(
    std::initializer_list<UniformityLLTOpPredicateID> OpList,
    std::function<bool(const MachineInstr &)> TestFunc)
    : OpUniformityAndTypes(OpList), TestFunc(TestFunc) {}

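// Naming convention for the predicate IDs handled below: 'S*' and 'P*' match
// an exact scalar or pointer LLT, 'B*' match any LLT of the given size in
// bits, and a 'Uni'/'Div' prefix additionally requires the register to be
// uniform or divergent according to the uniformity analysis.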
bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
                           const MachineUniformityInfo &MUI,
                           const MachineRegisterInfo &MRI) {
  switch (UniID) {
  case S1:
    return MRI.getType(Reg) == LLT::scalar(1);
  case S16:
    return MRI.getType(Reg) == LLT::scalar(16);
  case S32:
    return MRI.getType(Reg) == LLT::scalar(32);
  case S64:
    return MRI.getType(Reg) == LLT::scalar(64);
  case P0:
    return MRI.getType(Reg) == LLT::pointer(0, 64);
  case P1:
    return MRI.getType(Reg) == LLT::pointer(1, 64);
  case P3:
    return MRI.getType(Reg) == LLT::pointer(3, 32);
  case P4:
    return MRI.getType(Reg) == LLT::pointer(4, 64);
  case P5:
    return MRI.getType(Reg) == LLT::pointer(5, 32);
  case V4S32:
    return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
  case B32:
    return MRI.getType(Reg).getSizeInBits() == 32;
  case B64:
    return MRI.getType(Reg).getSizeInBits() == 64;
  case B96:
    return MRI.getType(Reg).getSizeInBits() == 96;
  case B128:
    return MRI.getType(Reg).getSizeInBits() == 128;
  case B256:
    return MRI.getType(Reg).getSizeInBits() == 256;
  case B512:
    return MRI.getType(Reg).getSizeInBits() == 512;
  case UniS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isUniform(Reg);
  case UniS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isUniform(Reg);
  case UniS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isUniform(Reg);
  case UniS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isUniform(Reg);
  case UniP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isUniform(Reg);
  case UniP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isUniform(Reg);
  case UniP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isUniform(Reg);
  case UniP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg);
  case UniP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg);
  case UniB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(Reg);
  case UniB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isUniform(Reg);
  case UniB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isUniform(Reg);
  case UniB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniform(Reg);
  case UniB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniform(Reg);
  case UniB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg);
  case DivS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergent(Reg);
  case DivS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergent(Reg);
  case DivS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isDivergent(Reg);
  case DivP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isDivergent(Reg);
  case DivP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isDivergent(Reg);
  case DivP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isDivergent(Reg);
  case DivP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isDivergent(Reg);
  case DivP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isDivergent(Reg);
  case DivB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg);
  case DivB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isDivergent(Reg);
  case DivB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isDivergent(Reg);
  case DivB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isDivergent(Reg);
  case DivB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isDivergent(Reg);
  case DivB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergent(Reg);
  case _:
    return true;
  default:
    llvm_unreachable("missing matchUniformityAndLLT");
  }
}

bool PredicateMapping::match(const MachineInstr &MI,
                             const MachineUniformityInfo &MUI,
                             const MachineRegisterInfo &MRI) const {
  // Check LLT signature.
  for (unsigned i = 0; i < OpUniformityAndTypes.size(); ++i) {
    if (OpUniformityAndTypes[i] == _) {
      if (MI.getOperand(i).isReg())
        return false;
      continue;
    }

    // Remaining IDs check registers.
    if (!MI.getOperand(i).isReg())
      return false;

    if (!matchUniformityAndLLT(MI.getOperand(i).getReg(),
                               OpUniformityAndTypes[i], MUI, MRI))
      return false;
  }

  // More complex check.
  if (TestFunc)
    return TestFunc(MI);

  return true;
}

SetOfRulesForOpcode::SetOfRulesForOpcode() {}

SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)
    : FastTypes(FastTypes) {}

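// LLTToId and LLTToBId map the LLT of operand 0 to the predicate ID used for
// the fast-rule slot lookup; LLTToBId buckets types by size in bits and is
// used for opcodes with StandardB fast rules.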
UniformityLLTOpPredicateID LLTToId(LLT Ty) {
  if (Ty == LLT::scalar(16))
    return S16;
  if (Ty == LLT::scalar(32))
    return S32;
  if (Ty == LLT::scalar(64))
    return S64;
  if (Ty == LLT::fixed_vector(2, 16))
    return V2S16;
  if (Ty == LLT::fixed_vector(2, 32))
    return V2S32;
  if (Ty == LLT::fixed_vector(3, 32))
    return V3S32;
  if (Ty == LLT::fixed_vector(4, 32))
    return V4S32;
  return _;
}

UniformityLLTOpPredicateID LLTToBId(LLT Ty) {
  if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) ||
      Ty == LLT::pointer(3, 32) || Ty == LLT::pointer(5, 32) ||
      Ty == LLT::pointer(6, 32))
    return B32;
  if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
      Ty == LLT::fixed_vector(4, 16) || Ty == LLT::pointer(1, 64) ||
      Ty == LLT::pointer(4, 64))
    return B64;
  if (Ty == LLT::fixed_vector(3, 32))
    return B96;
  if (Ty == LLT::fixed_vector(4, 32))
    return B128;
  return _;
}

const RegBankLLTMapping &
SetOfRulesForOpcode::findMappingForMI(const MachineInstr &MI,
                                      const MachineRegisterInfo &MRI,
                                      const MachineUniformityInfo &MUI) const {
  // Search the "fast rules" first.
  // Note: when fast rules are enabled, a RegBankLLTMapping must be added to
  // every slot that could match the fast predicate. Otherwise InvalidMapping
  // is returned, which results in failure; the "slow rules" are not searched.
  if (FastTypes != NoFastRules) {
    Register Reg = MI.getOperand(0).getReg();
    int Slot;
    if (FastTypes == StandardB)
      Slot = getFastPredicateSlot(LLTToBId(MRI.getType(Reg)));
    else
      Slot = getFastPredicateSlot(LLTToId(MRI.getType(Reg)));

    if (Slot != -1)
      return MUI.isUniform(Reg) ? Uni[Slot] : Div[Slot];
  }

  // Slow search for more complex rules.
  for (const RegBankLegalizeRule &Rule : Rules) {
    if (Rule.Predicate.match(MI, MUI, MRI))
      return Rule.OperandMapping;
  }

  LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
  llvm_unreachable("None of the rules defined for MI's opcode matched MI");
}

void SetOfRulesForOpcode::addRule(RegBankLegalizeRule Rule) {
  Rules.push_back(Rule);
}

void SetOfRulesForOpcode::addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
                                               RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Div[Slot] = RuleApplyIDs;
}

void SetOfRulesForOpcode::addFastRuleUniform(UniformityLLTOpPredicateID Ty,
                                             RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Uni[Slot] = RuleApplyIDs;
}

int SetOfRulesForOpcode::getFastPredicateSlot(
    UniformityLLTOpPredicateID Ty) const {
  switch (FastTypes) {
  case Standard: {
    switch (Ty) {
    case S32:
      return 0;
    case S16:
      return 1;
    case S64:
      return 2;
    case V2S16:
      return 3;
    default:
      return -1;
    }
  }
  case StandardB: {
    switch (Ty) {
    case B32:
      return 0;
    case B64:
      return 1;
    case B96:
      return 2;
    case B128:
      return 3;
    default:
      return -1;
    }
  }
  case Vector: {
    switch (Ty) {
    case S32:
      return 0;
    case V2S32:
      return 1;
    case V3S32:
      return 2;
    case V4S32:
      return 3;
    default:
      return -1;
    }
  }
  default:
    return -1;
  }
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, GRulesAlias, GRules, FastTypes);
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, IRulesAlias, IRules, FastTypes);
}

const SetOfRulesForOpcode &
RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||
      Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||
      Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
    unsigned IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
    auto IRAIt = IRulesAlias.find(IntrID);
    if (IRAIt == IRulesAlias.end()) {
      LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
      llvm_unreachable("No rules defined for intrinsic opcode");
    }
    return IRules.at(IRAIt->second);
  }

  auto GRAIt = GRulesAlias.find(Opc);
  if (GRAIt == GRulesAlias.end()) {
    LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
    llvm_unreachable("No rules defined for generic opcode");
  }
  return GRules.at(GRAIt->second);
}
// Syntactic-sugar wrapper around a predicate lambda that enables composition
// with '&&', '||' and '!'.
class Predicate {
private:
  struct Elt {
    // Save the formula composed of Pred, '&&', '||' and '!' as a jump table.
    // Sink '!' into the Preds, for example !((A && !B) || C) -> (!A || B) && !C.
    // Sequences of && and || are represented by jumps, for example:
    // (A && B && ... X) or (A && B && ... X) || Y
    //   A == true: jump to B
    //   A == false: jump to the end or to Y; the result is A (false) or Y
    // (A || B || ... X) or (A || B || ... X) && Y
    //   A == true: jump to the end or to Y; the result is A (true) or Y
    //   A == false: jump to B
    // Note that to negate an expression we simply flip Neg on each Pred and
    // swap TJumpOffset and FJumpOffset (&& becomes ||, || becomes &&).
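    // A worked example of the encoding, with offsets relative to the current
    // element (reaching Expression.size() ends the walk):
    //   (A && B) || C -> [{A, T:+1, F:+2}, {B, T:+2, F:+1}, {C, T:+1, F:+1}]
    // A true jumps to B, A false jumps to C; B true jumps past the end (the
    // result is B), B false falls through to C, whose result is final.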
    std::function<bool(const MachineInstr &)> Pred;
    bool Neg; // Neg of Pred is calculated before jump
    unsigned TJumpOffset;
    unsigned FJumpOffset;
  };

  SmallVector<Elt, 8> Expression;

  Predicate(SmallVectorImpl<Elt> &&Expr) { Expression.swap(Expr); };

public:
  Predicate(std::function<bool(const MachineInstr &)> Pred) {
    Expression.push_back({Pred, false, 1, 1});
  };

  bool operator()(const MachineInstr &MI) const {
    unsigned Idx = 0;
    unsigned ResultIdx = Expression.size();
    bool Result;
    do {
      Result = Expression[Idx].Pred(MI);
      Result = Expression[Idx].Neg ? !Result : Result;
      if (Result) {
        Idx += Expression[Idx].TJumpOffset;
      } else {
        Idx += Expression[Idx].FJumpOffset;
      }
    } while ((Idx != ResultIdx));

    return Result;
  };

  Predicate operator!() const {
    SmallVector<Elt, 8> NegExpression;
    for (const Elt &ExprElt : Expression) {
      NegExpression.push_back({ExprElt.Pred, !ExprElt.Neg, ExprElt.FJumpOffset,
                               ExprElt.TJumpOffset});
    }
    return Predicate(std::move(NegExpression));
  };

  Predicate operator&&(const Predicate &RHS) const {
    SmallVector<Elt, 8> AndExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in false, whole expression results in false.
      if (i + AndExpression[i].FJumpOffset == ResultIdx)
        AndExpression[i].FJumpOffset += RHSSize;
    }

    AndExpression.append(RHS.Expression);

    return Predicate(std::move(AndExpression));
  }

  Predicate operator||(const Predicate &RHS) const {
    SmallVector<Elt, 8> OrExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in true, whole expression results in true.
      if (i + OrExpression[i].TJumpOffset == ResultIdx)
        OrExpression[i].TJumpOffset += RHSSize;
    }

    OrExpression.append(RHS.Expression);

    return Predicate(std::move(OrExpression));
  }
};

// Initialize rules
RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
                                           MachineRegisterInfo &_MRI)
    : ST(&_ST), MRI(&_MRI) {

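  // A RegBankLLTMapping below is written as {{DstOperandIDs}, {SrcOperandIDs}}
  // with an optional LoweringMethodID (e.g. SplitTo32) as the third element.
  // .Uni(Ty, ...) and .Div(Ty, ...) key the rule on the uniformity and type of
  // operand 0; .Any takes a full PredicateMapping for more involved checks.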
  addRulesForGOpcs({G_ADD}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_MUL}, Standard).Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
      .Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}})
      .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
      .Div(B32, {{VgprB32}, {VgprB32, VgprB32}})
      .Uni(B64, {{SgprB64}, {SgprB64, SgprB64}})
      .Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});

  addRulesForGOpcs({G_SHL}, Standard)
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  // Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT, G_FCONSTANT
  // and G_FREEZE here; the rest are trivially regbankselected earlier.
  addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}});
  addRulesForGOpcs({G_CONSTANT})
      .Any({{UniS1, _}, {{Sgpr32Trunc}, {None}, UniCstExt}});
  addRulesForGOpcs({G_FREEZE}).Any({{DivS1}, {{Vcc}, {Vcc}}});

  addRulesForGOpcs({G_ICMP})
      .Any({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
      .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}});

  addRulesForGOpcs({G_FCMP})
      .Any({{UniS1, _, S32}, {{UniInVcc}, {None, Vgpr32, Vgpr32}}})
      .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}});

  addRulesForGOpcs({G_BRCOND})
      .Any({{UniS1}, {{}, {Sgpr32AExtBoolInReg}}})
      .Any({{DivS1}, {{}, {Vcc}}});

  addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}});

  addRulesForGOpcs({G_SELECT}, StandardB)
      .Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}})
      .Uni(B32, {{SgprB32}, {Sgpr32AExtBoolInReg, SgprB32, SgprB32}});

  addRulesForGOpcs({G_ANYEXT}).Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}});

  // In GlobalISel, an in-register G_TRUNC is treated as a no-op and is
  // instruction-selected into a COPY. It is up to the user to deal with the
  // truncated bits.
  addRulesForGOpcs({G_TRUNC})
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}})
      // This is non-trivial. VgprToVccCopy is done using a compare instruction.
      .Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}});

  addRulesForGOpcs({G_ZEXT, G_SEXT})
      .Any({{UniS32, S1}, {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}});

  bool hasUnalignedLoads = ST->getGeneration() >= AMDGPUSubtarget::GFX12;
  bool hasSMRDSmall = ST->hasScalarSubwordLoads();

  Predicate isAlign16([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(16);
  });

  Predicate isAlign4([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(4);
  });

  Predicate isAtomicMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isAtomic();
  });

  Predicate isUniMMO([](const MachineInstr &MI) -> bool {
    return AMDGPUInstrInfo::isUniformMMO(*MI.memoperands_begin());
  });

  Predicate isConst([](const MachineInstr &MI) -> bool {
    // The address space in the MMO can be different from the address space of
    // the pointer.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned AS = MMO->getAddrSpace();
    return AS == AMDGPUAS::CONSTANT_ADDRESS ||
           AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
  });

  Predicate isVolatileMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isVolatile();
  });

  Predicate isInvMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isInvariant();
  });

  Predicate isNoClobberMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getFlags() & MONoClobber;
  });

  Predicate isNaturalAlignedSmall([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned MemSize = 8 * MMO->getSize().getValue();
    return (MemSize == 16 && MMO->getAlign() >= Align(2)) ||
           (MemSize == 8 && MMO->getAlign() >= Align(1));
  });

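  // isUL approximates "safe uniform load": not atomic, uniform MMO, and the
  // memory is either constant or both non-volatile and known not to be
  // clobbered (invariant or MONoClobber). The G_LOAD rules below use it to
  // decide when a uniform load may stay on the scalar (Sgpr) path.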
  auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
              (isConst || isInvMMO || isNoClobberMMO);

  // clang-format off
  addRulesForGOpcs({G_LOAD})
      .Any({{DivB32, DivP0}, {{VgprB32}, {VgprP0}}})

      .Any({{DivB32, DivP1}, {{VgprB32}, {VgprP1}}})
      .Any({{{UniB256, UniP1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
      .Any({{{UniB512, UniP1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})
      .Any({{{UniB32, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {VgprP1}}})
      .Any({{{UniB256, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {VgprP1}, SplitLoad}})
      .Any({{{UniB512, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {VgprP1}, SplitLoad}})

      .Any({{DivB32, UniP3}, {{VgprB32}, {VgprP3}}})
      .Any({{{UniB32, UniP3}, isAlign4 && isUL}, {{SgprB32}, {SgprP3}}})
      .Any({{{UniB32, UniP3}, !isAlign4 || !isUL}, {{UniInVgprB32}, {VgprP3}}})

      .Any({{{DivB256, DivP4}}, {{VgprB256}, {VgprP4}, SplitLoad}})
      .Any({{{UniB32, UniP4}, isNaturalAlignedSmall && isUL}, {{SgprB32}, {SgprP4}}}, hasSMRDSmall) // i8 and i16 load
      .Any({{{UniB32, UniP4}, isAlign4 && isUL}, {{SgprB32}, {SgprP4}}})
      .Any({{{UniB96, UniP4}, isAlign16 && isUL}, {{SgprB96}, {SgprP4}, WidenLoad}}, !hasUnalignedLoads)
      .Any({{{UniB96, UniP4}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP4}, SplitLoad}}, !hasUnalignedLoads)
      .Any({{{UniB96, UniP4}, isAlign4 && isUL}, {{SgprB96}, {SgprP4}}}, hasUnalignedLoads)
      .Any({{{UniB256, UniP4}, isAlign4 && isUL}, {{SgprB256}, {SgprP4}}})
      .Any({{{UniB512, UniP4}, isAlign4 && isUL}, {{SgprB512}, {SgprP4}}})
      .Any({{{UniB32, UniP4}, !isNaturalAlignedSmall || !isUL}, {{UniInVgprB32}, {VgprP4}}}, hasSMRDSmall) // i8 and i16 load
      .Any({{{UniB32, UniP4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {VgprP4}}})
      .Any({{{UniB256, UniP4}, !isAlign4 || !isUL}, {{UniInVgprB256}, {VgprP4}, SplitLoad}})
      .Any({{{UniB512, UniP4}, !isAlign4 || !isUL}, {{UniInVgprB512}, {VgprP4}, SplitLoad}})

      .Any({{DivB32, P5}, {{VgprB32}, {VgprP5}}});

  addRulesForGOpcs({G_ZEXTLOAD}) // i8 and i16 zero-extending loads
      .Any({{{UniB32, UniP3}, !isAlign4 || !isUL}, {{UniInVgprB32}, {VgprP3}}})
      .Any({{{UniB32, UniP4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {VgprP4}}});
  // clang-format on

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD}, Vector)
      .Div(S32, {{Vgpr32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}})
      .Uni(S32, {{UniInVgprS32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}})
      .Div(V4S32, {{VgprV4S32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}})
      .Uni(V4S32, {{UniInVgprV4S32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}});

  addRulesForGOpcs({G_STORE})
      .Any({{S32, P0}, {{}, {Vgpr32, VgprP0}}})
      .Any({{S32, P1}, {{}, {Vgpr32, VgprP1}}})
      .Any({{S64, P1}, {{}, {Vgpr64, VgprP1}}})
      .Any({{V4S32, P1}, {{}, {VgprV4S32, VgprP1}}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_STORE})
      .Any({{S32}, {{}, {Vgpr32, SgprV4S32, Vgpr32, Vgpr32, Sgpr32}}});

  addRulesForGOpcs({G_PTR_ADD})
      .Any({{UniP1}, {{SgprP1}, {SgprP1, Sgpr64}}})
      .Any({{DivP1}, {{VgprP1}, {VgprP1, Vgpr64}}})
      .Any({{DivP0}, {{VgprP0}, {VgprP0, Vgpr64}}});

  addRulesForGOpcs({G_INTTOPTR}).Any({{UniP4}, {{SgprP4}, {Sgpr64}}});

  addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});

  bool hasSALUFloat = ST->hasSALUFloatInsts();

  addRulesForGOpcs({G_FADD}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat)
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_FPTOUI})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);

  addRulesForGOpcs({G_UITOFP})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);

  using namespace Intrinsic;

  addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});

  // This is an "intrinsic lane mask"; it was set to i32/i64 in LLVM IR.
  addRulesForIOpcs({amdgcn_end_cf}).Any({{_, S32}, {{}, {None, Sgpr32}}});

  addRulesForIOpcs({amdgcn_if_break}, Standard)
      .Uni(S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}});

  addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
      .Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});

  addRulesForIOpcs({amdgcn_readfirstlane})
      .Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}});

} // end initialize rules