| //===- AMDGPURegisterBankInfo.cpp -------------------------------*- C++ -*-==// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| /// \file |
| /// This file implements the targeting of the RegisterBankInfo class for |
| /// AMDGPU. |
| /// \todo This should be generated by TableGen. |
| //===----------------------------------------------------------------------===// |
| |
| #include "AMDGPURegisterBankInfo.h" |
| #include "AMDGPUInstrInfo.h" |
| #include "AMDGPUSubtarget.h" |
| #include "SIMachineFunctionInfo.h" |
| #include "SIRegisterInfo.h" |
| #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
| #include "llvm/ADT/SmallSet.h" |
| #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
| #include "llvm/CodeGen/GlobalISel/RegisterBank.h" |
| #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" |
| #include "llvm/CodeGen/TargetRegisterInfo.h" |
| #include "llvm/CodeGen/TargetSubtargetInfo.h" |
| #include "llvm/IR/Constants.h" |
| |
| #define GET_TARGET_REGBANK_IMPL |
| #include "AMDGPUGenRegisterBank.inc" |
| |
| // This file will be TableGen'ed at some point. |
| #include "AMDGPUGenRegisterBankInfo.def" |
| |
| using namespace llvm; |
| |
| AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI) |
| : AMDGPUGenRegisterBankInfo(), |
| TRI(static_cast<const SIRegisterInfo*>(&TRI)) { |
| |
| // HACK: Until this is fully tablegen'd. |
| static bool AlreadyInit = false; |
| if (AlreadyInit) |
| return; |
| |
| AlreadyInit = true; |
| |
| const RegisterBank &RBSGPR = getRegBank(AMDGPU::SGPRRegBankID); |
| (void)RBSGPR; |
| assert(&RBSGPR == &AMDGPU::SGPRRegBank); |
| |
| const RegisterBank &RBVGPR = getRegBank(AMDGPU::VGPRRegBankID); |
| (void)RBVGPR; |
| assert(&RBVGPR == &AMDGPU::VGPRRegBank); |
| |
| } |
| |
| unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst, |
| const RegisterBank &Src, |
| unsigned Size) const { |
| if (Dst.getID() == AMDGPU::SGPRRegBankID && |
| Src.getID() == AMDGPU::VGPRRegBankID) { |
| return std::numeric_limits<unsigned>::max(); |
| } |
| |
| // SGPRRegBank with size 1 is actually vcc or another 64-bit sgpr written by |
| // the valu. |
| if (Size == 1 && Dst.getID() == AMDGPU::SCCRegBankID && |
| (Src.getID() == AMDGPU::SGPRRegBankID || |
| Src.getID() == AMDGPU::VGPRRegBankID || |
| Src.getID() == AMDGPU::VCCRegBankID)) |
| return std::numeric_limits<unsigned>::max(); |
| |
| if (Dst.getID() == AMDGPU::SCCRegBankID && |
| Src.getID() == AMDGPU::VCCRegBankID) |
| return std::numeric_limits<unsigned>::max(); |
| |
| return RegisterBankInfo::copyCost(Dst, Src, Size); |
| } |
| |
| unsigned AMDGPURegisterBankInfo::getBreakDownCost( |
| const ValueMapping &ValMapping, |
| const RegisterBank *CurBank) const { |
| assert(ValMapping.NumBreakDowns == 2 && |
| ValMapping.BreakDown[0].Length == 32 && |
| ValMapping.BreakDown[0].StartIdx == 0 && |
| ValMapping.BreakDown[1].Length == 32 && |
| ValMapping.BreakDown[1].StartIdx == 32 && |
| ValMapping.BreakDown[0].RegBank == ValMapping.BreakDown[1].RegBank); |
| |
| // 32-bit extract of a 64-bit value is just access of a subregister, so free. |
| // TODO: Cost of 0 hits assert, though it's not clear it's what we really |
| // want. |
| |
| // TODO: 32-bit insert to a 64-bit SGPR may incur a non-free copy due to SGPR |
| // alignment restrictions, but this probably isn't important. |
| return 1; |
| } |
| |
| const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass( |
| const TargetRegisterClass &RC) const { |
| |
| if (TRI->isSGPRClass(&RC)) |
| return getRegBank(AMDGPU::SGPRRegBankID); |
| |
| return getRegBank(AMDGPU::VGPRRegBankID); |
| } |
| |
| template <unsigned NumOps> |
| RegisterBankInfo::InstructionMappings |
| AMDGPURegisterBankInfo::addMappingFromTable( |
| const MachineInstr &MI, const MachineRegisterInfo &MRI, |
| const std::array<unsigned, NumOps> RegSrcOpIdx, |
| ArrayRef<OpRegBankEntry<NumOps>> Table) const { |
| |
| InstructionMappings AltMappings; |
| |
| SmallVector<const ValueMapping *, 10> Operands(MI.getNumOperands()); |
| |
| unsigned Sizes[NumOps]; |
| for (unsigned I = 0; I < NumOps; ++I) { |
| unsigned Reg = MI.getOperand(RegSrcOpIdx[I]).getReg(); |
| Sizes[I] = getSizeInBits(Reg, MRI, *TRI); |
| } |
| |
| for (unsigned I = 0, E = MI.getNumExplicitDefs(); I != E; ++I) { |
| unsigned SizeI = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI); |
| Operands[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI); |
| } |
| |
| unsigned MappingID = 0; |
| for (const auto &Entry : Table) { |
| for (unsigned I = 0; I < NumOps; ++I) { |
| int OpIdx = RegSrcOpIdx[I]; |
| Operands[OpIdx] = AMDGPU::getValueMapping(Entry.RegBanks[I], Sizes[I]); |
| } |
| |
| AltMappings.push_back(&getInstructionMapping(MappingID++, Entry.Cost, |
| getOperandsMapping(Operands), |
| Operands.size())); |
| } |
| |
| return AltMappings; |
| } |
| |
| RegisterBankInfo::InstructionMappings |
| AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects( |
| const MachineInstr &MI, const MachineRegisterInfo &MRI) const { |
| |
| switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { |
| case Intrinsic::amdgcn_buffer_load: { |
| static const OpRegBankEntry<3> Table[4] = { |
| // Perfectly legal. |
| { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 }, |
| { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 }, |
| |
| // Waterfall loop needed for rsrc. In the worst case this will execute |
| // approximately an extra 10 * wavesize + 2 instructions. |
| { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 }, |
| { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1000 } |
| }; |
| |
| // rsrc, voffset, offset |
| const std::array<unsigned, 3> RegSrcOpIdx = { { 2, 3, 4 } }; |
| return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table)); |
| } |
| case Intrinsic::amdgcn_s_buffer_load: { |
| static const OpRegBankEntry<2> Table[4] = { |
| // Perfectly legal. |
| { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 }, |
| |
| // Only need 1 register in loop |
| { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 300 }, |
| |
| // Have to waterfall the resource. |
| { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 }, |
| |
| // Have to waterfall the resource, and the offset. |
| { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1500 } |
| }; |
| |
| // rsrc, offset |
| const std::array<unsigned, 2> RegSrcOpIdx = { { 2, 3 } }; |
| return addMappingFromTable<2>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table)); |
| } |
| default: |
| return RegisterBankInfo::getInstrAlternativeMappings(MI); |
| } |
| } |
| |
| RegisterBankInfo::InstructionMappings |
| AMDGPURegisterBankInfo::getInstrAlternativeMappings( |
| const MachineInstr &MI) const { |
| |
| const MachineFunction &MF = *MI.getParent()->getParent(); |
| const MachineRegisterInfo &MRI = MF.getRegInfo(); |
| |
| |
| InstructionMappings AltMappings; |
| switch (MI.getOpcode()) { |
| case TargetOpcode::G_AND: |
| case TargetOpcode::G_OR: |
| case TargetOpcode::G_XOR: { |
| unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); |
| if (Size != 64) |
| break; |
| |
| const InstructionMapping &SSMapping = getInstructionMapping( |
| 1, 1, getOperandsMapping( |
| {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), |
| AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), |
| AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}), |
| 3); // Num Operands |
| AltMappings.push_back(&SSMapping); |
| |
| const InstructionMapping &VVMapping = getInstructionMapping( |
| 2, 2, getOperandsMapping( |
| {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size), |
| AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size), |
| AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}), |
| 3); // Num Operands |
| AltMappings.push_back(&VVMapping); |
| |
| const InstructionMapping &SVMapping = getInstructionMapping( |
| 3, 3, getOperandsMapping( |
| {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size), |
| AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size), |
| AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}), |
| 3); // Num Operands |
| AltMappings.push_back(&SVMapping); |
| |
| // SGPR in LHS is slightly preferrable, so make it VS more expnesive than |
| // SV. |
| const InstructionMapping &VSMapping = getInstructionMapping( |
| 3, 4, getOperandsMapping( |
| {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size), |
| AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size), |
| AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size)}), |
| 3); // Num Operands |
| AltMappings.push_back(&VSMapping); |
| break; |
| } |
| case TargetOpcode::G_LOAD: { |
| unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); |
| // FIXME: Should we be hard coding the size for these mappings? |
| const InstructionMapping &SSMapping = getInstructionMapping( |
| 1, 1, getOperandsMapping( |
| {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), |
| AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}), |
| 2); // Num Operands |
| AltMappings.push_back(&SSMapping); |
| |
| const InstructionMapping &VVMapping = getInstructionMapping( |
| 2, 1, getOperandsMapping( |
| {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size), |
| AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}), |
| 2); // Num Operands |
| AltMappings.push_back(&VVMapping); |
| |
| // FIXME: Should this be the pointer-size (64-bits) or the size of the |
| // register that will hold the bufffer resourc (128-bits). |
| const InstructionMapping &VSMapping = getInstructionMapping( |
| 3, 1, getOperandsMapping( |
| {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size), |
| AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}), |
| 2); // Num Operands |
| AltMappings.push_back(&VSMapping); |
| |
| return AltMappings; |
| |
| } |
| case TargetOpcode::G_ICMP: { |
| unsigned Size = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI); |
| const InstructionMapping &SSMapping = getInstructionMapping(1, 1, |
| getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1), |
| nullptr, // Predicate operand. |
| AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), |
| AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}), |
| 4); // Num Operands |
| AltMappings.push_back(&SSMapping); |
| |
| const InstructionMapping &SVMapping = getInstructionMapping(2, 1, |
| getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), |
| nullptr, // Predicate operand. |
| AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), |
| AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}), |
| 4); // Num Operands |
| AltMappings.push_back(&SVMapping); |
| |
| const InstructionMapping &VSMapping = getInstructionMapping(3, 1, |
| getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), |
| nullptr, // Predicate operand. |
| AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size), |
| AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}), |
| 4); // Num Operands |
| AltMappings.push_back(&VSMapping); |
| |
| const InstructionMapping &VVMapping = getInstructionMapping(4, 1, |
| getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), |
| nullptr, // Predicate operand. |
| AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size), |
| AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}), |
| 4); // Num Operands |
| AltMappings.push_back(&VVMapping); |
| |
| return AltMappings; |
| } |
| case TargetOpcode::G_SELECT: { |
| unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); |
| const InstructionMapping &SSMapping = getInstructionMapping(1, 1, |
| getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), |
| AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1), |
| AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), |
| AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}), |
| 4); // Num Operands |
| AltMappings.push_back(&SSMapping); |
| |
| const InstructionMapping &VVMapping = getInstructionMapping(2, 1, |
| getOperandsMapping({AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size), |
| AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), |
| AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size), |
| AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}), |
| 4); // Num Operands |
| AltMappings.push_back(&VVMapping); |
| |
| return AltMappings; |
| } |
| case TargetOpcode::G_UADDE: |
| case TargetOpcode::G_USUBE: |
| case TargetOpcode::G_SADDE: |
| case TargetOpcode::G_SSUBE: { |
| unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); |
| const InstructionMapping &SSMapping = getInstructionMapping(1, 1, |
| getOperandsMapping( |
| {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), |
| AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1), |
| AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), |
| AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), |
| AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1)}), |
| 5); // Num Operands |
| AltMappings.push_back(&SSMapping); |
| |
| const InstructionMapping &VVMapping = getInstructionMapping(2, 1, |
| getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size), |
| AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), |
| AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size), |
| AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size), |
| AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1)}), |
| 5); // Num Operands |
| AltMappings.push_back(&VVMapping); |
| return AltMappings; |
| } |
| case AMDGPU::G_BRCOND: { |
| assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1); |
| |
| const InstructionMapping &SMapping = getInstructionMapping( |
| 1, 1, getOperandsMapping( |
| {AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1), nullptr}), |
| 2); // Num Operands |
| AltMappings.push_back(&SMapping); |
| |
| const InstructionMapping &VMapping = getInstructionMapping( |
| 1, 1, getOperandsMapping( |
| {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), nullptr }), |
| 2); // Num Operands |
| AltMappings.push_back(&VMapping); |
| return AltMappings; |
| } |
| case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: |
| return getInstrAlternativeMappingsIntrinsicWSideEffects(MI, MRI); |
| default: |
| break; |
| } |
| return RegisterBankInfo::getInstrAlternativeMappings(MI); |
| } |
| |
| void AMDGPURegisterBankInfo::split64BitValueForMapping( |
| MachineIRBuilder &B, |
| SmallVector<Register, 2> &Regs, |
| LLT HalfTy, |
| unsigned Reg) const { |
| assert(HalfTy.getSizeInBits() == 32); |
| MachineRegisterInfo *MRI = B.getMRI(); |
| unsigned LoLHS = MRI->createGenericVirtualRegister(HalfTy); |
| unsigned HiLHS = MRI->createGenericVirtualRegister(HalfTy); |
| const RegisterBank *Bank = getRegBank(Reg, *MRI, *TRI); |
| MRI->setRegBank(LoLHS, *Bank); |
| MRI->setRegBank(HiLHS, *Bank); |
| |
| Regs.push_back(LoLHS); |
| Regs.push_back(HiLHS); |
| |
| B.buildInstr(AMDGPU::G_UNMERGE_VALUES) |
| .addDef(LoLHS) |
| .addDef(HiLHS) |
| .addUse(Reg); |
| } |
| |
| /// Replace the current type each register in \p Regs has with \p NewTy |
| static void setRegsToType(MachineRegisterInfo &MRI, ArrayRef<Register> Regs, |
| LLT NewTy) { |
| for (unsigned Reg : Regs) { |
| assert(MRI.getType(Reg).getSizeInBits() == NewTy.getSizeInBits()); |
| MRI.setType(Reg, NewTy); |
| } |
| } |
| |
| static LLT getHalfSizedType(LLT Ty) { |
| if (Ty.isVector()) { |
| assert(Ty.getNumElements() % 2 == 0); |
| return LLT::scalarOrVector(Ty.getNumElements() / 2, Ty.getElementType()); |
| } |
| |
| assert(Ty.getSizeInBits() % 2 == 0); |
| return LLT::scalar(Ty.getSizeInBits() / 2); |
| } |
| |
| /// Legalize instruction \p MI where operands in \p OpIndices must be SGPRs. If |
| /// any of the required SGPR operands are VGPRs, perform a waterfall loop to |
| /// execute the instruction for each unique combination of values in all lanes |
| /// in the wave. The block will be split such that rest of the instructions are |
| /// moved to a new block. |
| /// |
| /// Essentially performs this loop: |
| // |
| /// Save Execution Mask |
| /// For (Lane : Wavefront) { |
| /// Enable Lane, Disable all other lanes |
| /// SGPR = read SGPR value for current lane from VGPR |
| /// VGPRResult[Lane] = use_op SGPR |
| /// } |
| /// Restore Execution Mask |
| /// |
| /// There is additional complexity to try for compare values to identify the |
| /// unique values used. |
| void AMDGPURegisterBankInfo::executeInWaterfallLoop( |
| MachineInstr &MI, MachineRegisterInfo &MRI, |
| ArrayRef<unsigned> OpIndices) const { |
| MachineFunction *MF = MI.getParent()->getParent(); |
| const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); |
| const SIInstrInfo *TII = ST.getInstrInfo(); |
| MachineBasicBlock::iterator I(MI); |
| |
| MachineBasicBlock &MBB = *MI.getParent(); |
| const DebugLoc &DL = MI.getDebugLoc(); |
| |
| // Use a set to avoid extra readfirstlanes in the case where multiple operands |
| // are the same register. |
| SmallSet<Register, 4> SGPROperandRegs; |
| for (unsigned Op : OpIndices) { |
| assert(MI.getOperand(Op).isUse()); |
| unsigned Reg = MI.getOperand(Op).getReg(); |
| const RegisterBank *OpBank = getRegBank(Reg, MRI, *TRI); |
| if (OpBank->getID() == AMDGPU::VGPRRegBankID) |
| SGPROperandRegs.insert(Reg); |
| } |
| |
| // No operands need to be replaced, so no need to loop. |
| if (SGPROperandRegs.empty()) |
| return; |
| |
| MachineIRBuilder B(MI); |
| SmallVector<Register, 4> ResultRegs; |
| SmallVector<Register, 4> InitResultRegs; |
| SmallVector<Register, 4> PhiRegs; |
| for (MachineOperand &Def : MI.defs()) { |
| LLT ResTy = MRI.getType(Def.getReg()); |
| const RegisterBank *DefBank = getRegBank(Def.getReg(), MRI, *TRI); |
| ResultRegs.push_back(Def.getReg()); |
| unsigned InitReg = B.buildUndef(ResTy).getReg(0); |
| unsigned PhiReg = MRI.createGenericVirtualRegister(ResTy); |
| InitResultRegs.push_back(InitReg); |
| PhiRegs.push_back(PhiReg); |
| MRI.setRegBank(PhiReg, *DefBank); |
| MRI.setRegBank(InitReg, *DefBank); |
| } |
| |
| unsigned SaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); |
| unsigned InitSaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); |
| |
| // Don't bother using generic instructions/registers for the exec mask. |
| B.buildInstr(TargetOpcode::IMPLICIT_DEF) |
| .addDef(InitSaveExecReg); |
| |
| unsigned PhiExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); |
| unsigned NewExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); |
| |
| // To insert the loop we need to split the block. Move everything before this |
| // point to a new block, and insert a new empty block before this instruction. |
| MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock(); |
| MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock(); |
| MachineBasicBlock *RestoreExecBB = MF->CreateMachineBasicBlock(); |
| MachineFunction::iterator MBBI(MBB); |
| ++MBBI; |
| MF->insert(MBBI, LoopBB); |
| MF->insert(MBBI, RestoreExecBB); |
| MF->insert(MBBI, RemainderBB); |
| |
| LoopBB->addSuccessor(RestoreExecBB); |
| LoopBB->addSuccessor(LoopBB); |
| |
| // Move the rest of the block into a new block. |
| RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB); |
| RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end()); |
| |
| MBB.addSuccessor(LoopBB); |
| RestoreExecBB->addSuccessor(RemainderBB); |
| |
| B.setInsertPt(*LoopBB, LoopBB->end()); |
| |
| B.buildInstr(TargetOpcode::PHI) |
| .addDef(PhiExec) |
| .addReg(InitSaveExecReg) |
| .addMBB(&MBB) |
| .addReg(NewExec) |
| .addMBB(LoopBB); |
| |
| for (auto Result : zip(InitResultRegs, ResultRegs, PhiRegs)) { |
| B.buildInstr(TargetOpcode::G_PHI) |
| .addDef(std::get<2>(Result)) |
| .addReg(std::get<0>(Result)) // Initial value / implicit_def |
| .addMBB(&MBB) |
| .addReg(std::get<1>(Result)) // Mid-loop value. |
| .addMBB(LoopBB); |
| } |
| |
| // Move the instruction into the loop. |
| LoopBB->splice(LoopBB->end(), &MBB, I); |
| I = std::prev(LoopBB->end()); |
| |
| B.setInstr(*I); |
| |
| unsigned CondReg = AMDGPU::NoRegister; |
| |
| for (MachineOperand &Op : MI.uses()) { |
| if (!Op.isReg()) |
| continue; |
| |
| assert(!Op.isDef()); |
| if (SGPROperandRegs.count(Op.getReg())) { |
| LLT OpTy = MRI.getType(Op.getReg()); |
| unsigned OpSize = OpTy.getSizeInBits(); |
| |
| // Can only do a readlane of 32-bit pieces. |
| if (OpSize == 32) { |
| // Avoid extra copies in the simple case of one 32-bit register. |
| unsigned CurrentLaneOpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); |
| MRI.setType(CurrentLaneOpReg, OpTy); |
| |
| constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI); |
| // Read the next variant <- also loop target. |
| BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), CurrentLaneOpReg) |
| .addReg(Op.getReg()); |
| |
| unsigned NewCondReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); |
| bool First = CondReg == AMDGPU::NoRegister; |
| if (First) |
| CondReg = NewCondReg; |
| |
| // Compare the just read M0 value to all possible Idx values. |
| B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64) |
| .addDef(NewCondReg) |
| .addReg(CurrentLaneOpReg) |
| .addReg(Op.getReg()); |
| Op.setReg(CurrentLaneOpReg); |
| |
| if (!First) { |
| unsigned AndReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); |
| |
| // If there are multiple operands to consider, and the conditions. |
| B.buildInstr(AMDGPU::S_AND_B64) |
| .addDef(AndReg) |
| .addReg(NewCondReg) |
| .addReg(CondReg); |
| CondReg = AndReg; |
| } |
| } else { |
| LLT S32 = LLT::scalar(32); |
| SmallVector<Register, 8> ReadlanePieces; |
| |
| // The compares can be done as 64-bit, but the extract needs to be done |
| // in 32-bit pieces. |
| |
| bool Is64 = OpSize % 64 == 0; |
| |
| LLT UnmergeTy = OpSize % 64 == 0 ? LLT::scalar(64) : LLT::scalar(32); |
| unsigned CmpOp = OpSize % 64 == 0 ? AMDGPU::V_CMP_EQ_U64_e64 |
| : AMDGPU::V_CMP_EQ_U32_e64; |
| |
| // The compares can be done as 64-bit, but the extract needs to be done |
| // in 32-bit pieces. |
| |
| // Insert the unmerge before the loop. |
| |
| B.setMBB(MBB); |
| auto Unmerge = B.buildUnmerge(UnmergeTy, Op.getReg()); |
| B.setInstr(*I); |
| |
| unsigned NumPieces = Unmerge->getNumOperands() - 1; |
| for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) { |
| unsigned UnmergePiece = Unmerge.getReg(PieceIdx); |
| |
| unsigned CurrentLaneOpReg; |
| if (Is64) { |
| unsigned CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32); |
| unsigned CurrentLaneOpRegHi = MRI.createGenericVirtualRegister(S32); |
| |
| MRI.setRegClass(UnmergePiece, &AMDGPU::VReg_64RegClass); |
| MRI.setRegClass(CurrentLaneOpRegLo, &AMDGPU::SReg_32_XM0RegClass); |
| MRI.setRegClass(CurrentLaneOpRegHi, &AMDGPU::SReg_32_XM0RegClass); |
| |
| // Read the next variant <- also loop target. |
| BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), |
| CurrentLaneOpRegLo) |
| .addReg(UnmergePiece, 0, AMDGPU::sub0); |
| |
| // Read the next variant <- also loop target. |
| BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), |
| CurrentLaneOpRegHi) |
| .addReg(UnmergePiece, 0, AMDGPU::sub1); |
| |
| CurrentLaneOpReg = |
| B.buildMerge(LLT::scalar(64), |
| {CurrentLaneOpRegLo, CurrentLaneOpRegHi}) |
| .getReg(0); |
| |
| MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_64_XEXECRegClass); |
| |
| if (OpTy.getScalarSizeInBits() == 64) { |
| // If we need to produce a 64-bit element vector, so use the |
| // merged pieces |
| ReadlanePieces.push_back(CurrentLaneOpReg); |
| } else { |
| // 32-bit element type. |
| ReadlanePieces.push_back(CurrentLaneOpRegLo); |
| ReadlanePieces.push_back(CurrentLaneOpRegHi); |
| } |
| } else { |
| CurrentLaneOpReg = MRI.createGenericVirtualRegister(LLT::scalar(32)); |
| MRI.setRegClass(UnmergePiece, &AMDGPU::VGPR_32RegClass); |
| MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_32_XM0RegClass); |
| |
| // Read the next variant <- also loop target. |
| BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), |
| CurrentLaneOpReg) |
| .addReg(UnmergePiece); |
| ReadlanePieces.push_back(CurrentLaneOpReg); |
| } |
| |
| unsigned NewCondReg |
| = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); |
| bool First = CondReg == AMDGPU::NoRegister; |
| if (First) |
| CondReg = NewCondReg; |
| |
| B.buildInstr(CmpOp) |
| .addDef(NewCondReg) |
| .addReg(CurrentLaneOpReg) |
| .addReg(UnmergePiece); |
| |
| if (!First) { |
| unsigned AndReg |
| = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); |
| |
| // If there are multiple operands to consider, and the conditions. |
| B.buildInstr(AMDGPU::S_AND_B64) |
| .addDef(AndReg) |
| .addReg(NewCondReg) |
| .addReg(CondReg); |
| CondReg = AndReg; |
| } |
| } |
| |
| // FIXME: Build merge seems to switch to CONCAT_VECTORS but not |
| // BUILD_VECTOR |
| if (OpTy.isVector()) { |
| auto Merge = B.buildBuildVector(OpTy, ReadlanePieces); |
| Op.setReg(Merge.getReg(0)); |
| } else { |
| auto Merge = B.buildMerge(OpTy, ReadlanePieces); |
| Op.setReg(Merge.getReg(0)); |
| } |
| |
| MRI.setRegBank(Op.getReg(), getRegBank(AMDGPU::SGPRRegBankID)); |
| } |
| } |
| } |
| |
| B.setInsertPt(*LoopBB, LoopBB->end()); |
| |
| // Update EXEC, save the original EXEC value to VCC. |
| B.buildInstr(AMDGPU::S_AND_SAVEEXEC_B64) |
| .addDef(NewExec) |
| .addReg(CondReg, RegState::Kill); |
| |
| MRI.setSimpleHint(NewExec, CondReg); |
| |
| // Update EXEC, switch all done bits to 0 and all todo bits to 1. |
| B.buildInstr(AMDGPU::S_XOR_B64_term) |
| .addDef(AMDGPU::EXEC) |
| .addReg(AMDGPU::EXEC) |
| .addReg(NewExec); |
| |
| // XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use |
| // s_cbranch_scc0? |
| |
| // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover. |
| B.buildInstr(AMDGPU::S_CBRANCH_EXECNZ) |
| .addMBB(LoopBB); |
| |
| // Save the EXEC mask before the loop. |
| BuildMI(MBB, MBB.end(), DL, TII->get(AMDGPU::S_MOV_B64_term), SaveExecReg) |
| .addReg(AMDGPU::EXEC); |
| |
| // Restore the EXEC mask after the loop. |
| B.setMBB(*RestoreExecBB); |
| B.buildInstr(AMDGPU::S_MOV_B64_term) |
| .addDef(AMDGPU::EXEC) |
| .addReg(SaveExecReg); |
| } |
| |
| void AMDGPURegisterBankInfo::applyMappingImpl( |
| const OperandsMapper &OpdMapper) const { |
| MachineInstr &MI = OpdMapper.getMI(); |
| unsigned Opc = MI.getOpcode(); |
| MachineRegisterInfo &MRI = OpdMapper.getMRI(); |
| switch (Opc) { |
| case AMDGPU::G_SELECT: { |
| unsigned DstReg = MI.getOperand(0).getReg(); |
| LLT DstTy = MRI.getType(DstReg); |
| if (DstTy.getSizeInBits() != 64) |
| break; |
| |
| LLT HalfTy = getHalfSizedType(DstTy); |
| |
| SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0)); |
| SmallVector<Register, 1> Src0Regs(OpdMapper.getVRegs(1)); |
| SmallVector<Register, 2> Src1Regs(OpdMapper.getVRegs(2)); |
| SmallVector<Register, 2> Src2Regs(OpdMapper.getVRegs(3)); |
| |
| // All inputs are SGPRs, nothing special to do. |
| if (DefRegs.empty()) { |
| assert(Src1Regs.empty() && Src2Regs.empty()); |
| break; |
| } |
| |
| MachineIRBuilder B(MI); |
| if (Src0Regs.empty()) |
| Src0Regs.push_back(MI.getOperand(1).getReg()); |
| else { |
| assert(Src0Regs.size() == 1); |
| } |
| |
| if (Src1Regs.empty()) |
| split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg()); |
| else { |
| setRegsToType(MRI, Src1Regs, HalfTy); |
| } |
| |
| if (Src2Regs.empty()) |
| split64BitValueForMapping(B, Src2Regs, HalfTy, MI.getOperand(3).getReg()); |
| else |
| setRegsToType(MRI, Src2Regs, HalfTy); |
| |
| setRegsToType(MRI, DefRegs, HalfTy); |
| |
| B.buildSelect(DefRegs[0], Src0Regs[0], Src1Regs[0], Src2Regs[0]); |
| B.buildSelect(DefRegs[1], Src0Regs[0], Src1Regs[1], Src2Regs[1]); |
| |
| MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID)); |
| MI.eraseFromParent(); |
| return; |
| } |
| case AMDGPU::G_AND: |
| case AMDGPU::G_OR: |
| case AMDGPU::G_XOR: { |
| // 64-bit and is only available on the SALU, so split into 2 32-bit ops if |
| // there is a VGPR input. |
| unsigned DstReg = MI.getOperand(0).getReg(); |
| LLT DstTy = MRI.getType(DstReg); |
| if (DstTy.getSizeInBits() != 64) |
| break; |
| |
| LLT HalfTy = getHalfSizedType(DstTy); |
| SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0)); |
| SmallVector<Register, 2> Src0Regs(OpdMapper.getVRegs(1)); |
| SmallVector<Register, 2> Src1Regs(OpdMapper.getVRegs(2)); |
| |
| // All inputs are SGPRs, nothing special to do. |
| if (DefRegs.empty()) { |
| assert(Src0Regs.empty() && Src1Regs.empty()); |
| break; |
| } |
| |
| assert(DefRegs.size() == 2); |
| assert(Src0Regs.size() == Src1Regs.size() && |
| (Src0Regs.empty() || Src0Regs.size() == 2)); |
| |
| // Depending on where the source registers came from, the generic code may |
| // have decided to split the inputs already or not. If not, we still need to |
| // extract the values. |
| MachineIRBuilder B(MI); |
| |
| if (Src0Regs.empty()) |
| split64BitValueForMapping(B, Src0Regs, HalfTy, MI.getOperand(1).getReg()); |
| else |
| setRegsToType(MRI, Src0Regs, HalfTy); |
| |
| if (Src1Regs.empty()) |
| split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg()); |
| else |
| setRegsToType(MRI, Src1Regs, HalfTy); |
| |
| setRegsToType(MRI, DefRegs, HalfTy); |
| |
| B.buildInstr(Opc) |
| .addDef(DefRegs[0]) |
| .addUse(Src0Regs[0]) |
| .addUse(Src1Regs[0]); |
| |
| B.buildInstr(Opc) |
| .addDef(DefRegs[1]) |
| .addUse(Src0Regs[1]) |
| .addUse(Src1Regs[1]); |
| |
| MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID)); |
| MI.eraseFromParent(); |
| return; |
| } |
| case AMDGPU::G_SEXT: |
| case AMDGPU::G_ZEXT: { |
| Register SrcReg = MI.getOperand(1).getReg(); |
| LLT SrcTy = MRI.getType(SrcReg); |
| bool Signed = Opc == AMDGPU::G_SEXT; |
| |
| MachineIRBuilder B(MI); |
| const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI); |
| |
| Register DstReg = MI.getOperand(0).getReg(); |
| LLT DstTy = MRI.getType(DstReg); |
| if (DstTy.isScalar() && |
| SrcBank != &AMDGPU::SGPRRegBank && |
| SrcBank != &AMDGPU::SCCRegBank && |
| SrcBank != &AMDGPU::VCCRegBank && |
| // FIXME: Should handle any type that round to s64 when irregular |
| // breakdowns supported. |
| DstTy.getSizeInBits() == 64 && |
| SrcTy.getSizeInBits() <= 32) { |
| const LLT S32 = LLT::scalar(32); |
| SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0)); |
| |
| // Extend to 32-bit, and then extend the low half. |
| if (Signed) { |
| // TODO: Should really be buildSExtOrCopy |
| B.buildSExtOrTrunc(DefRegs[0], SrcReg); |
| |
| // Replicate sign bit from 32-bit extended part. |
| auto ShiftAmt = B.buildConstant(S32, 31); |
| MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank); |
| B.buildAShr(DefRegs[1], DefRegs[0], ShiftAmt); |
| } else { |
| B.buildZExtOrTrunc(DefRegs[0], SrcReg); |
| B.buildConstant(DefRegs[1], 0); |
| } |
| |
| MRI.setRegBank(DstReg, *SrcBank); |
| MI.eraseFromParent(); |
| return; |
| } |
| |
| if (SrcTy != LLT::scalar(1)) |
| return; |
| |
| if (SrcBank == &AMDGPU::SCCRegBank || SrcBank == &AMDGPU::VCCRegBank) { |
| SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0)); |
| |
| const RegisterBank *DstBank = SrcBank == &AMDGPU::SCCRegBank ? |
| &AMDGPU::SGPRRegBank : &AMDGPU::VGPRRegBank; |
| |
| unsigned DstSize = DstTy.getSizeInBits(); |
| // 64-bit select is SGPR only |
| const bool UseSel64 = DstSize > 32 && |
| SrcBank->getID() == AMDGPU::SCCRegBankID; |
| |
| // TODO: Should s16 select be legal? |
| LLT SelType = UseSel64 ? LLT::scalar(64) : LLT::scalar(32); |
| auto True = B.buildConstant(SelType, Signed ? -1 : 1); |
| auto False = B.buildConstant(SelType, 0); |
| |
| MRI.setRegBank(True.getReg(0), *DstBank); |
| MRI.setRegBank(False.getReg(0), *DstBank); |
| MRI.setRegBank(DstReg, *DstBank); |
| |
| if (DstSize > 32 && SrcBank->getID() != AMDGPU::SCCRegBankID) { |
| B.buildSelect(DefRegs[0], SrcReg, True, False); |
| B.buildCopy(DefRegs[1], DefRegs[0]); |
| } else if (DstSize < 32) { |
| auto Sel = B.buildSelect(SelType, SrcReg, True, False); |
| MRI.setRegBank(Sel.getReg(0), *DstBank); |
| B.buildTrunc(DstReg, Sel); |
| } else { |
| B.buildSelect(DstReg, SrcReg, True, False); |
| } |
| |
| MI.eraseFromParent(); |
| return; |
| } |
| |
| // Fixup the case with an s1 src that isn't a condition register. Use shifts |
| // instead of introducing a compare to avoid an unnecessary condition |
| // register (and since there's no scalar 16-bit compares). |
| auto Ext = B.buildAnyExt(DstTy, SrcReg); |
| auto ShiftAmt = B.buildConstant(LLT::scalar(32), DstTy.getSizeInBits() - 1); |
| auto Shl = B.buildShl(DstTy, Ext, ShiftAmt); |
| |
| if (MI.getOpcode() == AMDGPU::G_SEXT) |
| B.buildAShr(DstReg, Shl, ShiftAmt); |
| else |
| B.buildLShr(DstReg, Shl, ShiftAmt); |
| |
| MRI.setRegBank(DstReg, *SrcBank); |
| MRI.setRegBank(Ext.getReg(0), *SrcBank); |
| MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank); |
| MRI.setRegBank(Shl.getReg(0), *SrcBank); |
| MI.eraseFromParent(); |
| return; |
| } |
| case AMDGPU::G_EXTRACT_VECTOR_ELT: |
| applyDefaultMapping(OpdMapper); |
| executeInWaterfallLoop(MI, MRI, { 2 }); |
| return; |
| case AMDGPU::G_INTRINSIC: { |
| switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { |
| case Intrinsic::amdgcn_s_buffer_load: { |
| // FIXME: Move to G_INTRINSIC_W_SIDE_EFFECTS |
| executeInWaterfallLoop(MI, MRI, { 2, 3 }); |
| return; |
| } |
| default: |
| break; |
| } |
| break; |
| } |
| case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: { |
| switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { |
| case Intrinsic::amdgcn_buffer_load: { |
| executeInWaterfallLoop(MI, MRI, { 2 }); |
| return; |
| } |
| default: |
| break; |
| } |
| break; |
| } |
| default: |
| break; |
| } |
| |
| return applyDefaultMapping(OpdMapper); |
| } |
| |
| static bool isInstrUniform(const MachineInstr &MI) { |
| if (!MI.hasOneMemOperand()) |
| return false; |
| |
| const MachineMemOperand *MMO = *MI.memoperands_begin(); |
| return AMDGPUInstrInfo::isUniformMMO(MMO); |
| } |
| |
| bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const { |
| const MachineFunction &MF = *MI.getParent()->getParent(); |
| const MachineRegisterInfo &MRI = MF.getRegInfo(); |
| for (unsigned i = 0, e = MI.getNumOperands();i != e; ++i) { |
| if (!MI.getOperand(i).isReg()) |
| continue; |
| unsigned Reg = MI.getOperand(i).getReg(); |
| if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) { |
| if (Bank->getID() == AMDGPU::VGPRRegBankID) |
| return false; |
| |
| assert(Bank->getID() == AMDGPU::SGPRRegBankID || |
| Bank->getID() == AMDGPU::SCCRegBankID); |
| } |
| } |
| return true; |
| } |
| |
| const RegisterBankInfo::InstructionMapping & |
| AMDGPURegisterBankInfo::getDefaultMappingSOP(const MachineInstr &MI) const { |
| const MachineFunction &MF = *MI.getParent()->getParent(); |
| const MachineRegisterInfo &MRI = MF.getRegInfo(); |
| SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands()); |
| |
| for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { |
| unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI); |
| unsigned BankID = Size == 1 ? AMDGPU::SCCRegBankID : AMDGPU::SGPRRegBankID; |
| OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size); |
| } |
| return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), |
| MI.getNumOperands()); |
| } |
| |
| const RegisterBankInfo::InstructionMapping & |
| AMDGPURegisterBankInfo::getDefaultMappingVOP(const MachineInstr &MI) const { |
| const MachineFunction &MF = *MI.getParent()->getParent(); |
| const MachineRegisterInfo &MRI = MF.getRegInfo(); |
| SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands()); |
| unsigned OpdIdx = 0; |
| |
| unsigned Size0 = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); |
| OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size0); |
| |
| if (MI.getOperand(OpdIdx).isIntrinsicID()) |
| OpdsMapping[OpdIdx++] = nullptr; |
| |
| unsigned Reg1 = MI.getOperand(OpdIdx).getReg(); |
| unsigned Size1 = getSizeInBits(Reg1, MRI, *TRI); |
| |
| unsigned DefaultBankID = Size1 == 1 ? |
| AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID; |
| unsigned Bank1 = getRegBankID(Reg1, MRI, *TRI, DefaultBankID); |
| |
| OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(Bank1, Size1); |
| |
| for (unsigned e = MI.getNumOperands(); OpdIdx != e; ++OpdIdx) { |
| const MachineOperand &MO = MI.getOperand(OpdIdx); |
| if (!MO.isReg()) |
| continue; |
| |
| unsigned Size = getSizeInBits(MO.getReg(), MRI, *TRI); |
| unsigned BankID = Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID; |
| OpdsMapping[OpdIdx] = AMDGPU::getValueMapping(BankID, Size); |
| } |
| |
| return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), |
| MI.getNumOperands()); |
| } |
| |
| const RegisterBankInfo::InstructionMapping & |
| AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const { |
| const MachineFunction &MF = *MI.getParent()->getParent(); |
| const MachineRegisterInfo &MRI = MF.getRegInfo(); |
| SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands()); |
| |
| for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { |
| unsigned Size = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI); |
| OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); |
| } |
| |
| return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), |
| MI.getNumOperands()); |
| } |
| |
| const RegisterBankInfo::InstructionMapping & |
| AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const { |
| |
| const MachineFunction &MF = *MI.getParent()->getParent(); |
| const MachineRegisterInfo &MRI = MF.getRegInfo(); |
| SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands()); |
| unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); |
| unsigned PtrSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI); |
| |
| const ValueMapping *ValMapping; |
| const ValueMapping *PtrMapping; |
| |
| if (isInstrUniform(MI)) { |
| // We have a uniform instruction so we want to use an SMRD load |
| ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); |
| PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize); |
| } else { |
| ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); |
| // FIXME: What would happen if we used SGPRRegBankID here? |
| PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize); |
| } |
| |
| OpdsMapping[0] = ValMapping; |
| OpdsMapping[1] = PtrMapping; |
| const RegisterBankInfo::InstructionMapping &Mapping = getInstructionMapping( |
| 1, 1, getOperandsMapping(OpdsMapping), MI.getNumOperands()); |
| return Mapping; |
| |
| // FIXME: Do we want to add a mapping for FLAT load, or should we just |
| // handle that during instruction selection? |
| } |
| |
| unsigned |
| AMDGPURegisterBankInfo::getRegBankID(unsigned Reg, |
| const MachineRegisterInfo &MRI, |
| const TargetRegisterInfo &TRI, |
| unsigned Default) const { |
| |
| const RegisterBank *Bank = getRegBank(Reg, MRI, TRI); |
| return Bank ? Bank->getID() : Default; |
| } |
| |
| /// |
| /// This function must return a legal mapping, because |
| /// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called |
| /// in RegBankSelect::Mode::Fast. Any mapping that would cause a |
| /// VGPR to SGPR generated is illegal. |
| /// |
| const RegisterBankInfo::InstructionMapping & |
| AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { |
| const MachineFunction &MF = *MI.getParent()->getParent(); |
| const MachineRegisterInfo &MRI = MF.getRegInfo(); |
| |
| if (MI.isRegSequence()) { |
| // If any input is a VGPR, the result must be a VGPR. The default handling |
| // assumes any copy between banks is legal. |
| unsigned BankID = AMDGPU::SGPRRegBankID; |
| |
| for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { |
| auto OpBank = getRegBankID(MI.getOperand(I).getReg(), MRI, *TRI); |
| // It doesn't make sense to use vcc or scc banks here, so just ignore |
| // them. |
| if (OpBank != AMDGPU::SGPRRegBankID) { |
| BankID = AMDGPU::VGPRRegBankID; |
| break; |
| } |
| } |
| unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); |
| |
| const ValueMapping &ValMap = getValueMapping(0, Size, getRegBank(BankID)); |
| return getInstructionMapping( |
| 1, /*Cost*/ 1, |
| /*OperandsMapping*/ getOperandsMapping({&ValMap}), 1); |
| } |
| |
| // The default handling is broken and doesn't handle illegal SGPR->VGPR copies |
| // properly. |
| // |
| // TODO: There are additional exec masking dependencies to analyze. |
| if (MI.getOpcode() == TargetOpcode::G_PHI) { |
| // TODO: Generate proper invalid bank enum. |
| int ResultBank = -1; |
| |
| for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { |
| unsigned Reg = MI.getOperand(I).getReg(); |
| const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI); |
| |
| // FIXME: Assuming VGPR for any undetermined inputs. |
| if (!Bank || Bank->getID() == AMDGPU::VGPRRegBankID) { |
| ResultBank = AMDGPU::VGPRRegBankID; |
| break; |
| } |
| |
| unsigned OpBank = Bank->getID(); |
| // scc, scc -> sgpr |
| if (OpBank == AMDGPU::SCCRegBankID) { |
| // There's only one SCC register, so a phi requires copying to SGPR. |
| OpBank = AMDGPU::SGPRRegBankID; |
| } else if (OpBank == AMDGPU::VCCRegBankID) { |
| // vcc, vcc -> vcc |
| // vcc, sgpr -> vgpr |
| if (ResultBank != -1 && ResultBank != AMDGPU::VCCRegBankID) { |
| ResultBank = AMDGPU::VGPRRegBankID; |
| break; |
| } |
| } |
| |
| ResultBank = OpBank; |
| } |
| |
| assert(ResultBank != -1); |
| |
| unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); |
| |
| const ValueMapping &ValMap = |
| getValueMapping(0, Size, getRegBank(ResultBank)); |
| return getInstructionMapping( |
| 1, /*Cost*/ 1, |
| /*OperandsMapping*/ getOperandsMapping({&ValMap}), 1); |
| } |
| |
| const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI); |
| if (Mapping.isValid()) |
| return Mapping; |
| |
| SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands()); |
| |
| switch (MI.getOpcode()) { |
| default: |
| return getInvalidInstructionMapping(); |
| |
| case AMDGPU::G_AND: |
| case AMDGPU::G_OR: |
| case AMDGPU::G_XOR: { |
| unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); |
| if (Size == 1) { |
| OpdsMapping[0] = OpdsMapping[1] = |
| OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size); |
| break; |
| } |
| |
| if (Size == 64) { |
| |
| if (isSALUMapping(MI)) { |
| OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size); |
| OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0]; |
| } else { |
| OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size); |
| unsigned Bank1 = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI/*, DefaultBankID*/); |
| OpdsMapping[1] = AMDGPU::getValueMapping(Bank1, Size); |
| |
| unsigned Bank2 = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI/*, DefaultBankID*/); |
| OpdsMapping[2] = AMDGPU::getValueMapping(Bank2, Size); |
| } |
| |
| break; |
| } |
| |
| LLVM_FALLTHROUGH; |
| } |
| |
| case AMDGPU::G_GEP: |
| case AMDGPU::G_ADD: |
| case AMDGPU::G_SUB: |
| case AMDGPU::G_MUL: |
| case AMDGPU::G_SHL: |
| case AMDGPU::G_LSHR: |
| case AMDGPU::G_ASHR: |
| case AMDGPU::G_UADDO: |
| case AMDGPU::G_SADDO: |
| case AMDGPU::G_USUBO: |
| case AMDGPU::G_SSUBO: |
| case AMDGPU::G_UADDE: |
| case AMDGPU::G_SADDE: |
| case AMDGPU::G_USUBE: |
| case AMDGPU::G_SSUBE: |
| case AMDGPU::G_UMULH: |
| case AMDGPU::G_SMULH: |
| if (isSALUMapping(MI)) |
| return getDefaultMappingSOP(MI); |
| LLVM_FALLTHROUGH; |
| |
| case AMDGPU::G_SMIN: |
| case AMDGPU::G_SMAX: |
| case AMDGPU::G_UMIN: |
| case AMDGPU::G_UMAX: |
| // TODO: min/max can be scalar, but requires expanding as a compare and |
| // select. |
| |
| case AMDGPU::G_FADD: |
| case AMDGPU::G_FSUB: |
| case AMDGPU::G_FPTOSI: |
| case AMDGPU::G_FPTOUI: |
| case AMDGPU::G_FMUL: |
| case AMDGPU::G_FMA: |
| case AMDGPU::G_FSQRT: |
| case AMDGPU::G_SITOFP: |
| case AMDGPU::G_UITOFP: |
| case AMDGPU::G_FPTRUNC: |
| case AMDGPU::G_FPEXT: |
| case AMDGPU::G_FEXP2: |
| case AMDGPU::G_FLOG2: |
| case AMDGPU::G_INTRINSIC_TRUNC: |
| case AMDGPU::G_INTRINSIC_ROUND: |
| return getDefaultMappingVOP(MI); |
| case AMDGPU::G_IMPLICIT_DEF: { |
| unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); |
| OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); |
| break; |
| } |
| case AMDGPU::G_FCONSTANT: |
| case AMDGPU::G_CONSTANT: |
| case AMDGPU::G_FRAME_INDEX: |
| case AMDGPU::G_BLOCK_ADDR: { |
| unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); |
| OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); |
| break; |
| } |
| case AMDGPU::G_INSERT: { |
| unsigned BankID = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID : |
| AMDGPU::VGPRRegBankID; |
| unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); |
| unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI); |
| unsigned EltSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI); |
| OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize); |
| OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize); |
| OpdsMapping[2] = AMDGPU::getValueMapping(BankID, EltSize); |
| OpdsMapping[3] = nullptr; |
| break; |
| } |
| case AMDGPU::G_EXTRACT: { |
| unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI); |
| unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); |
| unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI); |
| OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize); |
| OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize); |
| OpdsMapping[2] = nullptr; |
| break; |
| } |
| case AMDGPU::G_MERGE_VALUES: { |
| unsigned Bank = isSALUMapping(MI) ? |
| AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; |
| unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); |
| unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); |
| |
| OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize); |
| // Op1 and Dst should use the same register bank. |
| for (unsigned i = 1, e = MI.getNumOperands(); i != e; ++i) |
| OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize); |
| break; |
| } |
| case AMDGPU::G_BITCAST: |
| case AMDGPU::G_INTTOPTR: |
| case AMDGPU::G_PTRTOINT: |
| case AMDGPU::G_CTLZ: |
| case AMDGPU::G_CTLZ_ZERO_UNDEF: |
| case AMDGPU::G_CTTZ: |
| case AMDGPU::G_CTTZ_ZERO_UNDEF: |
| case AMDGPU::G_CTPOP: |
| case AMDGPU::G_BSWAP: |
| case AMDGPU::G_FABS: |
| case AMDGPU::G_FNEG: { |
| unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); |
| unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI); |
| OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size); |
| break; |
| } |
| case AMDGPU::G_TRUNC: { |
| unsigned Dst = MI.getOperand(0).getReg(); |
| unsigned Src = MI.getOperand(1).getReg(); |
| unsigned Bank = getRegBankID(Src, MRI, *TRI); |
| unsigned DstSize = getSizeInBits(Dst, MRI, *TRI); |
| unsigned SrcSize = getSizeInBits(Src, MRI, *TRI); |
| OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize); |
| OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize); |
| break; |
| } |
| case AMDGPU::G_ZEXT: |
| case AMDGPU::G_SEXT: |
| case AMDGPU::G_ANYEXT: { |
| unsigned Dst = MI.getOperand(0).getReg(); |
| unsigned Src = MI.getOperand(1).getReg(); |
| unsigned DstSize = getSizeInBits(Dst, MRI, *TRI); |
| unsigned SrcSize = getSizeInBits(Src, MRI, *TRI); |
| |
| unsigned DstBank; |
| const RegisterBank *SrcBank = getRegBank(Src, MRI, *TRI); |
| assert(SrcBank); |
| switch (SrcBank->getID()) { |
| case AMDGPU::SCCRegBankID: |
| case AMDGPU::SGPRRegBankID: |
| DstBank = AMDGPU::SGPRRegBankID; |
| break; |
| default: |
| DstBank = AMDGPU::VGPRRegBankID; |
| break; |
| } |
| |
| // TODO: Should anyext be split into 32-bit part as well? |
| if (MI.getOpcode() == AMDGPU::G_ANYEXT) { |
| OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize); |
| OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank->getID(), SrcSize); |
| } else { |
| // Scalar extend can use 64-bit BFE, but VGPRs require extending to |
| // 32-bits, and then to 64. |
| OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize); |
| OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->getID(), |
| SrcSize); |
| } |
| break; |
| } |
| case AMDGPU::G_FCMP: { |
| unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); |
| unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI); |
| OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1); |
| OpdsMapping[1] = nullptr; // Predicate Operand. |
| OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size); |
| OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); |
| break; |
| } |
| case AMDGPU::G_STORE: { |
| assert(MI.getOperand(0).isReg()); |
| unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); |
| // FIXME: We need to specify a different reg bank once scalar stores |
| // are supported. |
| const ValueMapping *ValMapping = |
| AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); |
| // FIXME: Depending on the type of store, the pointer could be in |
| // the SGPR Reg bank. |
| // FIXME: Pointer size should be based on the address space. |
| const ValueMapping *PtrMapping = |
| AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64); |
| |
| OpdsMapping[0] = ValMapping; |
| OpdsMapping[1] = PtrMapping; |
| break; |
| } |
| |
| case AMDGPU::G_ICMP: { |
| unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); |
| unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI); |
| unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI); |
| unsigned Op0Bank = Op2Bank == AMDGPU::SGPRRegBankID && |
| Op3Bank == AMDGPU::SGPRRegBankID ? |
| AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID; |
| OpdsMapping[0] = AMDGPU::getValueMapping(Op0Bank, 1); |
| OpdsMapping[1] = nullptr; // Predicate Operand. |
| OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size); |
| OpdsMapping[3] = AMDGPU::getValueMapping(Op3Bank, Size); |
| break; |
| } |
| |
| |
| case AMDGPU::G_EXTRACT_VECTOR_ELT: { |
| unsigned OutputBankID = isSALUMapping(MI) ? |
| AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; |
| unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); |
| unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); |
| unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI); |
| |
| OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, SrcSize); |
| OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, SrcSize); |
| |
| // The index can be either if the source vector is VGPR. |
| OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize); |
| break; |
| } |
| case AMDGPU::G_INSERT_VECTOR_ELT: { |
| unsigned OutputBankID = isSALUMapping(MI) ? |
| AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; |
| |
| unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); |
| unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); |
| unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits(); |
| unsigned InsertEltBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI); |
| unsigned IdxBank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI); |
| |
| OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize); |
| OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize); |
| OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBank, InsertSize); |
| |
| // The index can be either if the source vector is VGPR. |
| OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize); |
| break; |
| } |
| case AMDGPU::G_UNMERGE_VALUES: { |
| unsigned Bank = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID : |
| AMDGPU::VGPRRegBankID; |
| |
| // Op1 and Dst should use the same register bank. |
| // FIXME: Shouldn't this be the default? Why do we need to handle this? |
| for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { |
| unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI); |
| OpdsMapping[i] = AMDGPU::getValueMapping(Bank, Size); |
| } |
| break; |
| } |
| case AMDGPU::G_INTRINSIC: { |
| switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { |
| default: |
| return getInvalidInstructionMapping(); |
| case Intrinsic::maxnum: |
| case Intrinsic::minnum: |
| case Intrinsic::amdgcn_cvt_pkrtz: |
| return getDefaultMappingVOP(MI); |
| case Intrinsic::amdgcn_kernarg_segment_ptr: { |
| unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); |
| OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); |
| break; |
| } |
| case Intrinsic::amdgcn_wqm_vote: { |
| unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); |
| OpdsMapping[0] = OpdsMapping[2] |
| = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); |
| break; |
| } |
| case Intrinsic::amdgcn_s_buffer_load: { |
| // FIXME: This should be moved to G_INTRINSIC_W_SIDE_EFFECTS |
| unsigned RSrc = MI.getOperand(2).getReg(); // SGPR |
| unsigned Offset = MI.getOperand(3).getReg(); // SGPR/imm |
| |
| unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); |
| unsigned Size2 = MRI.getType(RSrc).getSizeInBits(); |
| unsigned Size3 = MRI.getType(Offset).getSizeInBits(); |
| |
| unsigned RSrcBank = getRegBankID(RSrc, MRI, *TRI); |
| unsigned OffsetBank = getRegBankID(Offset, MRI, *TRI); |
| |
| OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size0); |
| OpdsMapping[1] = nullptr; // intrinsic id |
| |
| // Lie and claim everything is legal, even though some need to be |
| // SGPRs. applyMapping will have to deal with it as a waterfall loop. |
| OpdsMapping[2] = AMDGPU::getValueMapping(RSrcBank, Size2); // rsrc |
| OpdsMapping[3] = AMDGPU::getValueMapping(OffsetBank, Size3); |
| OpdsMapping[4] = nullptr; |
| break; |
| } |
| case Intrinsic::amdgcn_div_scale: { |
| unsigned Dst0Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); |
| unsigned Dst1Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); |
| OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Dst0Size); |
| OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Dst1Size); |
| |
| unsigned SrcSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits(); |
| OpdsMapping[3] = AMDGPU::getValueMapping( |
| getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI), SrcSize); |
| OpdsMapping[4] = AMDGPU::getValueMapping( |
| getRegBankID(MI.getOperand(4).getReg(), MRI, *TRI), SrcSize); |
| |
| break; |
| } |
| case Intrinsic::amdgcn_class: { |
| unsigned Src0Reg = MI.getOperand(2).getReg(); |
| unsigned Src1Reg = MI.getOperand(3).getReg(); |
| unsigned Src0Size = MRI.getType(Src0Reg).getSizeInBits(); |
| unsigned Src1Size = MRI.getType(Src1Reg).getSizeInBits(); |
| unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); |
| OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize); |
| OpdsMapping[2] = AMDGPU::getValueMapping(getRegBankID(Src0Reg, MRI, *TRI), |
| Src0Size); |
| OpdsMapping[3] = AMDGPU::getValueMapping(getRegBankID(Src1Reg, MRI, *TRI), |
| Src1Size); |
| break; |
| } |
| } |
| break; |
| } |
| case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: { |
| switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { |
| default: |
| return getInvalidInstructionMapping(); |
| case Intrinsic::amdgcn_exp_compr: |
| OpdsMapping[0] = nullptr; // IntrinsicID |
| // FIXME: These are immediate values which can't be read from registers. |
| OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); |
| OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); |
| // FIXME: Could we support packed types here? |
| OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); |
| OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); |
| // FIXME: These are immediate values which can't be read from registers. |
| OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); |
| OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); |
| break; |
| case Intrinsic::amdgcn_exp: |
| OpdsMapping[0] = nullptr; // IntrinsicID |
| // FIXME: These are immediate values which can't be read from registers. |
| OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); |
| OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); |
| // FIXME: Could we support packed types here? |
| OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); |
| OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); |
| OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); |
| OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); |
| // FIXME: These are immediate values which can't be read from registers. |
| OpdsMapping[7] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); |
| OpdsMapping[8] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); |
| break; |
| case Intrinsic::amdgcn_buffer_load: { |
| unsigned RSrc = MI.getOperand(2).getReg(); // SGPR |
| unsigned VIndex = MI.getOperand(3).getReg(); // VGPR |
| unsigned Offset = MI.getOperand(4).getReg(); // SGPR/VGPR/imm |
| |
| unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); |
| unsigned Size2 = MRI.getType(RSrc).getSizeInBits(); |
| unsigned Size3 = MRI.getType(VIndex).getSizeInBits(); |
| unsigned Size4 = MRI.getType(Offset).getSizeInBits(); |
| |
| unsigned RSrcBank = getRegBankID(RSrc, MRI, *TRI); |
| unsigned OffsetBank = getRegBankID(Offset, MRI, *TRI); |
| |
| OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size0); |
| OpdsMapping[1] = nullptr; // intrinsic id |
| |
| // Lie and claim everything is legal, even though some need to be |
| // SGPRs. applyMapping will have to deal with it as a waterfall loop. |
| OpdsMapping[2] = AMDGPU::getValueMapping(RSrcBank, Size2); // rsrc |
| OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size3); |
| OpdsMapping[4] = AMDGPU::getValueMapping(OffsetBank, Size4); |
| OpdsMapping[5] = nullptr; |
| OpdsMapping[6] = nullptr; |
| break; |
| } |
| } |
| |
| break; |
| } |
| case AMDGPU::G_SELECT: { |
| unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); |
| unsigned Op1Bank = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI, |
| AMDGPU::SGPRRegBankID); |
| unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI); |
| unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI); |
| bool SGPRSrcs = Op1Bank == AMDGPU::SCCRegBankID && |
| Op2Bank == AMDGPU::SGPRRegBankID && |
| Op3Bank == AMDGPU::SGPRRegBankID; |
| unsigned Bank = SGPRSrcs ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; |
| Op1Bank = SGPRSrcs ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID; |
| |
| if (Size == 64) { |
| OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(Bank, Size); |
| OpdsMapping[1] = AMDGPU::getValueMapping(Op1Bank, 1); |
| OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(Bank, Size); |
| OpdsMapping[3] = AMDGPU::getValueMappingSGPR64Only(Bank, Size); |
| } else { |
| OpdsMapping[0] = AMDGPU::getValueMapping(Bank, Size); |
| OpdsMapping[1] = AMDGPU::getValueMapping(Op1Bank, 1); |
| OpdsMapping[2] = AMDGPU::getValueMapping(Bank, Size); |
| OpdsMapping[3] = AMDGPU::getValueMapping(Bank, Size); |
| } |
| |
| break; |
| } |
| |
| case AMDGPU::G_LOAD: |
| return getInstrMappingForLoad(MI); |
| |
| case AMDGPU::G_ATOMICRMW_XCHG: |
| case AMDGPU::G_ATOMICRMW_ADD: |
| case AMDGPU::G_ATOMICRMW_SUB: |
| case AMDGPU::G_ATOMICRMW_AND: |
| case AMDGPU::G_ATOMICRMW_OR: |
| case AMDGPU::G_ATOMICRMW_XOR: |
| case AMDGPU::G_ATOMICRMW_MAX: |
| case AMDGPU::G_ATOMICRMW_MIN: |
| case AMDGPU::G_ATOMICRMW_UMAX: |
| case AMDGPU::G_ATOMICRMW_UMIN: |
| case AMDGPU::G_ATOMIC_CMPXCHG: { |
| return getDefaultMappingAllVGPR(MI); |
| } |
| case AMDGPU::G_BRCOND: { |
| unsigned Bank = getRegBankID(MI.getOperand(0).getReg(), MRI, *TRI, |
| AMDGPU::SGPRRegBankID); |
| assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1); |
| if (Bank != AMDGPU::SCCRegBankID) |
| Bank = AMDGPU::VCCRegBankID; |
| |
| OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1); |
| break; |
| } |
| } |
| |
| return getInstructionMapping(/*ID*/1, /*Cost*/1, |
| getOperandsMapping(OpdsMapping), |
| MI.getNumOperands()); |
| } |
| |