//===-- SIProgramInfo.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// The SIProgramInfo tracks resource usage and hardware flags for kernels and
/// entry functions.
//
//===----------------------------------------------------------------------===//
//

#include "SIProgramInfo.h"
#include "GCNSubtarget.h"
#include "SIDefines.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/MC/MCExpr.h"

using namespace llvm;

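// Reset all tracked values for a new function. Most fields are reset to a
// zero MCConstantExpr rather than a plain integer, presumably because their
// final values may involve MC symbols (e.g. register counts influenced by
// callees) that are only resolved at emission time.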
void SIProgramInfo::reset(const MachineFunction &MF) {
  MCContext &Ctx = MF.getContext();

  const MCExpr *ZeroExpr = MCConstantExpr::create(0, Ctx);

  CodeSizeInBytes.reset();

  VGPRBlocks = ZeroExpr;
  SGPRBlocks = ZeroExpr;
  Priority = 0;
  FloatMode = 0;
  Priv = 0;
  DX10Clamp = 0;
  DebugMode = 0;
  IEEEMode = 0;
  WgpMode = 0;
  MemOrdered = 0;
  FwdProgress = 0;
  RrWgMode = 0;
  ScratchSize = ZeroExpr;

  LDSBlocks = 0;
  ScratchBlocks = ZeroExpr;

  ScratchEnable = ZeroExpr;
  UserSGPR = 0;
  TrapHandlerEnable = 0;
  TGIdXEnable = 0;
  TGIdYEnable = 0;
  TGIdZEnable = 0;
  TGSizeEnable = 0;
  TIdIGCompCount = 0;
  EXCPEnMSB = 0;
  LdsSize = 0;
  EXCPEnable = 0;

  ComputePGMRSrc3 = ZeroExpr;

  NumVGPR = ZeroExpr;
  NumArchVGPR = ZeroExpr;
  NumAccVGPR = ZeroExpr;
  AccumOffset = ZeroExpr;
  TgSplit = 0;
  NumSGPR = ZeroExpr;
  SGPRSpill = 0;
  VGPRSpill = 0;
  LDSSize = 0;
  FlatUsed = ZeroExpr;

  NumSGPRsForWavesPerEU = ZeroExpr;
  NumVGPRsForWavesPerEU = ZeroExpr;
  Occupancy = ZeroExpr;
  DynamicCallStack = ZeroExpr;
  VCCUsed = ZeroExpr;
}

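// Pack the integer-valued fields of COMPUTE_PGM_RSRC1. Fields that only
// exist on some subtargets are guarded by feature checks; the symbolic
// VGPR/SGPR block counts are OR'd in later by getComputePGMRSrc1().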
static uint64_t getComputePGMRSrc1Reg(const SIProgramInfo &ProgInfo,
                                      const GCNSubtarget &ST) {
  uint64_t Reg = S_00B848_PRIORITY(ProgInfo.Priority) |
                 S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
                 S_00B848_PRIV(ProgInfo.Priv) |
                 S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
                 S_00B848_WGP_MODE(ProgInfo.WgpMode) |
                 S_00B848_MEM_ORDERED(ProgInfo.MemOrdered);

  if (ST.hasDX10ClampMode())
    Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);

  if (ST.hasIEEEMode())
    Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);

  // TODO: In the long run we will want to enable this unconditionally.
  if (ST.getTargetTriple().getOS() == Triple::OSType::AMDHSA)
    Reg |= S_00B848_FWD_PROGRESS(ProgInfo.FwdProgress);

  if (ST.hasRrWGMode())
    Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);

  return Reg;
}

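// Pack the integer-valued RSRC1 fields for graphics calling conventions. The
// S_00B028/S_00B128/S_00B228/S_00B428 macros target the per-stage
// SPI_SHADER_PGM_RSRC1_{PS,VS,GS,HS} registers; only some stages carry the
// WGP_MODE and MEM_ORDERED fields.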
static uint64_t getPGMRSrc1Reg(const SIProgramInfo &ProgInfo,
                               CallingConv::ID CC, const GCNSubtarget &ST) {
  uint64_t Reg = S_00B848_PRIORITY(ProgInfo.Priority) |
                 S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
                 S_00B848_PRIV(ProgInfo.Priv) |
                 S_00B848_DEBUG_MODE(ProgInfo.DebugMode);

  if (ST.hasDX10ClampMode())
    Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);

  if (ST.hasIEEEMode())
    Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);

  if (ST.hasRrWGMode())
    Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);

  switch (CC) {
  case CallingConv::AMDGPU_PS:
    Reg |= S_00B028_MEM_ORDERED(ProgInfo.MemOrdered);
    break;
  case CallingConv::AMDGPU_VS:
    Reg |= S_00B128_MEM_ORDERED(ProgInfo.MemOrdered);
    break;
  case CallingConv::AMDGPU_GS:
    Reg |= S_00B228_WGP_MODE(ProgInfo.WgpMode) |
           S_00B228_MEM_ORDERED(ProgInfo.MemOrdered);
    break;
  case CallingConv::AMDGPU_HS:
    Reg |= S_00B428_WGP_MODE(ProgInfo.WgpMode) |
           S_00B428_MEM_ORDERED(ProgInfo.MemOrdered);
    break;
  default:
    break;
  }
  return Reg;
}

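// Pack the integer-valued fields of COMPUTE_PGM_RSRC2. The symbolic
// SCRATCH_EN bit is OR'd in by getComputePGMRSrc2().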
static uint64_t getComputePGMRSrc2Reg(const SIProgramInfo &ProgInfo) {
  uint64_t Reg = S_00B84C_USER_SGPR(ProgInfo.UserSGPR) |
                 S_00B84C_TRAP_HANDLER(ProgInfo.TrapHandlerEnable) |
                 S_00B84C_TGID_X_EN(ProgInfo.TGIdXEnable) |
                 S_00B84C_TGID_Y_EN(ProgInfo.TGIdYEnable) |
                 S_00B84C_TGID_Z_EN(ProgInfo.TGIdZEnable) |
                 S_00B84C_TG_SIZE_EN(ProgInfo.TGSizeEnable) |
                 S_00B84C_TIDIG_COMP_CNT(ProgInfo.TIdIGCompCount) |
                 S_00B84C_EXCP_EN_MSB(ProgInfo.EXCPEnMSB) |
                 S_00B84C_LDS_SIZE(ProgInfo.LdsSize) |
                 S_00B84C_EXCP_EN(ProgInfo.EXCPEnable);

  return Reg;
}

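// Build the expression ((Val & Mask) << Shift) at the MC layer, eliding the
// AND when Mask is zero and the SHL when Shift is zero. For example,
// MaskShift(SGPRBlocks, 0xF, 6, Ctx) yields (SGPRBlocks & 0xF) << 6.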
static const MCExpr *MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift,
                               MCContext &Ctx) {
  if (Mask) {
    const MCExpr *MaskExpr = MCConstantExpr::create(Mask, Ctx);
    Val = MCBinaryExpr::createAnd(Val, MaskExpr, Ctx);
  }
  if (Shift) {
    const MCExpr *ShiftExpr = MCConstantExpr::create(Shift, Ctx);
    Val = MCBinaryExpr::createShl(Val, ShiftExpr, Ctx);
  }
  return Val;
}

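// Combine the constant RSRC1 bits with the still-symbolic block counts:
// VGPRBlocks occupies bits [5:0] and SGPRBlocks bits [9:6] of
// COMPUTE_PGM_RSRC1.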
const MCExpr *SIProgramInfo::getComputePGMRSrc1(const GCNSubtarget &ST,
                                                MCContext &Ctx) const {
  uint64_t Reg = getComputePGMRSrc1Reg(*this, ST);
  const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
  const MCExpr *Res = MCBinaryExpr::createOr(
      MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
      MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
  return MCBinaryExpr::createOr(RegExpr, Res, Ctx);
}

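// Graphics calling conventions use the same VGPR/SGPR block layout in bits
// [9:0]; compute conventions are routed to getComputePGMRSrc1().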
const MCExpr *SIProgramInfo::getPGMRSrc1(CallingConv::ID CC,
                                         const GCNSubtarget &ST,
                                         MCContext &Ctx) const {
  if (AMDGPU::isCompute(CC)) {
    return getComputePGMRSrc1(ST, Ctx);
  }

  uint64_t Reg = getPGMRSrc1Reg(*this, CC, ST);
  const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
  const MCExpr *Res = MCBinaryExpr::createOr(
      MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
      MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
  return MCBinaryExpr::createOr(RegExpr, Res, Ctx);
}

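// OR the symbolic SCRATCH_EN bit (bit 0 of COMPUTE_PGM_RSRC2) into the
// constant fields packed above.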
const MCExpr *SIProgramInfo::getComputePGMRSrc2(MCContext &Ctx) const {
  uint64_t Reg = getComputePGMRSrc2Reg(*this);
  const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
  return MCBinaryExpr::createOr(ScratchEnable, RegExpr, Ctx);
}

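// Only the compute RSRC2 layout is modeled here; graphics calling
// conventions currently get a zero expression.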
const MCExpr *SIProgramInfo::getPGMRSrc2(CallingConv::ID CC,
                                         MCContext &Ctx) const {
  if (AMDGPU::isCompute(CC))
    return getComputePGMRSrc2(Ctx);

  return MCConstantExpr::create(0, Ctx);
}

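// Estimate the function's code size in bytes. The default query may over- or
// underestimate (alignment padding, inline asm) and is cached; with
// IsLowerBound set, the result is a lower bound that skips alignment padding
// and counts inline asm as zero bytes.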
uint64_t SIProgramInfo::getFunctionCodeSize(const MachineFunction &MF,
                                            bool IsLowerBound) {
  if (!IsLowerBound && CodeSizeInBytes.has_value())
    return *CodeSizeInBytes;

  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = STM.getInstrInfo();

  uint64_t CodeSize = 0;

  for (const MachineBasicBlock &MBB : MF) {
    // The amount of padding needed to align code can be both underestimated
    // and overestimated: for inline asm, getInstSizeInBytes() returns the
    // maximum size of a single instruction, while the real size may differ,
    // so CodeSize may already be off at this point.
    if (!IsLowerBound)
      CodeSize = alignTo(CodeSize, MBB.getAlignment());

    for (const MachineInstr &MI : MBB) {
      // TODO: CodeSize should account for multiple functions.

      if (MI.isMetaInstruction())
        continue;

      // We cannot properly estimate inline asm size. It can be as small as
      // zero bytes if the asm is just a comment.
      if (IsLowerBound && MI.isInlineAsm())
        continue;

      CodeSize += TII->getInstSizeInBytes(MI);
    }
  }

  // Cache only the default estimate; a lower bound must not be served to
  // later default queries from the cache.
  if (!IsLowerBound)
    CodeSizeInBytes = CodeSize;
  return CodeSize;
}