| //===- R600TargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // \file |
| // This file implements a TargetTransformInfo analysis pass specific to the |
| // R600 target machine. It uses the target's detailed information to provide |
| // more precise answers to certain TTI queries, while letting the target |
| // independent and default TTI implementations handle the rest. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "R600TargetTransformInfo.h" |
| #include "AMDGPU.h" |
| #include "AMDGPUTargetMachine.h" |
| #include "R600Subtarget.h" |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "R600tti" |
| |
| R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F) |
| : BaseT(TM, F.getParent()->getDataLayout()), |
| ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))), |
| TLI(ST->getTargetLowering()), CommonTTI(TM, F) {} |
| |
| unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const { |
| return 4 * 128; // XXX - 4 channels. Should these count as vector instead? |
| } |
| |
| unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const { |
| return getHardwareNumberOfRegisters(Vec); |
| } |
| |
| TypeSize |
| R600TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { |
| return TypeSize::getFixed(32); |
| } |
| |
| unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const { return 32; } |
| |
| unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { |
| if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS || |
| AddrSpace == AMDGPUAS::CONSTANT_ADDRESS) |
| return 128; |
| if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS || |
| AddrSpace == AMDGPUAS::REGION_ADDRESS) |
| return 64; |
| if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) |
| return 32; |
| |
| if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS || |
| AddrSpace == AMDGPUAS::PARAM_I_ADDRESS || |
| (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 && |
| AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15))) |
| return 128; |
| llvm_unreachable("unhandled address space"); |
| } |
| |
| bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, |
| Align Alignment, |
| unsigned AddrSpace) const { |
| // We allow vectorization of flat stores, even though we may need to decompose |
| // them later if they may access private memory. We don't have enough context |
| // here, and legalization can handle it. |
| return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS); |
| } |
| |
| bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, |
| Align Alignment, |
| unsigned AddrSpace) const { |
| return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace); |
| } |
| |
| bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, |
| Align Alignment, |
| unsigned AddrSpace) const { |
| return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace); |
| } |
| |
| unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) { |
| // Disable unrolling if the loop is not vectorized. |
| // TODO: Enable this again. |
| if (VF == 1) |
| return 1; |
| |
| return 8; |
| } |
| |
| InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode, |
| TTI::TargetCostKind CostKind, |
| const Instruction *I) { |
| if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency) |
| return Opcode == Instruction::PHI ? 0 : 1; |
| |
| // XXX - For some reason this isn't called for switch. |
| switch (Opcode) { |
| case Instruction::Br: |
| case Instruction::Ret: |
| return 10; |
| default: |
| return BaseT::getCFInstrCost(Opcode, CostKind, I); |
| } |
| } |
| |
| InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, |
| unsigned Index) { |
| switch (Opcode) { |
| case Instruction::ExtractElement: |
| case Instruction::InsertElement: { |
| unsigned EltSize = |
| DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType()); |
| if (EltSize < 32) { |
| return BaseT::getVectorInstrCost(Opcode, ValTy, Index); |
| } |
| |
| // Extracts are just reads of a subregister, so are free. Inserts are |
| // considered free because we don't want to have any cost for scalarizing |
| // operations, and we don't have to copy into a different register class. |
| |
| // Dynamic indexing isn't free and is best avoided. |
| return Index == ~0u ? 2 : 0; |
| } |
| default: |
| return BaseT::getVectorInstrCost(Opcode, ValTy, Index); |
| } |
| } |
| |
| void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, |
| TTI::UnrollingPreferences &UP, |
| OptimizationRemarkEmitter *ORE) { |
| CommonTTI.getUnrollingPreferences(L, SE, UP, ORE); |
| } |
| |
| void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, |
| TTI::PeelingPreferences &PP) { |
| CommonTTI.getPeelingPreferences(L, SE, PP); |
| } |