// blob: 22077e19a95279c3b5ddbb9b6fbf066a2e24780e [file] [log] [blame] [edit]
//===- NVPTXSubtarget.cpp - NVPTX Subtarget Information -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the NVPTX specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//
#include "NVPTXSubtarget.h"
#include "NVPTXSelectionDAGInfo.h"
#include "NVPTXTargetMachine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
using namespace llvm;
#define DEBUG_TYPE "nvptx-subtarget"
#define GET_SUBTARGETINFO_ENUM
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "NVPTXGenSubtargetInfo.inc"
// Hidden command-line flag (default: false). When set, allowFP16Math() reports
// false even if the subtarget itself supports f16 math.
static cl::opt<bool>
NoF16Math("nvptx-no-f16-math", cl::Hidden,
cl::desc("NVPTX Specific: Disable generation of f16 math ops."),
cl::init(false));
// Hidden command-line flag (default: false). When set, hasF32x2Instructions()
// reports false regardless of the SM and PTX versions in use.
static cl::opt<bool> NoF32x2("nvptx-no-f32x2", cl::Hidden,
cl::desc("NVPTX Specific: Disable generation of "
"f32x2 instructions and registers."),
cl::init(false));
// Helpers that build a FullSmVersion value from a plain SM number.
// The encoding is <SM number> * 10 + <suffix offset>, where the offset is
// 0 for the base target (sm_NN), 2 for the 'f' suffix (sm_NNf) and
// 3 for the 'a' suffix (sm_NNa). All three are constexpr so they can be used
// as case labels.
static constexpr unsigned SM(unsigned Version) {
  return 10 * Version;
}
static constexpr unsigned SMF(unsigned Version) {
  return 10 * Version + 2;
}
static constexpr unsigned SMA(unsigned Version) {
  return 10 * Version + 3;
}
// Pin the vtable to this file. The body is intentionally empty; the function
// exists only to have an out-of-line definition in this translation unit.
void NVPTXSubtarget::anchor() {}
// Returns the minimum PTX version required for a given SM target.
//
// FullSmVersion is the encoded target value (SM number * 10 plus a suffix
// offset; see the SM()/SMF()/SMA() helpers used in the case labels below).
// The result is a PTX version encoded as major * 10 + minor, e.g. 78 for
// PTX 7.8.
//
// This must be kept in sync with the "Supported Targets" column of the
// "PTX Release History" table in the PTX ISA documentation:
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history
//
// Note: LLVM's minimum supported PTX version is 3.2 (see FeaturePTX in
// NVPTX.td), so older SMs that supported earlier PTX versions instead use 3.2
// as their effective minimum.
//
// Any value not listed below hits llvm_unreachable, so every SM target that can
// reach this function needs an entry here.
static unsigned getMinPTXVersionForSM(unsigned FullSmVersion) {
switch (FullSmVersion) {
// PTX 3.2 (LLVM's floor, see the note above).
case SM(20):
case SM(21):
case SM(30):
case SM(35):
return 32;
// PTX 4.0.
case SM(32):
case SM(50):
return 40;
// PTX 4.1.
case SM(37):
case SM(52):
return 41;
// PTX 4.2.
case SM(53):
return 42;
// PTX 5.0.
case SM(60):
case SM(61):
case SM(62):
return 50;
// PTX 6.0.
case SM(70):
return 60;
// PTX 6.1.
case SM(72):
return 61;
// PTX 6.3.
case SM(75):
return 63;
// PTX 7.0.
case SM(80):
return 70;
// PTX 7.1.
case SM(86):
return 71;
// PTX 7.4.
case SM(87):
return 74;
// PTX 7.8.
case SM(89):
case SM(90):
return 78;
// PTX 8.0: the 'a' variant of sm_90 needs a newer PTX than plain sm_90.
case SMA(90):
return 80;
// PTX 8.6.
case SM(100):
case SMA(100):
case SM(101):
case SMA(101):
return 86;
// PTX 8.7.
case SM(120):
case SMA(120):
return 87;
// PTX 8.8: includes the 'f' variants of sm_100/sm_101/sm_120, which need a
// newer PTX than their base and 'a' counterparts above.
case SMF(100):
case SMF(101):
case SM(103):
case SMF(103):
case SMA(103):
case SMF(120):
case SM(121):
case SMF(121):
case SMA(121):
return 88;
// PTX 9.0.
case SM(88):
case SM(110):
case SMF(110):
case SMA(110):
return 90;
default:
// Reaching here means an SM target was added without updating this table.
llvm_unreachable("Unknown SM version");
}
}
// Parses the CPU and feature string, then settles the SM and PTX versions.
//
// CPU is recorded as the target name and used (together with FS) to parse the
// subtarget features. If no PTX version was requested, the minimum version for
// the selected SM is used; if a requested version is lower than that minimum, a
// fatal usage error is reported. Returns *this so the call can be used inside
// a member initializer.
NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
                                                                StringRef FS) {
  TargetName = std::string(CPU);
  ParseSubtargetFeatures(getTargetName(), /*TuneCPU=*/getTargetName(), FS);

  // Refresh SmVersion now that the features have been parsed. SmVersion holds
  // the regular SM number, which has a relative order, whereas FullSmVersion
  // also distinguishes e.g. sm_90 from sm_90a (and sm_90a would *not* be a
  // subset of sm_91).
  SmVersion = getSmVersion();

  const unsigned RequiredPTX = getMinPTXVersionForSM(FullSmVersion);

  // A PTX version of 0 means none was requested explicitly: fall back to the
  // minimum that supports this SM.
  if (PTXVersion == 0) {
    PTXVersion = RequiredPTX;
    return *this;
  }

  // An explicitly requested PTX version must be new enough for this SM.
  if (PTXVersion < RequiredPTX)
    reportFatalUsageError(
        formatv("PTX version {0}.{1} does not support target '{2}'. "
                "Minimum required PTX version is {3}.{4}. "
                "Either remove the PTX version to use the default, "
                "or increase it to at least {3}.{4}.",
                PTXVersion / 10, PTXVersion % 10, CPU, RequiredPTX / 10,
                RequiredPTX % 10));

  return *this;
}
// Constructs the subtarget for the given triple, CPU name and feature string.
//
// PTXVersion starts at 0 (treated as "no version requested" by
// initializeSubtargetDependencies) and FullSmVersion at 200. Both can then be
// updated by initializeSubtargetDependencies(), which runs as part of
// initializing InstrInfo. TSInfo is created in the body.
//
// NOTE(review): members are initialized in declaration order (declared in the
// header, not visible here), so this relies on PTXVersion, FullSmVersion and
// SmVersion being declared before InstrInfo, and InstrInfo before TLInfo —
// confirm against NVPTXSubtarget.h before reordering anything.
NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const NVPTXTargetMachine &TM)
: NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0),
FullSmVersion(200), SmVersion(getSmVersion()),
InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) {
TSInfo = std::make_unique<NVPTXSelectionDAGInfo>();
}
// Defaulted destructor, defined out of line in this file.
// NOTE(review): presumably kept here so that the type TSInfo points to
// (NVPTXSelectionDAGInfo) is complete where the destructor is emitted —
// confirm against the header.
NVPTXSubtarget::~NVPTXSubtarget() = default;
// Returns a non-owning pointer to the SelectionDAG info object held by TSInfo.
const SelectionDAGTargetInfo *NVPTXSubtarget::getSelectionDAGInfo() const {
  const SelectionDAGTargetInfo *Info = TSInfo.get();
  return Info;
}
// Returns true if the subtarget has family-specific features, its PTX version
// is at least PTXVersion, and its SM version is matched by at least one entry
// of SMVersions.
//
// An entry matches when it is in the same SM family as the current SM and is
// not newer than it. sm_101 is special-cased: it only ever matches itself, and
// not at all from PTX 9.0 onwards.
bool NVPTXSubtarget::hasPTXWithFamilySMs(unsigned PTXVersion,
                                         ArrayRef<unsigned> SMVersions) const {
  const unsigned CurPTX = getPTXVersion();
  if (!hasFamilySpecificFeatures())
    return false;
  if (CurPTX < PTXVersion)
    return false;

  const unsigned CurSM = getSmVersion();
  auto MatchesFamily = [&](unsigned Candidate) {
    const bool InvolvesSM101 = CurSM == 101 || Candidate == 101;
    if (!InvolvesSM101)
      return getSmFamilyVersion() == Candidate / 10 && CurSM >= Candidate;

    // sm_101 is a different family, never group it with sm_10x: require an
    // exact match instead.
    if (CurSM != Candidate)
      return false;
    // PTX 9.0 and later renamed sm_101 to sm_110, so sm_101 is not supported
    // there.
    return !(CurPTX >= 90 && CurSM == 101);
  };
  return llvm::any_of(SMVersions, MatchesFamily);
}
// Returns true if the subtarget has arch-accelerated features, its PTX version
// is at least PTXVersion, and its SM version is exactly one of SMVersions.
// sm_101 never matches from PTX 9.0 onwards.
bool NVPTXSubtarget::hasPTXWithAccelSMs(unsigned PTXVersion,
                                        ArrayRef<unsigned> SMVersions) const {
  const unsigned CurPTX = getPTXVersion();
  if (!hasArchAccelFeatures() || CurPTX < PTXVersion)
    return false;

  const unsigned CurSM = getSmVersion();
  // PTX 9.0 and later renamed sm_101 to sm_110, so sm_101 is not supported
  // there. This does not depend on the list entry, so check it once up front.
  if (CurPTX >= 90 && CurSM == 101)
    return false;

  return llvm::any_of(SMVersions,
                      [CurSM](unsigned Candidate) { return Candidate == CurSM; });
}
// Returns true if f16 math ops may be generated: the subtarget must support
// them and the nvptx-no-f16-math flag must not be set.
bool NVPTXSubtarget::allowFP16Math() const {
  if (NoF16Math)
    return false;
  return hasFP16Math();
}
// Returns true if f32x2 instructions and registers may be used: requires
// SM >= 100 and PTX >= 8.6, and the nvptx-no-f32x2 flag must not be set.
bool NVPTXSubtarget::hasF32x2Instructions() const {
  if (NoF32x2)
    return false;
  return SmVersion >= 100 && PTXVersion >= 86;
}
// Returns true if the given ISD opcode is natively supported for BF16 on this
// subtarget. Always false without BF16 math support; otherwise the listed
// opcodes are gated on minimum SM/PTX versions and every other opcode is
// reported as supported.
bool NVPTXSubtarget::hasNativeBF16Support(int Opcode) const {
  if (!hasBF16Math())
    return false;

  const unsigned SMVer = getSmVersion();
  const unsigned PTXVer = getPTXVersion();

  switch (Opcode) {
  // These BF16 operations need at least sm_90 together with PTX 7.8.
  case ISD::FADD:
  case ISD::FMUL:
  case ISD::FSUB:
  case ISD::SELECT:
  case ISD::SELECT_CC:
  case ISD::SETCC:
  case ISD::FEXP2:
  case ISD::FCEIL:
  case ISD::FFLOOR:
  case ISD::FNEARBYINT:
  case ISD::FRINT:
  case ISD::FROUNDEVEN:
  case ISD::FTRUNC:
    return SMVer >= 90 && PTXVer >= 78;

  // The BF16 min/max operations need at least sm_80 together with PTX 7.0.
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
  case ISD::FMAXNUM_IEEE:
  case ISD::FMINNUM_IEEE:
  case ISD::FMAXIMUM:
  case ISD::FMINIMUM:
    return SMVer >= 80 && PTXVer >= 70;

  // Every other opcode has no extra version requirement here.
  default:
    return true;
  }
}
void NVPTXSubtarget::failIfClustersUnsupported(
std::string const &FailureMessage) const {
if (hasClusters())
return;
report_fatal_error(formatv(
"NVPTX SM architecture \"{}\" and PTX version \"{}\" do not support {}. "
"Requires SM >= 90 and PTX >= 78.",
getFullSmVersion(), PTXVersion, FailureMessage));
}