blob: 1c169b05841c39db8a061c4434212948b81a6969 [file] [edit]
//===- NVPTX.td - Describe the NVPTX Target Machine -----------*- tblgen -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This is the top level entry point for the NVPTX target.
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Target-independent interfaces
//===----------------------------------------------------------------------===//
include "llvm/Target/Target.td"
include "NVPTXRegisterInfo.td"
include "NVPTXInstrInfo.td"
//===----------------------------------------------------------------------===//
// Subtarget Features.
// - We use the SM version number instead of explicit feature table.
// - Need at least one feature to avoid generating zero sized array by
// TableGen in NVPTXGenSubtarget.inc.
//===----------------------------------------------------------------------===//
// Subtarget feature describing one SM target. It instantiates SubtargetFeature
// with the feature name "sm_<sm>", the field name "FullSmVersion", the decimal
// string form of `value`, and the description "Target SM <sm>".
class FeatureSM<string sm, int value>
    : SubtargetFeature<"sm_" # sm,
                       "FullSmVersion",
                       !cast<string>(value),
                       "Target SM " # sm>;
// Subtarget feature describing one PTX version. It instantiates
// SubtargetFeature with the feature name "ptx<version>", the field name
// "PTXVersion", the decimal string form of `version`, and the description
// "Use PTX version <version>".
class FeaturePTX<int version>
    : SubtargetFeature<"ptx" # version,
                       "PTXVersion",
                       !cast<string>(version),
                       "Use PTX version " # version>;
// NVPTX Architecture Hierarchy and Ordering:
//
// GPU architectures:
//   sm_2Y/sm_3Y/sm_5Y/sm_6Y/sm_7Y/sm_8Y/sm_9Y/sm_10Y/sm_11Y/sm_12Y
// ('Y' represents version within the architecture)
// Architecture names have the form sm_XYz, where 'X' represents the generation
// number, 'Y' represents the version within the architecture, and 'z' represents
// the optional feature suffix.
// If X1Y1 <= X2Y2, then GPU capabilities of sm_X1Y1 are included in sm_X2Y2.
// For example, take sm_90 (9 represents 'X', 0 represents 'Y', and no feature
// suffix) and sm_103 architectures (10 represents 'X', 3 represents 'Y', and no
// feature suffix). Since 90 <= 103, sm_90 is compatible with sm_103.
//
// The family-specific variants have the 'f' feature suffix and follow this
// ordering:
// sm_X{Y2}f > sm_X{Y1}f iff Y2 > Y1
// sm_XY{f} > sm_{XY}{}
//
// For example, take sm_100f (10 represents 'X', 0 represents 'Y', and 'f'
// represents 'z') and sm_103f (10 represents 'X', 3 represents 'Y', and 'f'
// represents 'z') architecture variants. Since Y1 < Y2, sm_100f is compatible with
// sm_103f. Similarly based on the second rule, sm_90 is compatible with sm_103f.
//
// As a counterexample, take the sm_100f and sm_120f (12 represents 'X', 0
// represents 'Y', and 'f' represents 'z') architecture variants. Since they
// belong to different families, i.e. X1 != X2, sm_100f is not compatible with
// sm_120f.
//
// The architecture-specific variants have the 'a' feature suffix and follow
// this ordering:
// sm_XY{a} > sm_XY{f} > sm_{XY}{}
//
// For example, take sm_103a (10 represents 'X', 3 represents 'Y', and 'a'
// represents 'z'), sm_103f, and sm_103 architecture variants. The sm_103 is
// compatible with sm_103a and sm_103f, and sm_103f is compatible with sm_103a.
//
// Encoding := Arch * 10 + ArchSuffixOffset
// Arch := X * 10 + Y
// ArchSuffixOffset := 0 (base), 2 ('f'), or 3 ('a')
//
// For example, sm_103a is encoded as 1033 (103 * 10 + 3) and sm_103f is
// encoded as 1032 (103 * 10 + 2).
//
// This encoding allows simple partial ordering of the architectures.
// + Compare Family and Arch by dividing FullSmVersion by 100 and 10
// respectively before the comparison.
// + Compare within the family by comparing FullSmVersion, given that both
// belong to the same family.
// + Detect 'a' variants by checking FullSmVersion & 1 (of the suffix offsets
// 0, 2, and 3, only the 'a' offset is odd).
// Processor wrapper for a single SM feature: the processor is named after the
// feature's Name field, uses NoItineraries, and lists that feature as its only
// feature.
class Proc<FeatureSM SM> : Processor<SM.Name, NoItineraries, [SM]>;
// For every supported SM number, define the feature record SM<N> plus an
// anonymous processor for it, and, for newer architectures, the 'f' and 'a'
// suffixed variants as well.
foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53, 60,
              61, 62, 70, 72, 75, 80, 86, 87, 88, 89,
              90, 100, 101, 103, 110, 120, 121] in {
  // FullSmVersion encoding of the base variant (e.g. 1000 for sm_100); the
  // suffixed variants add their offset to this value.
  defvar SmBaseEncoding = !mul(sm, 10);

  // Base SM version.
  def SM#sm : FeatureSM<""#sm, SmBaseEncoding>;
  def : Proc<!cast<FeatureSM>("SM"#sm)>;

  // Family-specific variants (offset 2), compatible within the same family
  // (e.g. sm_100f = 1002). Only defined for sm >= 100.
  if !ge(sm, 100) then {
    def SM#sm#f : FeatureSM<""#sm#"f", !add(SmBaseEncoding, 2)>;
    def : Proc<!cast<FeatureSM>("SM"#sm#"f")>;
  }

  // Architecture-specific variants (offset 3), incompatible across
  // architectures (e.g. sm_100a = 1003). Only defined for sm >= 90.
  if !ge(sm, 90) then {
    def SM#sm#a : FeatureSM<""#sm#"a", !add(SmBaseEncoding, 3)>;
    def : Proc<!cast<FeatureSM>("SM"#sm#"a")>;
  }
}
// Define one FeaturePTX record, named PTX<N>, for every supported PTX version
// number N.
foreach ptxVersion = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65, 70, 71,
                      72, 73, 74, 75, 76, 77, 78, 80, 81, 82, 83, 84, 85, 86,
                      87, 88, 90, 91, 92] in {
  def PTX#ptxVersion : FeaturePTX<ptxVersion>;
}
// Predicate that holds when the subtarget's target triple architecture is
// nvptx64.
def Is64Bit
    : Predicate<"Subtarget->getTargetTriple().getArch() == Triple::nvptx64">;

// Hardware mode gated on the Is64Bit predicate.
def NVPTX64 : HwMode<[Is64Bit]>;

// Register class chosen per hardware mode: the mode list
// [DefaultMode, NVPTX64] is paired with the class list [B32, B64].
def nvptx_ptr_rc : RegClassByHwMode<[DefaultMode, NVPTX64], [B32, B64]>;

// NOTE(review): presumably rewrites the pointer operands of the
// target-independent pseudo instructions to use nvptx_ptr_rc -- confirm
// against the definition of RemapAllTargetPseudoPointerOperands.
defm : RemapAllTargetPseudoPointerOperands<nvptx_ptr_rc>;
// Instruction-set record for NVPTX; it inherits everything from InstrInfo and
// overrides nothing.
def NVPTXInstrInfo : InstrInfo;
// Assembly writer record for NVPTX. The only customization is setting
// PassSubtarget to 1.
// NOTE(review): presumably this makes the generated instruction printer take
// the subtarget as an argument -- confirm against AsmWriter in Target.td.
def NVPTXAsmWriter : AsmWriter {
int PassSubtarget = 1;
}
// Top-level target record: uses NVPTXInstrInfo as its instruction set and
// NVPTXAsmWriter as its only assembly writer.
let InstructionSet = NVPTXInstrInfo,
    AssemblyWriters = [NVPTXAsmWriter] in
def NVPTX : Target;