clang/lib/Basic/Targets/NVPTX.cpp - llvm-project - Git at Google

 //===--- NVPTX.cpp - Implement NVPTX target feature support ---------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This file implements NVPTX TargetInfo objects.
 //
 //===----------------------------------------------------------------------===//

 #include "NVPTX.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/MacroBuilder.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "llvm/ADT/StringSwitch.h"

 using namespace clang;
 using namespace clang::targets;

 // Number of NVPTX-specific builtins: the size of the ID range that starts at
 // Builtin::FirstTSBuiltin and ends at clang::NVPTX::LastTSBuiltin.
 static constexpr int NumBuiltins =
     clang::NVPTX::LastTSBuiltin - Builtin::FirstTSBuiltin;

 // Expand the GET_BUILTIN_STR_TABLE section of the generated .inc file.
 // NOTE(review): presumably this is what defines the BuiltinStrings table
 // that getTargetBuiltins() refers to -- confirm against the generated file.
 #define GET_BUILTIN_STR_TABLE
 #include "clang/Basic/BuiltinsNVPTX.inc"
 #undef GET_BUILTIN_STR_TABLE

 // Table of Builtin::Info entries; its initializers come from the
 // GET_BUILTIN_INFOS section of the same generated .inc file.
 static constexpr Builtin::Info BuiltinInfos[] = {
 #define GET_BUILTIN_INFOS
 #include "clang/Basic/BuiltinsNVPTX.inc"
 #undef GET_BUILTIN_INFOS
 };
 // Fail the build if the table size and the builtin ID range ever disagree.
 static_assert(std::size(BuiltinInfos) == NumBuiltins);

 // Register name table handed out by getGCCRegNames(); it holds the single
 // entry "r0".
 const char *const NVPTXTargetInfo::GCCRegNames[] = {"r0"};

 // Builds the TargetInfo for an NVPTX device.
 //
 // Triple is forwarded to the TargetInfo base class and is used to compute the
 // data layout string. Opts supplies the features as written, the
 // short-pointer setting and the host triple. TargetPointerWidth must be 32 or
 // 64.
 //
 // When a non-NVPTX host target can be allocated, the basic type widths,
 // alignments and typedef kinds are copied from it; otherwise they are derived
 // from TargetPointerWidth.
 NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
                                  const TargetOptions &Opts,
                                  unsigned TargetPointerWidth)
     : TargetInfo(Triple) {
   assert((TargetPointerWidth == 32 || TargetPointerWidth == 64) &&
          "NVPTX only supports 32- and 64-bit modes.");

   // A PTXVersion of 0 means "use the minimum for the SM target". It is only
   // replaced when the user explicitly asked for a PTX version through a
   // "+ptx<N>" feature; if several are present the last parsable one wins.
   PTXVersion = 0;
   for (const StringRef Feature : Opts.FeaturesAsWritten) {
     if (!Feature.starts_with("+ptx"))
       continue;
     int RequestedPTX;
     // getAsInteger() reports failure by returning true, so only a cleanly
     // parsed number is stored.
     if (!Feature.drop_front(4).getAsInteger(10, RequestedPTX))
       PTXVersion = RequestedPTX; // TODO: should it be max(PTXVersion, PTXV)?
   }

   // Fixed properties of the target.
   TLSSupported = false;
   VLASupported = false;
   NoAsmVariants = true;
   UseAddrSpaceMapMangling = true;
   AddrSpaceMap = &NVPTXAddrSpaceMap;
   GPU = OffloadArch::UNUSED;

   // __bf16 is always available as a load/store only type.
   BFloat16Width = 16;
   BFloat16Align = 16;
   BFloat16Format = &llvm::APFloat::BFloat();

   // PTX supports f16 as a fundamental type.
   HasFastHalfType = true;
   HasFloat16 = true;

   // TODO: Make shortptr a proper ABI?
   const char *const LayoutABI = Opts.NVPTXUseShortPointers ? "shortptr" : "";
   DataLayoutString = Triple.computeDataLayout(LayoutABI);

   // If possible, get a TargetInfo for our host triple, so we can match its
   // types. No host target is allocated when the host triple is itself NVPTX.
   const llvm::Triple HostTriple(Opts.HostTriple);
   if (!HostTriple.isNVPTX())
     HostTarget = AllocateTarget(HostTriple, Opts);

   // If no host target, make some guesses about the data layout and return.
   if (!HostTarget) {
     PointerWidth = TargetPointerWidth;
     PointerAlign = TargetPointerWidth;
     LongWidth = TargetPointerWidth;
     LongAlign = TargetPointerWidth;
     MaxAtomicInlineWidth = TargetPointerWidth;

     // Pick the size/pointer-difference typedef kinds matching the width.
     switch (TargetPointerWidth) {
     case 32:
       SizeType = TargetInfo::UnsignedInt;
       PtrDiffType = TargetInfo::SignedInt;
       IntPtrType = TargetInfo::SignedInt;
       break;
     case 64:
       SizeType = TargetInfo::UnsignedLong;
       PtrDiffType = TargetInfo::SignedLong;
       IntPtrType = TargetInfo::SignedLong;
       break;
     default:
       llvm_unreachable("TargetPointerWidth must be 32 or 64");
     }
     return;
   }

   // Copy properties from host target.
   auto &Host = *HostTarget;

   // Widths and alignments of the basic types.
   PointerWidth = Host.getPointerWidth(LangAS::Default);
   PointerAlign = Host.getPointerAlign(LangAS::Default);
   BoolWidth = Host.getBoolWidth();
   BoolAlign = Host.getBoolAlign();
   IntWidth = Host.getIntWidth();
   IntAlign = Host.getIntAlign();
   HalfWidth = Host.getHalfWidth();
   HalfAlign = Host.getHalfAlign();
   FloatWidth = Host.getFloatWidth();
   FloatAlign = Host.getFloatAlign();
   DoubleWidth = Host.getDoubleWidth();
   DoubleAlign = Host.getDoubleAlign();
   LongWidth = Host.getLongWidth();
   LongAlign = Host.getLongAlign();
   LongLongWidth = Host.getLongLongWidth();
   LongLongAlign = Host.getLongLongAlign();

   // Miscellaneous alignment settings.
   MinGlobalAlign = Host.getMinGlobalAlign(/* TypeSize = */ 0,
                                           /* HasNonWeakDef = */ true);
   NewAlign = Host.getNewAlign();
   DefaultAlignForAttributeAligned = Host.getDefaultAlignForAttributeAligned();

   // Kinds of the standard integer typedefs.
   SizeType = Host.getSizeType();
   IntMaxType = Host.getIntMaxType();
   PtrDiffType = Host.getPtrDiffType(LangAS::Default);
   IntPtrType = Host.getIntPtrType();
   WCharType = Host.getWCharType();
   WIntType = Host.getWIntType();
   Char16Type = Host.getChar16Type();
   Char32Type = Host.getChar32Type();
   Int64Type = Host.getInt64Type();
   SigAtomicType = Host.getSigAtomicType();
   ProcessIDType = Host.getProcessIDType();

   // Bit-field layout rules.
   UseBitFieldTypeAlignment = Host.useBitFieldTypeAlignment();
   UseZeroLengthBitfieldAlignment = Host.useZeroLengthBitfieldAlignment();
   UseExplicitBitFieldAlignment = Host.useExplicitBitFieldAlignment();
   ZeroLengthBitfieldBoundary = Host.getZeroLengthBitfieldBoundary();

   // This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and
   // we need those macros to be identical on host and device, because (among
   // other things) they affect which standard library classes are defined, and
   // we need all classes to be defined on both the host and device.
   MaxAtomicInlineWidth = Host.getMaxAtomicInlineWidth();

   // Properties intentionally not copied from host:
   // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the
   //   host/device boundary.
   // - SuitableAlign: Not visible across the host/device boundary, and may
   //   correctly be different on host/device, e.g. if host has wider vector
   //   types than device.
   // - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same
   //   as its double type, but that's not necessarily true on the host.
   //   TODO: nvcc emits a warning when using long double on device; we should
   //   do the same.
 }

 // Returns a view over the static GCCRegNames table.
 ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const {
   ArrayRef<const char *> RegNames(GCCRegNames);
   return RegNames;
 }

 // Returns true only for the feature names "ptx" and "nvptx"; every other
 // name yields false.
 bool NVPTXTargetInfo::hasFeature(StringRef Feature) const {
   const bool IsPTX = Feature == "ptx";
   const bool IsNVPTX = Feature == "nvptx";
   return IsPTX || IsNVPTX;
 }

 // Emits the predefined macros for this target into Builder.
 //
 // __PTX__ and __NVPTX__ are always defined. __CUDA_ARCH__ is defined only
 // when GPU is an NVIDIA offload architecture and either Opts marks this as a
 // CUDA or OpenMP device compilation or there is no host target.
 // __CUDA_ARCH_FEAT_SM<NN>_ALL is defined in addition for accelerated
 // architectures.
 void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
                                        MacroBuilder &Builder) const {
   Builder.defineMacro("__PTX__");
   Builder.defineMacro("__NVPTX__");

   // Skip setting architecture dependent macros if undefined.
   if (!IsNVIDIAOffloadArch(GPU))
     return;

   const bool WantsArchMacros =
       Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget;
   if (!WantsArchMacros)
     return;

   // Set __CUDA_ARCH__ for the GPU specified.
   const unsigned ArchID = CudaArchToID(GPU);
   Builder.defineMacro("__CUDA_ARCH__", llvm::Twine(ArchID));

   if (!IsNVIDIAAcceleratedOffloadArch(GPU))
     return;

   // The Twine must be consumed within this full expression, so the name is
   // built directly in the call.
   Builder.defineMacro(
       "__CUDA_ARCH_FEAT_SM" + llvm::Twine(ArchID / 10) + "_ALL", "1");
 }

 // Returns the target builtins as a single shard that pairs the BuiltinStrings
 // table with the BuiltinInfos array.
 llvm::SmallVector<Builtin::InfosShard>
 NVPTXTargetInfo::getTargetBuiltins() const {
   const Builtin::InfosShard Shard = {&BuiltinStrings, BuiltinInfos};
   return {Shard};
 }
	//===--- NVPTX.cpp - Implement NVPTX target feature support ---------------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	// This file implements NVPTX TargetInfo objects.
	//
	//===----------------------------------------------------------------------===//

	#include "NVPTX.h"
	#include "clang/Basic/Builtins.h"
	#include "clang/Basic/MacroBuilder.h"
	#include "clang/Basic/TargetBuiltins.h"
	#include "llvm/ADT/StringSwitch.h"

	using namespace clang;
	using namespace clang::targets;

	// Number of NVPTX-specific builtins: the size of the ID range that starts at
	// Builtin::FirstTSBuiltin and ends at clang::NVPTX::LastTSBuiltin.
	static constexpr int NumBuiltins =
	clang::NVPTX::LastTSBuiltin - Builtin::FirstTSBuiltin;

	// Expand the GET_BUILTIN_STR_TABLE section of the generated .inc file.
	// NOTE(review): presumably this is what defines the BuiltinStrings table
	// that getTargetBuiltins() refers to -- confirm against the generated file.
	#define GET_BUILTIN_STR_TABLE
	#include "clang/Basic/BuiltinsNVPTX.inc"
	#undef GET_BUILTIN_STR_TABLE

	// Table of Builtin::Info entries; its initializers come from the
	// GET_BUILTIN_INFOS section of the same generated .inc file.
	static constexpr Builtin::Info BuiltinInfos[] = {
	#define GET_BUILTIN_INFOS
	#include "clang/Basic/BuiltinsNVPTX.inc"
	#undef GET_BUILTIN_INFOS
	};
	// Fail the build if the table size and the builtin ID range ever disagree.
	static_assert(std::size(BuiltinInfos) == NumBuiltins);

	// Register name table handed out by getGCCRegNames(); it holds the single
	// entry "r0".
	const char *const NVPTXTargetInfo::GCCRegNames[] = {"r0"};

	// Builds the TargetInfo for an NVPTX device.
	//
	// Triple is forwarded to the TargetInfo base class and is used to compute the
	// data layout string. Opts supplies the features as written, the
	// short-pointer setting and the host triple. TargetPointerWidth must be 32 or
	// 64.
	//
	// When a non-NVPTX host target can be allocated, the basic type widths,
	// alignments and typedef kinds are copied from it; otherwise they are derived
	// from TargetPointerWidth.
	NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
	                                 const TargetOptions &Opts,
	                                 unsigned TargetPointerWidth)
	    : TargetInfo(Triple) {
	  assert((TargetPointerWidth == 32 || TargetPointerWidth == 64) &&
	         "NVPTX only supports 32- and 64-bit modes.");

	  // A PTXVersion of 0 means "use the minimum for the SM target". It is only
	  // replaced when the user explicitly asked for a PTX version through a
	  // "+ptx<N>" feature; if several are present the last parsable one wins.
	  PTXVersion = 0;
	  for (const StringRef Feature : Opts.FeaturesAsWritten) {
	    int PTXV;
	    // getAsInteger() reports failure by returning true.
	    if (!Feature.starts_with("+ptx") ||
	        Feature.drop_front(4).getAsInteger(10, PTXV))
	      continue;
	    PTXVersion = PTXV; // TODO: should it be max(PTXVersion, PTXV)?
	  }

	  // Fixed properties of the target.
	  TLSSupported = false;
	  VLASupported = false;
	  AddrSpaceMap = &NVPTXAddrSpaceMap;
	  UseAddrSpaceMapMangling = true;
	  // __bf16 is always available as a load/store only type.
	  BFloat16Width = BFloat16Align = 16;
	  BFloat16Format = &llvm::APFloat::BFloat();

	  NoAsmVariants = true;
	  GPU = OffloadArch::UNUSED;

	  // PTX supports f16 as a fundamental type.
	  HasFastHalfType = true;
	  HasFloat16 = true;

	  // TODO: Make shortptr a proper ABI?
	  DataLayoutString =
	      Triple.computeDataLayout(Opts.NVPTXUseShortPointers ? "shortptr" : "");

	  // If possible, get a TargetInfo for our host triple, so we can match its
	  // types. No host target is allocated when the host triple is itself NVPTX.
	  llvm::Triple HostTriple(Opts.HostTriple);
	  if (!HostTriple.isNVPTX())
	    HostTarget = AllocateTarget(llvm::Triple(Opts.HostTriple), Opts);

	  // If no host target, make some guesses about the data layout and return.
	  if (!HostTarget) {
	    LongWidth = LongAlign = TargetPointerWidth;
	    PointerWidth = PointerAlign = TargetPointerWidth;
	    // Pick the size/pointer-difference typedef kinds matching the width.
	    switch (TargetPointerWidth) {
	    case 32:
	      SizeType = TargetInfo::UnsignedInt;
	      PtrDiffType = TargetInfo::SignedInt;
	      IntPtrType = TargetInfo::SignedInt;
	      break;
	    case 64:
	      SizeType = TargetInfo::UnsignedLong;
	      PtrDiffType = TargetInfo::SignedLong;
	      IntPtrType = TargetInfo::SignedLong;
	      break;
	    default:
	      llvm_unreachable("TargetPointerWidth must be 32 or 64");
	    }

	    MaxAtomicInlineWidth = TargetPointerWidth;
	    return;
	  }

	  // Copy properties from host target.
	  PointerWidth = HostTarget->getPointerWidth(LangAS::Default);
	  PointerAlign = HostTarget->getPointerAlign(LangAS::Default);
	  BoolWidth = HostTarget->getBoolWidth();
	  BoolAlign = HostTarget->getBoolAlign();
	  IntWidth = HostTarget->getIntWidth();
	  IntAlign = HostTarget->getIntAlign();
	  HalfWidth = HostTarget->getHalfWidth();
	  HalfAlign = HostTarget->getHalfAlign();
	  FloatWidth = HostTarget->getFloatWidth();
	  FloatAlign = HostTarget->getFloatAlign();
	  DoubleWidth = HostTarget->getDoubleWidth();
	  DoubleAlign = HostTarget->getDoubleAlign();
	  LongWidth = HostTarget->getLongWidth();
	  LongAlign = HostTarget->getLongAlign();
	  LongLongWidth = HostTarget->getLongLongWidth();
	  LongLongAlign = HostTarget->getLongLongAlign();
	  MinGlobalAlign = HostTarget->getMinGlobalAlign(/* TypeSize = */ 0,
	                                                 /* HasNonWeakDef = */ true);
	  NewAlign = HostTarget->getNewAlign();
	  DefaultAlignForAttributeAligned =
	      HostTarget->getDefaultAlignForAttributeAligned();
	  SizeType = HostTarget->getSizeType();
	  IntMaxType = HostTarget->getIntMaxType();
	  PtrDiffType = HostTarget->getPtrDiffType(LangAS::Default);
	  IntPtrType = HostTarget->getIntPtrType();
	  WCharType = HostTarget->getWCharType();
	  WIntType = HostTarget->getWIntType();
	  Char16Type = HostTarget->getChar16Type();
	  Char32Type = HostTarget->getChar32Type();
	  Int64Type = HostTarget->getInt64Type();
	  SigAtomicType = HostTarget->getSigAtomicType();
	  ProcessIDType = HostTarget->getProcessIDType();

	  UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment();
	  UseZeroLengthBitfieldAlignment = HostTarget->useZeroLengthBitfieldAlignment();
	  UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment();
	  ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary();

	  // This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and
	  // we need those macros to be identical on host and device, because (among
	  // other things) they affect which standard library classes are defined, and
	  // we need all classes to be defined on both the host and device.
	  MaxAtomicInlineWidth = HostTarget->getMaxAtomicInlineWidth();

	  // Properties intentionally not copied from host:
	  // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the
	  //   host/device boundary.
	  // - SuitableAlign: Not visible across the host/device boundary, and may
	  //   correctly be different on host/device, e.g. if host has wider vector
	  //   types than device.
	  // - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same
	  //   as its double type, but that's not necessarily true on the host.
	  //   TODO: nvcc emits a warning when using long double on device; we should
	  //   do the same.
	}

	// Returns a view over the static GCCRegNames table.
	ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const {
	  ArrayRef<const char *> RegNames(GCCRegNames);
	  return RegNames;
	}

	// Returns true only for the feature names "ptx" and "nvptx"; every other
	// name yields false.
	bool NVPTXTargetInfo::hasFeature(StringRef Feature) const {
	  const bool IsPTX = Feature == "ptx";
	  const bool IsNVPTX = Feature == "nvptx";
	  return IsPTX || IsNVPTX;
	}

	// Emits the predefined macros for this target into Builder.
	//
	// __PTX__ and __NVPTX__ are always defined. __CUDA_ARCH__ is defined only
	// when GPU is an NVIDIA offload architecture and either Opts marks this as a
	// CUDA or OpenMP device compilation or there is no host target.
	// __CUDA_ARCH_FEAT_SM<NN>_ALL is defined in addition for accelerated
	// architectures.
	void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
	                                       MacroBuilder &Builder) const {
	  Builder.defineMacro("__PTX__");
	  Builder.defineMacro("__NVPTX__");

	  // Skip setting architecture dependent macros if undefined.
	  if (!IsNVIDIAOffloadArch(GPU))
	    return;

	  if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
	    // Set __CUDA_ARCH__ for the GPU specified.
	    unsigned ArchID = CudaArchToID(GPU);
	    Builder.defineMacro("__CUDA_ARCH__", llvm::Twine(ArchID));

	    // The Twine must be consumed within this full expression, so the name
	    // is built directly in the call.
	    if (IsNVIDIAAcceleratedOffloadArch(GPU))
	      Builder.defineMacro(
	          "__CUDA_ARCH_FEAT_SM" + llvm::Twine(ArchID / 10) + "_ALL", "1");
	  }
	}

	// Returns the target builtins as a single shard that pairs the BuiltinStrings
	// table with the BuiltinInfos array.
	llvm::SmallVector<Builtin::InfosShard>
	NVPTXTargetInfo::getTargetBuiltins() const {
	  const Builtin::InfosShard Shard = {&BuiltinStrings, BuiltinInfos};
	  return {Shard};
	}