| //===----RTLs/amdgpu/utils/UtilitiesRTL.h ------------------------- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // RTL Utilities for AMDGPU plugins |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include <cstdint> |
| |
| #include "Shared/Debug.h" |
| #include "Utils/ELF.h" |
| |
| #include "omptarget.h" |
| |
| #include "llvm/ADT/StringMap.h" |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/Support/Error.h" |
| |
| #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h" |
| #include "llvm/BinaryFormat/ELF.h" |
| #include "llvm/BinaryFormat/MsgPackDocument.h" |
| #include "llvm/Support/MemoryBufferRef.h" |
| #include "llvm/Support/YAMLTraits.h" |
| |
| using namespace llvm::ELF; |
| |
| namespace llvm { |
| namespace omp { |
| namespace target { |
| namespace plugin { |
| namespace utils { |
| |
| // The implicit arguments of COV5 AMDGPU kernels. |
| struct AMDGPUImplicitArgsTy { |
| uint32_t BlockCountX; |
| uint32_t BlockCountY; |
| uint32_t BlockCountZ; |
| uint16_t GroupSizeX; |
| uint16_t GroupSizeY; |
| uint16_t GroupSizeZ; |
| uint8_t Unused0[46]; // 46 byte offset. |
| uint16_t GridDims; |
| uint8_t Unused1[54]; // 54 byte offset. |
| uint32_t DynamicLdsSize; |
| uint8_t Unused2[132]; // 132 byte offset. |
| }; |
| |
| // Dummy struct for COV4 implicitargs. |
| struct AMDGPUImplicitArgsTyCOV4 { |
| uint8_t Unused[56]; |
| }; |
| |
| inline uint32_t getImplicitArgsSize(uint16_t Version) { |
| return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5 |
| ? sizeof(AMDGPUImplicitArgsTyCOV4) |
| : sizeof(AMDGPUImplicitArgsTy); |
| } |
| |
| /// Check if an image is compatible with current system's environment. The |
| /// system environment is given as a 'target-id' which has the form: |
| /// |
| /// <target-id> := <processor> ( ":" <target-feature> ( "+" | "-" ) )* |
| /// |
| /// If a feature is not specific as '+' or '-' it is assumed to be in an 'any' |
| /// and is compatible with either '+' or '-'. The HSA runtime returns this |
| /// information using the target-id, while we use the ELF header to determine |
| /// these features. |
| inline bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags, |
| StringRef EnvTargetID) { |
| StringRef EnvArch = EnvTargetID.split(":").first; |
| |
| // Trivial check if the base processors match. |
| if (EnvArch != ImageArch) |
| return false; |
| |
| // Check if the image is requesting xnack on or off. |
| switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) { |
| case EF_AMDGPU_FEATURE_XNACK_OFF_V4: |
| // The image is 'xnack-' so the environment must be 'xnack-'. |
| if (!EnvTargetID.contains("xnack-")) |
| return false; |
| break; |
| case EF_AMDGPU_FEATURE_XNACK_ON_V4: |
| // The image is 'xnack+' so the environment must be 'xnack+'. |
| if (!EnvTargetID.contains("xnack+")) |
| return false; |
| break; |
| case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4: |
| case EF_AMDGPU_FEATURE_XNACK_ANY_V4: |
| default: |
| break; |
| } |
| |
| // Check if the image is requesting sramecc on or off. |
| switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) { |
| case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4: |
| // The image is 'sramecc-' so the environment must be 'sramecc-'. |
| if (!EnvTargetID.contains("sramecc-")) |
| return false; |
| break; |
| case EF_AMDGPU_FEATURE_SRAMECC_ON_V4: |
| // The image is 'sramecc+' so the environment must be 'sramecc+'. |
| if (!EnvTargetID.contains("sramecc+")) |
| return false; |
| break; |
| case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4: |
| case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4: |
| break; |
| } |
| |
| return true; |
| } |
| |
| struct KernelMetaDataTy { |
| uint64_t KernelObject; |
| uint32_t GroupSegmentList; |
| uint32_t PrivateSegmentSize; |
| uint32_t SGPRCount; |
| uint32_t VGPRCount; |
| uint32_t SGPRSpillCount; |
| uint32_t VGPRSpillCount; |
| uint32_t KernelSegmentSize; |
| uint32_t ExplicitArgumentCount; |
| uint32_t ImplicitArgumentCount; |
| uint32_t RequestedWorkgroupSize[3]; |
| uint32_t WorkgroupSizeHint[3]; |
| uint32_t WavefronSize; |
| uint32_t MaxFlatWorkgroupSize; |
| }; |
| namespace { |
| |
| /// Reads the AMDGPU specific per-kernel-metadata from an image. |
| class KernelInfoReader { |
| public: |
| KernelInfoReader(StringMap<KernelMetaDataTy> &KIM) : KernelInfoMap(KIM) {} |
| |
| /// Process ELF note to read AMDGPU metadata from respective information |
| /// fields. |
| Error processNote(const object::ELF64LE::Note &Note, size_t Align) { |
| if (Note.getName() != "AMDGPU") |
| return Error::success(); // We are not interested in other things |
| |
| assert(Note.getType() == ELF::NT_AMDGPU_METADATA && |
| "Parse AMDGPU MetaData"); |
| auto Desc = Note.getDesc(Align); |
| StringRef MsgPackString = |
| StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size()); |
| msgpack::Document MsgPackDoc; |
| if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false)) |
| return Error::success(); |
| |
| AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true); |
| if (!Verifier.verify(MsgPackDoc.getRoot())) |
| return Error::success(); |
| |
| auto RootMap = MsgPackDoc.getRoot().getMap(true); |
| |
| if (auto Err = iterateAMDKernels(RootMap)) |
| return Err; |
| |
| return Error::success(); |
| } |
| |
| private: |
| /// Extracts the relevant information via simple string look-up in the msgpack |
| /// document elements. |
| Error extractKernelData(msgpack::MapDocNode::MapTy::value_type V, |
| std::string &KernelName, |
| KernelMetaDataTy &KernelData) { |
| if (!V.first.isString()) |
| return Error::success(); |
| |
| const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) { |
| return DK.getString() == SK; |
| }; |
| |
| const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN, |
| uint32_t *Vals) { |
| assert(DN.isArray() && "MsgPack DocNode is an array node"); |
| auto DNA = DN.getArray(); |
| assert(DNA.size() == 3 && "ArrayNode has at most three elements"); |
| |
| int I = 0; |
| for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd; |
| ++DNABegin) { |
| Vals[I++] = DNABegin->getUInt(); |
| } |
| }; |
| |
| if (IsKey(V.first, ".name")) { |
| KernelName = V.second.toString(); |
| } else if (IsKey(V.first, ".sgpr_count")) { |
| KernelData.SGPRCount = V.second.getUInt(); |
| } else if (IsKey(V.first, ".sgpr_spill_count")) { |
| KernelData.SGPRSpillCount = V.second.getUInt(); |
| } else if (IsKey(V.first, ".vgpr_count")) { |
| KernelData.VGPRCount = V.second.getUInt(); |
| } else if (IsKey(V.first, ".vgpr_spill_count")) { |
| KernelData.VGPRSpillCount = V.second.getUInt(); |
| } else if (IsKey(V.first, ".private_segment_fixed_size")) { |
| KernelData.PrivateSegmentSize = V.second.getUInt(); |
| } else if (IsKey(V.first, ".group_segment_fixed_size")) { |
| KernelData.GroupSegmentList = V.second.getUInt(); |
| } else if (IsKey(V.first, ".reqd_workgroup_size")) { |
| GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize); |
| } else if (IsKey(V.first, ".workgroup_size_hint")) { |
| GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint); |
| } else if (IsKey(V.first, ".wavefront_size")) { |
| KernelData.WavefronSize = V.second.getUInt(); |
| } else if (IsKey(V.first, ".max_flat_workgroup_size")) { |
| KernelData.MaxFlatWorkgroupSize = V.second.getUInt(); |
| } |
| |
| return Error::success(); |
| } |
| |
| /// Get the "amdhsa.kernels" element from the msgpack Document |
| Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) { |
| auto Res = MDN.find("amdhsa.kernels"); |
| if (Res == MDN.end()) |
| return createStringError(inconvertibleErrorCode(), |
| "Could not find amdhsa.kernels key"); |
| |
| auto Pair = *Res; |
| assert(Pair.second.isArray() && |
| "AMDGPU kernel entries are arrays of entries"); |
| |
| return Pair.second.getArray(); |
| } |
| |
| /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a |
| /// MapDocNode that either maps a string to a single value (most of them) or |
| /// to another array of things. Currently, we only handle the case that maps |
| /// to scalar value. |
| Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) { |
| KernelMetaDataTy KernelData; |
| std::string KernelName; |
| auto Entry = (*It).getMap(); |
| for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI) |
| if (auto Err = extractKernelData(*MI, KernelName, KernelData)) |
| return Err; |
| |
| KernelInfoMap.insert({KernelName, KernelData}); |
| return Error::success(); |
| } |
| |
| /// Go over the list of AMD kernels in the "amdhsa.kernels" entry |
| Error iterateAMDKernels(msgpack::MapDocNode &MDN) { |
| auto KernelsOrErr = getAMDKernelsArray(MDN); |
| if (auto Err = KernelsOrErr.takeError()) |
| return Err; |
| |
| auto KernelsArr = *KernelsOrErr; |
| for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) { |
| if (!It->isMap()) |
| continue; // we expect <key,value> pairs |
| |
| // Obtain the value for the different entries. Each array entry is a |
| // MapDocNode |
| if (auto Err = generateKernelInfo(It)) |
| return Err; |
| } |
| return Error::success(); |
| } |
| |
| // Kernel names are the keys |
| StringMap<KernelMetaDataTy> &KernelInfoMap; |
| }; |
| } // namespace |
| |
| /// Reads the AMDGPU specific metadata from the ELF file and propagates the |
| /// KernelInfoMap |
| inline Error |
| readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer, |
| StringMap<KernelMetaDataTy> &KernelInfoMap, |
| uint16_t &ELFABIVersion) { |
| Error Err = Error::success(); // Used later as out-parameter |
| |
| auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer()); |
| if (auto Err = ELFOrError.takeError()) |
| return Err; |
| |
| const object::ELF64LEFile ELFObj = ELFOrError.get(); |
| ArrayRef<object::ELF64LE::Shdr> Sections = cantFail(ELFObj.sections()); |
| KernelInfoReader Reader(KernelInfoMap); |
| |
| // Read the code object version from ELF image header |
| auto Header = ELFObj.getHeader(); |
| ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]); |
| DP("ELFABIVERSION Version: %u\n", ELFABIVersion); |
| |
| for (const auto &S : Sections) { |
| if (S.sh_type != ELF::SHT_NOTE) |
| continue; |
| |
| for (const auto N : ELFObj.notes(S, Err)) { |
| if (Err) |
| return Err; |
| // Fills the KernelInfoTabel entries in the reader |
| if ((Err = Reader.processNote(N, S.sh_addralign))) |
| return Err; |
| } |
| } |
| |
| return Error::success(); |
| } |
| |
| } // namespace utils |
| } // namespace plugin |
| } // namespace target |
| } // namespace omp |
| } // namespace llvm |