| //===- OffloadBundle.cpp - Utilities for offload bundles---*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------===// |
| |
| #include "llvm/Object/OffloadBundle.h" |
| #include "llvm/BinaryFormat/Magic.h" |
| #include "llvm/IR/Module.h" |
| #include "llvm/IRReader/IRReader.h" |
| #include "llvm/MC/StringTableBuilder.h" |
| #include "llvm/Object/Archive.h" |
| #include "llvm/Object/Binary.h" |
| #include "llvm/Object/COFF.h" |
| #include "llvm/Object/ELFObjectFile.h" |
| #include "llvm/Object/Error.h" |
| #include "llvm/Object/IRObjectFile.h" |
| #include "llvm/Object/ObjectFile.h" |
| #include "llvm/Support/BinaryStreamReader.h" |
| #include "llvm/Support/SourceMgr.h" |
| #include "llvm/Support/Timer.h" |
| |
| using namespace llvm; |
| using namespace llvm::object; |
| |
| static llvm::TimerGroup |
| OffloadBundlerTimerGroup("Offload Bundler Timer Group", |
| "Timer group for offload bundler"); |
| |
| // Extract an Offload bundle (usually a Offload Bundle) from a fat_bin |
| // section |
| Error extractOffloadBundle(MemoryBufferRef Contents, uint64_t SectionOffset, |
| StringRef FileName, |
| SmallVectorImpl<OffloadBundleFatBin> &Bundles) { |
| |
| size_t Offset = 0; |
| size_t NextbundleStart = 0; |
| |
| // There could be multiple offloading bundles stored at this section. |
| while (NextbundleStart != StringRef::npos) { |
| std::unique_ptr<MemoryBuffer> Buffer = |
| MemoryBuffer::getMemBuffer(Contents.getBuffer().drop_front(Offset), "", |
| /*RequiresNullTerminator=*/false); |
| |
| // Create the FatBinBindle object. This will also create the Bundle Entry |
| // list info. |
| auto FatBundleOrErr = |
| OffloadBundleFatBin::create(*Buffer, SectionOffset + Offset, FileName); |
| if (!FatBundleOrErr) |
| return FatBundleOrErr.takeError(); |
| |
| // Add current Bundle to list. |
| Bundles.emplace_back(std::move(**FatBundleOrErr)); |
| |
| // Find the next bundle by searching for the magic string |
| StringRef Str = Buffer->getBuffer(); |
| NextbundleStart = Str.find(StringRef("__CLANG_OFFLOAD_BUNDLE__"), 24); |
| |
| if (NextbundleStart != StringRef::npos) |
| Offset += NextbundleStart; |
| } |
| |
| return Error::success(); |
| } |
| |
| Error OffloadBundleFatBin::readEntries(StringRef Buffer, |
| uint64_t SectionOffset) { |
| uint64_t NumOfEntries = 0; |
| |
| BinaryStreamReader Reader(Buffer, llvm::endianness::little); |
| |
| // Read the Magic String first. |
| StringRef Magic; |
| if (auto EC = Reader.readFixedString(Magic, 24)) |
| return errorCodeToError(object_error::parse_failed); |
| |
| // Read the number of Code Objects (Entries) in the current Bundle. |
| if (auto EC = Reader.readInteger(NumOfEntries)) |
| return errorCodeToError(object_error::parse_failed); |
| |
| NumberOfEntries = NumOfEntries; |
| |
| // For each Bundle Entry (code object) |
| for (uint64_t I = 0; I < NumOfEntries; I++) { |
| uint64_t EntrySize; |
| uint64_t EntryOffset; |
| uint64_t EntryIDSize; |
| StringRef EntryID; |
| |
| if (auto EC = Reader.readInteger(EntryOffset)) |
| return errorCodeToError(object_error::parse_failed); |
| |
| if (auto EC = Reader.readInteger(EntrySize)) |
| return errorCodeToError(object_error::parse_failed); |
| |
| if (auto EC = Reader.readInteger(EntryIDSize)) |
| return errorCodeToError(object_error::parse_failed); |
| |
| if (auto EC = Reader.readFixedString(EntryID, EntryIDSize)) |
| return errorCodeToError(object_error::parse_failed); |
| |
| auto Entry = std::make_unique<OffloadBundleEntry>( |
| EntryOffset + SectionOffset, EntrySize, EntryIDSize, EntryID); |
| |
| Entries.push_back(*Entry); |
| } |
| |
| return Error::success(); |
| } |
| |
| Expected<std::unique_ptr<OffloadBundleFatBin>> |
| OffloadBundleFatBin::create(MemoryBufferRef Buf, uint64_t SectionOffset, |
| StringRef FileName) { |
| if (Buf.getBufferSize() < 24) |
| return errorCodeToError(object_error::parse_failed); |
| |
| // Check for magic bytes. |
| if (identify_magic(Buf.getBuffer()) != file_magic::offload_bundle) |
| return errorCodeToError(object_error::parse_failed); |
| |
| OffloadBundleFatBin *TheBundle = new OffloadBundleFatBin(Buf, FileName); |
| |
| // Read the Bundle Entries |
| Error Err = TheBundle->readEntries(Buf.getBuffer(), SectionOffset); |
| if (Err) |
| return errorCodeToError(object_error::parse_failed); |
| |
| return std::unique_ptr<OffloadBundleFatBin>(TheBundle); |
| } |
| |
| Error OffloadBundleFatBin::extractBundle(const ObjectFile &Source) { |
| // This will extract all entries in the Bundle |
| for (OffloadBundleEntry &Entry : Entries) { |
| |
| if (Entry.Size == 0) |
| continue; |
| |
| // create output file name. Which should be |
| // <fileName>-offset<Offset>-size<Size>.co" |
| std::string Str = getFileName().str() + "-offset" + itostr(Entry.Offset) + |
| "-size" + itostr(Entry.Size) + ".co"; |
| if (Error Err = object::extractCodeObject(Source, Entry.Offset, Entry.Size, |
| StringRef(Str))) |
| return Err; |
| } |
| |
| return Error::success(); |
| } |
| |
| Error object::extractOffloadBundleFatBinary( |
| const ObjectFile &Obj, SmallVectorImpl<OffloadBundleFatBin> &Bundles) { |
| assert((Obj.isELF() || Obj.isCOFF()) && "Invalid file type"); |
| |
| // Iterate through Sections until we find an offload_bundle section. |
| for (SectionRef Sec : Obj.sections()) { |
| Expected<StringRef> Buffer = Sec.getContents(); |
| if (!Buffer) |
| return Buffer.takeError(); |
| |
| // If it does not start with the reserved suffix, just skip this section. |
| if ((llvm::identify_magic(*Buffer) == llvm::file_magic::offload_bundle) || |
| (llvm::identify_magic(*Buffer) == |
| llvm::file_magic::offload_bundle_compressed)) { |
| |
| uint64_t SectionOffset = 0; |
| if (Obj.isELF()) { |
| SectionOffset = ELFSectionRef(Sec).getOffset(); |
| } else if (Obj.isCOFF()) // TODO: add COFF Support |
| return createStringError(object_error::parse_failed, |
| "COFF object files not supported.\n"); |
| |
| MemoryBufferRef Contents(*Buffer, Obj.getFileName()); |
| |
| if (llvm::identify_magic(*Buffer) == |
| llvm::file_magic::offload_bundle_compressed) { |
| // Decompress the input if necessary. |
| Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr = |
| CompressedOffloadBundle::decompress(Contents, false); |
| |
| if (!DecompressedBufferOrErr) |
| return createStringError( |
| inconvertibleErrorCode(), |
| "Failed to decompress input: " + |
| llvm::toString(DecompressedBufferOrErr.takeError())); |
| |
| MemoryBuffer &DecompressedInput = **DecompressedBufferOrErr; |
| if (Error Err = extractOffloadBundle(DecompressedInput, SectionOffset, |
| Obj.getFileName(), Bundles)) |
| return Err; |
| } else { |
| if (Error Err = extractOffloadBundle(Contents, SectionOffset, |
| Obj.getFileName(), Bundles)) |
| return Err; |
| } |
| } |
| } |
| return Error::success(); |
| } |
| |
| Error object::extractCodeObject(const ObjectFile &Source, int64_t Offset, |
| int64_t Size, StringRef OutputFileName) { |
| Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr = |
| FileOutputBuffer::create(OutputFileName, Size); |
| |
| if (!BufferOrErr) |
| return BufferOrErr.takeError(); |
| |
| Expected<MemoryBufferRef> InputBuffOrErr = Source.getMemoryBufferRef(); |
| if (Error Err = InputBuffOrErr.takeError()) |
| return Err; |
| |
| std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr); |
| std::copy(InputBuffOrErr->getBufferStart() + Offset, |
| InputBuffOrErr->getBufferStart() + Offset + Size, |
| Buf->getBufferStart()); |
| if (Error E = Buf->commit()) |
| return E; |
| |
| return Error::success(); |
| } |
| |
| // given a file name, offset, and size, extract data into a code object file, |
| // into file <SourceFile>-offset<Offset>-size<Size>.co |
| Error object::extractOffloadBundleByURI(StringRef URIstr) { |
| // create a URI object |
| Expected<std::unique_ptr<OffloadBundleURI>> UriOrErr( |
| OffloadBundleURI::createOffloadBundleURI(URIstr, FILE_URI)); |
| if (!UriOrErr) |
| return UriOrErr.takeError(); |
| |
| OffloadBundleURI &Uri = **UriOrErr; |
| std::string OutputFile = Uri.FileName.str(); |
| OutputFile += |
| "-offset" + itostr(Uri.Offset) + "-size" + itostr(Uri.Size) + ".co"; |
| |
| // Create an ObjectFile object from uri.file_uri |
| auto ObjOrErr = ObjectFile::createObjectFile(Uri.FileName); |
| if (!ObjOrErr) |
| return ObjOrErr.takeError(); |
| |
| auto Obj = ObjOrErr->getBinary(); |
| if (Error Err = |
| object::extractCodeObject(*Obj, Uri.Offset, Uri.Size, OutputFile)) |
| return Err; |
| |
| return Error::success(); |
| } |
| |
// Render Value in decimal with a comma between every group of three digits,
// counted from the right (1234567 becomes "1,234,567").
static std::string formatWithCommas(unsigned long long Value) {
  const std::string Digits = std::to_string(Value);
  const std::size_t Count = Digits.size();

  std::string Grouped;
  Grouped.reserve(Count + (Count - 1) / 3);
  for (std::size_t Index = 0; Index < Count; ++Index) {
    // A separator goes in front of every digit that starts a full group of
    // three, except in front of the very first digit.
    if (Index != 0 && (Count - Index) % 3 == 0)
      Grouped.push_back(',');
    Grouped.push_back(Digits[Index]);
  }
  return Grouped;
}
| |
| llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>> |
| CompressedOffloadBundle::decompress(llvm::MemoryBufferRef &Input, |
| bool Verbose) { |
| StringRef Blob = Input.getBuffer(); |
| |
| if (Blob.size() < V1HeaderSize) |
| return llvm::MemoryBuffer::getMemBufferCopy(Blob); |
| |
| if (llvm::identify_magic(Blob) != |
| llvm::file_magic::offload_bundle_compressed) { |
| if (Verbose) |
| llvm::errs() << "Uncompressed bundle.\n"; |
| return llvm::MemoryBuffer::getMemBufferCopy(Blob); |
| } |
| |
| size_t CurrentOffset = MagicSize; |
| |
| uint16_t ThisVersion; |
| memcpy(&ThisVersion, Blob.data() + CurrentOffset, sizeof(uint16_t)); |
| CurrentOffset += VersionFieldSize; |
| |
| uint16_t CompressionMethod; |
| memcpy(&CompressionMethod, Blob.data() + CurrentOffset, sizeof(uint16_t)); |
| CurrentOffset += MethodFieldSize; |
| |
| uint32_t TotalFileSize; |
| if (ThisVersion >= 2) { |
| if (Blob.size() < V2HeaderSize) |
| return createStringError(inconvertibleErrorCode(), |
| "Compressed bundle header size too small"); |
| memcpy(&TotalFileSize, Blob.data() + CurrentOffset, sizeof(uint32_t)); |
| CurrentOffset += FileSizeFieldSize; |
| } |
| |
| uint32_t UncompressedSize; |
| memcpy(&UncompressedSize, Blob.data() + CurrentOffset, sizeof(uint32_t)); |
| CurrentOffset += UncompressedSizeFieldSize; |
| |
| uint64_t StoredHash; |
| memcpy(&StoredHash, Blob.data() + CurrentOffset, sizeof(uint64_t)); |
| CurrentOffset += HashFieldSize; |
| |
| llvm::compression::Format CompressionFormat; |
| if (CompressionMethod == |
| static_cast<uint16_t>(llvm::compression::Format::Zlib)) |
| CompressionFormat = llvm::compression::Format::Zlib; |
| else if (CompressionMethod == |
| static_cast<uint16_t>(llvm::compression::Format::Zstd)) |
| CompressionFormat = llvm::compression::Format::Zstd; |
| else |
| return createStringError(inconvertibleErrorCode(), |
| "Unknown compressing method"); |
| |
| llvm::Timer DecompressTimer("Decompression Timer", "Decompression time", |
| OffloadBundlerTimerGroup); |
| if (Verbose) |
| DecompressTimer.startTimer(); |
| |
| SmallVector<uint8_t, 0> DecompressedData; |
| StringRef CompressedData = Blob.substr(CurrentOffset); |
| if (llvm::Error DecompressionError = llvm::compression::decompress( |
| CompressionFormat, llvm::arrayRefFromStringRef(CompressedData), |
| DecompressedData, UncompressedSize)) |
| return createStringError(inconvertibleErrorCode(), |
| "Could not decompress embedded file contents: " + |
| llvm::toString(std::move(DecompressionError))); |
| |
| if (Verbose) { |
| DecompressTimer.stopTimer(); |
| |
| double DecompressionTimeSeconds = |
| DecompressTimer.getTotalTime().getWallTime(); |
| |
| // Recalculate MD5 hash for integrity check. |
| llvm::Timer HashRecalcTimer("Hash Recalculation Timer", |
| "Hash recalculation time", |
| OffloadBundlerTimerGroup); |
| HashRecalcTimer.startTimer(); |
| llvm::MD5 Hash; |
| llvm::MD5::MD5Result Result; |
| Hash.update(llvm::ArrayRef<uint8_t>(DecompressedData.data(), |
| DecompressedData.size())); |
| Hash.final(Result); |
| uint64_t RecalculatedHash = Result.low(); |
| HashRecalcTimer.stopTimer(); |
| bool HashMatch = (StoredHash == RecalculatedHash); |
| |
| double CompressionRate = |
| static_cast<double>(UncompressedSize) / CompressedData.size(); |
| double DecompressionSpeedMBs = |
| (UncompressedSize / (1024.0 * 1024.0)) / DecompressionTimeSeconds; |
| |
| llvm::errs() << "Compressed bundle format version: " << ThisVersion << "\n"; |
| if (ThisVersion >= 2) |
| llvm::errs() << "Total file size (from header): " |
| << formatWithCommas(TotalFileSize) << " bytes\n"; |
| llvm::errs() << "Decompression method: " |
| << (CompressionFormat == llvm::compression::Format::Zlib |
| ? "zlib" |
| : "zstd") |
| << "\n" |
| << "Size before decompression: " |
| << formatWithCommas(CompressedData.size()) << " bytes\n" |
| << "Size after decompression: " |
| << formatWithCommas(UncompressedSize) << " bytes\n" |
| << "Compression rate: " |
| << llvm::format("%.2lf", CompressionRate) << "\n" |
| << "Compression ratio: " |
| << llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n" |
| << "Decompression speed: " |
| << llvm::format("%.2lf MB/s", DecompressionSpeedMBs) << "\n" |
| << "Stored hash: " << llvm::format_hex(StoredHash, 16) << "\n" |
| << "Recalculated hash: " |
| << llvm::format_hex(RecalculatedHash, 16) << "\n" |
| << "Hashes match: " << (HashMatch ? "Yes" : "No") << "\n"; |
| } |
| |
| return llvm::MemoryBuffer::getMemBufferCopy( |
| llvm::toStringRef(DecompressedData)); |
| } |
| |
| llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>> |
| CompressedOffloadBundle::compress(llvm::compression::Params P, |
| const llvm::MemoryBuffer &Input, |
| bool Verbose) { |
| if (!llvm::compression::zstd::isAvailable() && |
| !llvm::compression::zlib::isAvailable()) |
| return createStringError(llvm::inconvertibleErrorCode(), |
| "Compression not supported"); |
| |
| llvm::Timer HashTimer("Hash Calculation Timer", "Hash calculation time", |
| OffloadBundlerTimerGroup); |
| if (Verbose) |
| HashTimer.startTimer(); |
| llvm::MD5 Hash; |
| llvm::MD5::MD5Result Result; |
| Hash.update(Input.getBuffer()); |
| Hash.final(Result); |
| uint64_t TruncatedHash = Result.low(); |
| if (Verbose) |
| HashTimer.stopTimer(); |
| |
| SmallVector<uint8_t, 0> CompressedBuffer; |
| auto BufferUint8 = llvm::ArrayRef<uint8_t>( |
| reinterpret_cast<const uint8_t *>(Input.getBuffer().data()), |
| Input.getBuffer().size()); |
| |
| llvm::Timer CompressTimer("Compression Timer", "Compression time", |
| OffloadBundlerTimerGroup); |
| if (Verbose) |
| CompressTimer.startTimer(); |
| llvm::compression::compress(P, BufferUint8, CompressedBuffer); |
| if (Verbose) |
| CompressTimer.stopTimer(); |
| |
| uint16_t CompressionMethod = static_cast<uint16_t>(P.format); |
| uint32_t UncompressedSize = Input.getBuffer().size(); |
| uint32_t TotalFileSize = MagicNumber.size() + sizeof(TotalFileSize) + |
| sizeof(Version) + sizeof(CompressionMethod) + |
| sizeof(UncompressedSize) + sizeof(TruncatedHash) + |
| CompressedBuffer.size(); |
| |
| SmallVector<char, 0> FinalBuffer; |
| llvm::raw_svector_ostream OS(FinalBuffer); |
| OS << MagicNumber; |
| OS.write(reinterpret_cast<const char *>(&Version), sizeof(Version)); |
| OS.write(reinterpret_cast<const char *>(&CompressionMethod), |
| sizeof(CompressionMethod)); |
| OS.write(reinterpret_cast<const char *>(&TotalFileSize), |
| sizeof(TotalFileSize)); |
| OS.write(reinterpret_cast<const char *>(&UncompressedSize), |
| sizeof(UncompressedSize)); |
| OS.write(reinterpret_cast<const char *>(&TruncatedHash), |
| sizeof(TruncatedHash)); |
| OS.write(reinterpret_cast<const char *>(CompressedBuffer.data()), |
| CompressedBuffer.size()); |
| |
| if (Verbose) { |
| auto MethodUsed = |
| P.format == llvm::compression::Format::Zstd ? "zstd" : "zlib"; |
| double CompressionRate = |
| static_cast<double>(UncompressedSize) / CompressedBuffer.size(); |
| double CompressionTimeSeconds = CompressTimer.getTotalTime().getWallTime(); |
| double CompressionSpeedMBs = |
| (UncompressedSize / (1024.0 * 1024.0)) / CompressionTimeSeconds; |
| |
| llvm::errs() << "Compressed bundle format version: " << Version << "\n" |
| << "Total file size (including headers): " |
| << formatWithCommas(TotalFileSize) << " bytes\n" |
| << "Compression method used: " << MethodUsed << "\n" |
| << "Compression level: " << P.level << "\n" |
| << "Binary size before compression: " |
| << formatWithCommas(UncompressedSize) << " bytes\n" |
| << "Binary size after compression: " |
| << formatWithCommas(CompressedBuffer.size()) << " bytes\n" |
| << "Compression rate: " |
| << llvm::format("%.2lf", CompressionRate) << "\n" |
| << "Compression ratio: " |
| << llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n" |
| << "Compression speed: " |
| << llvm::format("%.2lf MB/s", CompressionSpeedMBs) << "\n" |
| << "Truncated MD5 hash: " |
| << llvm::format_hex(TruncatedHash, 16) << "\n"; |
| } |
| return llvm::MemoryBuffer::getMemBufferCopy( |
| llvm::StringRef(FinalBuffer.data(), FinalBuffer.size())); |
| } |