| //===- SerializeToBlob.cpp - MLIR GPU lowering pass -----------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements a base class for a pass to serialize a gpu module |
| // into a binary blob that can be executed on a GPU. The binary blob is added |
| // as a string attribute to the gpu module. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "mlir/Dialect/GPU/IR/GPUDialect.h" |
| #include "mlir/Dialect/GPU/Transforms/Passes.h" |
| #include "mlir/Dialect/LLVMIR/LLVMDialect.h" |
| #include "mlir/ExecutionEngine/OptUtils.h" |
| #include "mlir/Pass/Pass.h" |
| #include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h" |
| #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" |
| #include "mlir/Target/LLVMIR/Export.h" |
| #include "llvm/IR/LegacyPassManager.h" |
| #include "llvm/MC/TargetRegistry.h" |
| #include "llvm/Support/TargetSelect.h" |
| #include "llvm/Target/TargetMachine.h" |
| |
| #include <optional> |
| #include <string> |
| |
| #define DEBUG_TYPE "serialize-to-blob" |
| |
| using namespace mlir; |
| |
| std::string gpu::getDefaultGpuBinaryAnnotation() { return "gpu.binary"; } |
| |
// Constructs the pass with the concrete subclass's pass ID so the pass
// infrastructure can uniquely identify the derived pass.
gpu::SerializeToBlobPass::SerializeToBlobPass(TypeID passID)
    : OperationPass<gpu::GPUModuleOp>(passID) {}
| |
// Copy constructor; delegates to the base OperationPass copy constructor so
// pass options and state are carried over when the pass is cloned.
gpu::SerializeToBlobPass::SerializeToBlobPass(const SerializeToBlobPass &other)
    : OperationPass<gpu::GPUModuleOp>(other) {}
| |
| std::optional<std::string> |
| gpu::SerializeToBlobPass::translateToISA(llvm::Module &llvmModule, |
| llvm::TargetMachine &targetMachine) { |
| llvmModule.setDataLayout(targetMachine.createDataLayout()); |
| |
| if (failed(optimizeLlvm(llvmModule, targetMachine))) |
| return std::nullopt; |
| |
| std::string targetISA; |
| llvm::raw_string_ostream stream(targetISA); |
| |
| { // Drop pstream after this to prevent the ISA from being stuck buffering |
| llvm::buffer_ostream pstream(stream); |
| llvm::legacy::PassManager codegenPasses; |
| |
| if (targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr, |
| llvm::CodeGenFileType::AssemblyFile)) |
| return std::nullopt; |
| |
| codegenPasses.run(llvmModule); |
| } |
| return stream.str(); |
| } |
| |
| void gpu::SerializeToBlobPass::runOnOperation() { |
| // Lower the module to an LLVM IR module using a separate context to enable |
| // multi-threaded processing. |
| llvm::LLVMContext llvmContext; |
| std::unique_ptr<llvm::Module> llvmModule = translateToLLVMIR(llvmContext); |
| if (!llvmModule) |
| return signalPassFailure(); |
| |
| // Lower the LLVM IR module to target ISA. |
| std::unique_ptr<llvm::TargetMachine> targetMachine = createTargetMachine(); |
| if (!targetMachine) |
| return signalPassFailure(); |
| |
| std::optional<std::string> maybeTargetISA = |
| translateToISA(*llvmModule, *targetMachine); |
| |
| if (!maybeTargetISA.has_value()) |
| return signalPassFailure(); |
| |
| std::string targetISA = std::move(*maybeTargetISA); |
| |
| LLVM_DEBUG({ |
| llvm::dbgs() << "ISA for module: " << getOperation().getNameAttr() << "\n"; |
| llvm::dbgs() << targetISA << "\n"; |
| llvm::dbgs().flush(); |
| }); |
| |
| // Serialize the target ISA. |
| std::unique_ptr<std::vector<char>> blob = serializeISA(targetISA); |
| if (!blob) |
| return signalPassFailure(); |
| |
| // Add the blob as module attribute. |
| auto attr = |
| StringAttr::get(&getContext(), StringRef(blob->data(), blob->size())); |
| getOperation()->setAttr(gpuBinaryAnnotation, attr); |
| } |
| |
| LogicalResult |
| gpu::SerializeToBlobPass::optimizeLlvm(llvm::Module &llvmModule, |
| llvm::TargetMachine &targetMachine) { |
| int optLevel = this->optLevel.getValue(); |
| if (optLevel < 0 || optLevel > 3) |
| return getOperation().emitError() |
| << "invalid optimization level " << optLevel; |
| |
| targetMachine.setOptLevel(static_cast<llvm::CodeGenOptLevel>(optLevel)); |
| |
| auto transformer = |
| makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, &targetMachine); |
| auto error = transformer(&llvmModule); |
| if (error) { |
| InFlightDiagnostic mlirError = getOperation()->emitError(); |
| llvm::handleAllErrors( |
| std::move(error), [&mlirError](const llvm::ErrorInfoBase &ei) { |
| mlirError << "could not optimize LLVM IR: " << ei.message(); |
| }); |
| return mlirError; |
| } |
| return success(); |
| } |
| |
| std::unique_ptr<llvm::TargetMachine> |
| gpu::SerializeToBlobPass::createTargetMachine() { |
| Location loc = getOperation().getLoc(); |
| std::string error; |
| const llvm::Target *target = |
| llvm::TargetRegistry::lookupTarget(triple, error); |
| if (!target) { |
| emitError(loc, Twine("failed to lookup target: ") + error); |
| return {}; |
| } |
| llvm::TargetMachine *machine = |
| target->createTargetMachine(triple, chip, features, {}, {}); |
| if (!machine) { |
| emitError(loc, "failed to create target machine"); |
| return {}; |
| } |
| |
| return std::unique_ptr<llvm::TargetMachine>{machine}; |
| } |
| |
// Translates the GPU module currently being processed into an LLVM IR module
// owned by `llvmContext`, via the generic MLIR-to-LLVM-IR export. Returns
// nullptr on translation failure. Subclasses may override to customize.
std::unique_ptr<llvm::Module>
gpu::SerializeToBlobPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) {
  return translateModuleToLLVMIR(getOperation(), llvmContext,
                                 "LLVMDialectModule");
}