// mlir/lib/Dialect/GPU/Pipelines/GPUToXeVMPipeline.cpp - llvm-project - Git at Google

 //===- GPUToXeVMPipeline.cpp - Lowering pipeline to XeVM/LLVM -------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This file implements a pass for testing the lowering to XeVM as a generally
 // usable sink pass. If XeGPU ops are used, it expects the MLIR code to have
 // XeGPU ops already embedded in gpu code.
 //
 //===----------------------------------------------------------------------===//

 #include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
 #include "mlir/Conversion/MathToXeVM/MathToXeVM.h"
 #include "mlir/Conversion/Passes.h"
 #include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
 #include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
 #include "mlir/Conversion/XeGPUToXeVM/XeGPUToXeVM.h"
 #include "mlir/Conversion/XeVMToLLVM/XeVMToLLVM.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/GPU/Pipelines/Passes.h"
 #include "mlir/Dialect/GPU/Transforms/Passes.h"
 #include "mlir/Dialect/LLVMIR/Transforms/RequestCWrappers.h"
 #include "mlir/Dialect/MemRef/Transforms/Passes.h"
 #include "mlir/Dialect/XeGPU/Transforms/Passes.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Pass/PassOptions.h"
 #include "mlir/Target/LLVM/XeVM/Target.h"
 #include "mlir/Transforms/Passes.h"

 using namespace mlir;

 namespace {
 //===----------------------------------------------------------------------===//
 // Pre-GPU common pipeline for both Host and GPU.
 //===----------------------------------------------------------------------===//
 /// Appends the passes that run ahead of the gpu.module-specific stage.
 ///
 /// \param pm      Pass manager that receives the passes.
 /// \param options Pipeline options. The module matcher, triple, chip,
 ///                optimization level and command-line options are copied
 ///                into the options of the XeVM attach-target pass.
 void buildPreGPUCommonPassPipeline(
     OpPassManager &pm, const mlir::gpu::GPUToXeVMPipelineOptions &options) {
   // Everything below is added at builtin.module scope unless noted.
   pm.addPass(createCSEPass());
   pm.addPass(createConvertVectorToSCFPass());

   // Configure the XeVM attach-target pass from the pipeline options. The
   // field assignments are independent of one another.
   GpuXeVMAttachTargetOptions attachTargetOptions;
   attachTargetOptions.cmdOptions = options.cmdOptions;
   attachTargetOptions.optLevel = options.optLevel;
   attachTargetOptions.chip = options.zebinChip;
   attachTargetOptions.triple = options.zebinTriple;
   attachTargetOptions.moduleMatcher = options.xevmModuleMatcher;
   pm.addPass(createGpuXeVMAttachTarget(attachTargetOptions));

   pm.addPass(createLowerAffinePass());

   // The async-region pass is nested on func.func rather than the module.
   pm.addNestedPass<func::FuncOp>(createGpuAsyncRegionPass());
 }

 //===----------------------------------------------------------------------===//
 // GPUModule-specific stuff.
 //===----------------------------------------------------------------------===//
 /// Appends the passes that are nested on gpu.module ops.
 ///
 /// `options.xegpuOpLevel` selects which XeGPU passes are scheduled:
 ///   - "workgroup": the workgroup-to-subgroup distribution, "inst" layout
 ///     propagation and blocking passes, followed by the "subgroup" group.
 ///   - "subgroup": "lane" layout propagation, subgroup distribution,
 ///     cleanup passes and vector linearization.
 ///   - any other value: neither group is added.
 /// The Math-to-XeVM, XeGPU-to-XeVM and GPU-to-LLVM-SPV conversions, CSE and
 /// unrealized-cast reconciliation are appended unconditionally.
 ///
 /// \param pm      Pass manager that receives the passes.
 /// \param options Pipeline options; also supplies `use64bitIndex`.
 void buildGPUPassPipeline(OpPassManager &pm,
                           const mlir::gpu::GPUToXeVMPipelineOptions &options) {
   const bool startsAtWorkgroup = options.xegpuOpLevel == "workgroup";
   const bool startsAtSubgroup = options.xegpuOpLevel == "subgroup";

   // Workgroup-level input: distribute to subgroups, then block.
   if (startsAtWorkgroup) {
     pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUWgToSgDistribute());
     pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());

     xegpu::XeGPUPropagateLayoutOptions instLayout;
     instLayout.layoutKind = "inst";
     pm.addNestedPass<gpu::GPUModuleOp>(
         xegpu::createXeGPUPropagateLayout(instLayout));

     pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUBlocking());
     pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass());
     pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
   }

   // Subgroup-level input, or workgroup-level input after the stage above.
   if (startsAtWorkgroup || startsAtSubgroup) {
     xegpu::XeGPUPropagateLayoutOptions laneLayout;
     laneLayout.layoutKind = "lane";
     pm.addNestedPass<gpu::GPUModuleOp>(
         xegpu::createXeGPUPropagateLayout(laneLayout));

     pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUSubgroupDistribute());
     pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass());
     pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
     pm.addNestedPass<gpu::GPUModuleOp>(createLoopInvariantCodeMotionPass());
     pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
     pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUVectorLinearize());
   }

   // Conversions that are scheduled for every op level.
   pm.addNestedPass<gpu::GPUModuleOp>(createConvertMathToXeVM());
   pm.addNestedPass<gpu::GPUModuleOp>(createConvertXeGPUToXeVMPass());

   ConvertGpuOpsToLLVMSPVOpsOptions llvmSpvOptions;
   llvmSpvOptions.use64bitIndex = options.use64bitIndex;
   pm.addNestedPass<gpu::GPUModuleOp>(
       createConvertGpuOpsToLLVMSPVOps(llvmSpvOptions));

   pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
   pm.addNestedPass<gpu::GPUModuleOp>(createReconcileUnrealizedCastsPass());
 }

 //===----------------------------------------------------------------------===//
 // Post-GPU pipeline for both Host and GPU.
 //===----------------------------------------------------------------------===//
 /// Appends the passes that run after the gpu.module-specific stage, ending
 /// with the gpu-module-to-binary pass.
 ///
 /// \param pm      Pass manager that receives the passes.
 /// \param options Pipeline options. The bare-pointer calling-convention
 ///                flags are copied into the GPU-to-LLVM pass options, and
 ///                the binary format and command-line options into the
 ///                module-to-binary pass options.
 void buildPostGPUCommonPassPipeline(
     OpPassManager &pm, const mlir::gpu::GPUToXeVMPipelineOptions &options) {
   // Passes added at builtin.module scope.
   pm.addPass(createSCFToControlFlowPass());
   pm.addPass(memref::createExpandStridedMetadataPass());

   GpuToLLVMConversionPassOptions hostLoweringOptions;
   hostLoweringOptions.kernelBarePtrCallConv = options.kernelBarePtrCallConv;
   hostLoweringOptions.hostBarePtrCallConv = options.hostBarePtrCallConv;
   pm.addPass(createGpuToLLVMConversionPass(hostLoweringOptions));

   pm.addPass(createLowerAffinePass());
   pm.addPass(createConvertVectorToLLVMPass());
   pm.addPass(createConvertToLLVMPass());
   pm.addPass(createReconcileUnrealizedCastsPass());

   // Cleanup nested on gpu.module, scheduled before the binary step below.
   pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass());
   pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());

   // gpu-module-to-binary
   GpuModuleToBinaryPassOptions binaryOptions;
   binaryOptions.cmdOptions = options.cmdOptions;
   binaryOptions.compilationTarget = options.binaryFormat;
   pm.addPass(createGpuModuleToBinaryPass(binaryOptions));
 }
 } // namespace

 /// Populates `pm` with the complete GPU-to-XeVM lowering pipeline by
 /// appending its three stages in order, each configured from `options`.
 void mlir::gpu::buildLowerToXeVMPassPipeline(
     OpPassManager &pm, const GPUToXeVMPipelineOptions &options) {
   // Stage 1: common passes scheduled before the gpu.module stage.
   buildPreGPUCommonPassPipeline(pm, options);
   // Stage 2: passes nested on gpu.module.
   buildGPUPassPipeline(pm, options);
   // Stage 3: common passes scheduled after the gpu.module stage.
   buildPostGPUCommonPassPipeline(pm, options);
 }

 void mlir::gpu::registerGPUToXeVMPipeline() {
   PassPipelineRegistration<GPUToXeVMPipelineOptions>(
       "gpu-lower-to-xevm-pipeline",
       "The default GPU to XeVM lowering pipeline. It starts by lowering GPU "
       "code to the "
       "specified compilation target (default is fatbin) then lowers the host "
       "code.",
       buildLowerToXeVMPassPipeline);
 }
	//===- GPUToXeVMPipeline.cpp - Lowering pipeline to XeVM/LLVM -------------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	// This file implements a pass for testing the lowering to XeVM as a generally
	// usable sink pass. If XeGPU ops are used, it expects the MLIR code to have
	// XeGPU ops already embedded in gpu code.
	//
	//===----------------------------------------------------------------------===//

	#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
	#include "mlir/Conversion/MathToXeVM/MathToXeVM.h"
	#include "mlir/Conversion/Passes.h"
	#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
	#include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
	#include "mlir/Conversion/XeGPUToXeVM/XeGPUToXeVM.h"
	#include "mlir/Conversion/XeVMToLLVM/XeVMToLLVM.h"
	#include "mlir/Dialect/Func/IR/FuncOps.h"
	#include "mlir/Dialect/GPU/IR/GPUDialect.h"
	#include "mlir/Dialect/GPU/Pipelines/Passes.h"
	#include "mlir/Dialect/GPU/Transforms/Passes.h"
	#include "mlir/Dialect/LLVMIR/Transforms/RequestCWrappers.h"
	#include "mlir/Dialect/MemRef/Transforms/Passes.h"
	#include "mlir/Dialect/XeGPU/Transforms/Passes.h"
	#include "mlir/Pass/PassManager.h"
	#include "mlir/Pass/PassOptions.h"
	#include "mlir/Target/LLVM/XeVM/Target.h"
	#include "mlir/Transforms/Passes.h"

	using namespace mlir;

	namespace {
	//===----------------------------------------------------------------------===//
	// Pre-GPU common pipeline for both Host and GPU.
	//===----------------------------------------------------------------------===//
	/// Appends the passes that run ahead of the gpu.module-specific stage.
	///
	/// NOTE(review): a function with this same name and signature is already
	/// defined earlier in this file; one of the two copies should be removed.
	///
	/// \param pm      Pass manager that receives the passes.
	/// \param options Pipeline options. The module matcher, triple, chip,
	///                optimization level and command-line options are copied
	///                into the options of the XeVM attach-target pass.
	void buildPreGPUCommonPassPipeline(
	    OpPassManager &pm, const mlir::gpu::GPUToXeVMPipelineOptions &options) {
	  // Everything below is added at builtin.module scope unless noted.
	  pm.addPass(createCSEPass());
	  pm.addPass(createConvertVectorToSCFPass());

	  // Configure the XeVM attach-target pass from the pipeline options.
	  GpuXeVMAttachTargetOptions attachTargetOptions;
	  attachTargetOptions.cmdOptions = options.cmdOptions;
	  attachTargetOptions.optLevel = options.optLevel;
	  attachTargetOptions.chip = options.zebinChip;
	  attachTargetOptions.triple = options.zebinTriple;
	  attachTargetOptions.moduleMatcher = options.xevmModuleMatcher;
	  pm.addPass(createGpuXeVMAttachTarget(attachTargetOptions));

	  pm.addPass(createLowerAffinePass());

	  // The async-region pass is nested on func.func rather than the module.
	  pm.addNestedPass<func::FuncOp>(createGpuAsyncRegionPass());
	}

	//===----------------------------------------------------------------------===//
	// GPUModule-specific stuff.
	//===----------------------------------------------------------------------===//
	/// Appends the passes that are nested on gpu.module ops.
	///
	/// `options.xegpuOpLevel` selects which XeGPU passes are scheduled:
	///   - "workgroup": the workgroup-to-subgroup distribution, "inst" layout
	///     propagation and blocking passes, followed by the "subgroup" group.
	///   - "subgroup": "lane" layout propagation, subgroup distribution,
	///     cleanup passes and vector linearization.
	///   - any other value: neither group is added.
	/// The Math-to-XeVM, XeGPU-to-XeVM and GPU-to-LLVM-SPV conversions, CSE and
	/// unrealized-cast reconciliation are appended unconditionally.
	///
	/// NOTE(review): a function with this same name and signature is already
	/// defined earlier in this file; one of the two copies should be removed.
	///
	/// \param pm      Pass manager that receives the passes.
	/// \param options Pipeline options; also supplies `use64bitIndex`.
	void buildGPUPassPipeline(OpPassManager &pm,
	                          const mlir::gpu::GPUToXeVMPipelineOptions &options) {
	  if (options.xegpuOpLevel == "workgroup") {
	    pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUWgToSgDistribute());
	    pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
	    xegpu::XeGPUPropagateLayoutOptions layoutOptions;
	    layoutOptions.layoutKind = "inst";
	    pm.addNestedPass<gpu::GPUModuleOp>(
	        xegpu::createXeGPUPropagateLayout(layoutOptions));
	    pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUBlocking());
	    pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass());
	    pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
	  }
	  // Workgroup-level input also goes through the subgroup-level passes.
	  if (options.xegpuOpLevel == "subgroup" ||
	      options.xegpuOpLevel == "workgroup") {
	    xegpu::XeGPUPropagateLayoutOptions layoutOptions;
	    layoutOptions.layoutKind = "lane";
	    pm.addNestedPass<gpu::GPUModuleOp>(
	        xegpu::createXeGPUPropagateLayout(layoutOptions));
	    pm.addNestedPass<gpu::GPUModuleOp>(
	        xegpu::createXeGPUSubgroupDistribute());
	    pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass());
	    pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
	    pm.addNestedPass<gpu::GPUModuleOp>(createLoopInvariantCodeMotionPass());
	    pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
	    pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUVectorLinearize());
	  }
	  // Conversions that are scheduled for every op level.
	  pm.addNestedPass<gpu::GPUModuleOp>(createConvertMathToXeVM());
	  pm.addNestedPass<gpu::GPUModuleOp>(createConvertXeGPUToXeVMPass());
	  {
	    ConvertGpuOpsToLLVMSPVOpsOptions gpuToLLVMSPVOptions;
	    gpuToLLVMSPVOptions.use64bitIndex = options.use64bitIndex;
	    pm.addNestedPass<gpu::GPUModuleOp>(
	        createConvertGpuOpsToLLVMSPVOps(gpuToLLVMSPVOptions));
	  }
	  pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
	  pm.addNestedPass<gpu::GPUModuleOp>(createReconcileUnrealizedCastsPass());
	}

	//===----------------------------------------------------------------------===//
	// Post-GPU pipeline for both Host and GPU.
	//===----------------------------------------------------------------------===//
	/// Appends the passes that run after the gpu.module-specific stage, ending
	/// with the gpu-module-to-binary pass.
	///
	/// NOTE(review): a function with this same name and signature is already
	/// defined earlier in this file; one of the two copies should be removed.
	///
	/// \param pm      Pass manager that receives the passes.
	/// \param options Pipeline options. The bare-pointer calling-convention
	///                flags are copied into the GPU-to-LLVM pass options, and
	///                the binary format and command-line options into the
	///                module-to-binary pass options.
	void buildPostGPUCommonPassPipeline(
	    OpPassManager &pm, const mlir::gpu::GPUToXeVMPipelineOptions &options) {
	  // Passes added at builtin.module scope.
	  pm.addPass(createSCFToControlFlowPass());
	  pm.addPass(memref::createExpandStridedMetadataPass());

	  GpuToLLVMConversionPassOptions hostLoweringOptions;
	  hostLoweringOptions.kernelBarePtrCallConv = options.kernelBarePtrCallConv;
	  hostLoweringOptions.hostBarePtrCallConv = options.hostBarePtrCallConv;
	  pm.addPass(createGpuToLLVMConversionPass(hostLoweringOptions));

	  pm.addPass(createLowerAffinePass());
	  pm.addPass(createConvertVectorToLLVMPass());
	  pm.addPass(createConvertToLLVMPass());
	  pm.addPass(createReconcileUnrealizedCastsPass());

	  // Cleanup nested on gpu.module, scheduled before the binary step below.
	  pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass());
	  pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());

	  // gpu-module-to-binary
	  GpuModuleToBinaryPassOptions binaryOptions;
	  binaryOptions.cmdOptions = options.cmdOptions;
	  binaryOptions.compilationTarget = options.binaryFormat;
	  pm.addPass(createGpuModuleToBinaryPass(binaryOptions));
	}
	} // namespace

	/// Populates `pm` with the complete GPU-to-XeVM lowering pipeline by
	/// appending its three stages in order, each configured from `options`.
	///
	/// NOTE(review): this function is already defined earlier in this file;
	/// one of the two copies should be removed.
	void mlir::gpu::buildLowerToXeVMPassPipeline(
	    OpPassManager &pm, const GPUToXeVMPipelineOptions &options) {
	  // Stage 1: common passes scheduled before the gpu.module stage.
	  buildPreGPUCommonPassPipeline(pm, options);
	  // Stage 2: passes nested on gpu.module.
	  buildGPUPassPipeline(pm, options);
	  // Stage 3: common passes scheduled after the gpu.module stage.
	  buildPostGPUCommonPassPipeline(pm, options);
	}

	void mlir::gpu::registerGPUToXeVMPipeline() {
	PassPipelineRegistration<GPUToXeVMPipelineOptions>(
	"gpu-lower-to-xevm-pipeline",
	"The default GPU to XeVM lowering pipeline. It starts by lowering GPU "
	"code to the "
	"specified compilation target (default is fatbin) then lowers the host "
	"code.",
	buildLowerToXeVMPassPipeline);
	}