//===- GPUToXeVMPipeline.cpp - Lowering pipeline to XeVM/LLVM -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a pass pipeline for testing the lowering to XeVM as a
// generally usable sink pipeline. If XeGPU ops are used, it expects the MLIR
// code to have XeGPU ops already embedded in gpu code.
//
//===----------------------------------------------------------------------===//
#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
#include "mlir/Conversion/MathToXeVM/MathToXeVM.h"
#include "mlir/Conversion/Passes.h"
#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
#include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
#include "mlir/Conversion/XeGPUToXeVM/XeGPUToXeVM.h"
#include "mlir/Conversion/XeVMToLLVM/XeVMToLLVM.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Pipelines/Passes.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
#include "mlir/Dialect/LLVMIR/Transforms/RequestCWrappers.h"
#include "mlir/Dialect/MemRef/Transforms/Passes.h"
#include "mlir/Dialect/XeGPU/Transforms/Passes.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Pass/PassOptions.h"
#include "mlir/Target/LLVM/XeVM/Target.h"
#include "mlir/Transforms/Passes.h"
using namespace mlir;
namespace {
//===----------------------------------------------------------------------===//
// Pre-GPU common pipeline for both Host and GPU.
//===----------------------------------------------------------------------===//
/// Appends the passes that precede the GPU-module-specific lowering.
///
/// Every pass is added directly on \p pm, except the GPU async region pass,
/// which is nested on `func::FuncOp`. The XeVM attach-target pass is configured
/// from \p options: module matcher, triple, chip, optimization level and
/// command-line options.
///
/// \param pm      Pass manager the passes are appended to.
/// \param options Pipeline options supplying the XeVM target description.
void buildPreGPUCommonPassPipeline(
    OpPassManager &pm, const mlir::gpu::GPUToXeVMPipelineOptions &options) {
  // builtin.module scope passes.
  pm.addPass(createCSEPass());
  pm.addPass(createConvertVectorToSCFPass());

  // Copy the target description out of the pipeline options and schedule the
  // pass that attaches the XeVM target.
  GpuXeVMAttachTargetOptions attachTargetOptions;
  attachTargetOptions.moduleMatcher = options.xevmModuleMatcher;
  attachTargetOptions.triple = options.zebinTriple;
  attachTargetOptions.chip = options.zebinChip;
  attachTargetOptions.optLevel = options.optLevel;
  attachTargetOptions.cmdOptions = options.cmdOptions;
  pm.addPass(createGpuXeVMAttachTarget(attachTargetOptions));

  pm.addPass(createLowerAffinePass());

  // The async region pass is scheduled per function rather than on `pm` itself.
  pm.addNestedPass<func::FuncOp>(createGpuAsyncRegionPass());
}
//===----------------------------------------------------------------------===//
// GPUModule-specific stuff.
//===----------------------------------------------------------------------===//
/// Appends the passes that are nested on `gpu::GPUModuleOp`.
///
/// Which XeGPU passes are scheduled depends on `options.xegpuOpLevel`:
///  - "workgroup": workgroup-to-subgroup distribution, "inst" layout
///    propagation and blocking are scheduled first, followed by everything
///    listed for "subgroup".
///  - "subgroup": "lane" layout propagation, subgroup distribution, cleanup
///    passes and vector linearization.
///  - any other value: no XeGPU distribution passes are scheduled.
/// The Math-to-XeVM, XeGPU-to-XeVM and GPU-to-LLVM-SPV conversions, CSE and
/// unrealized-cast reconciliation are always scheduled afterwards.
///
/// \param pm      Pass manager the nested passes are appended to.
/// \param options Pipeline options; reads `xegpuOpLevel` and `use64bitIndex`.
void buildGPUPassPipeline(OpPassManager &pm,
                          const mlir::gpu::GPUToXeVMPipelineOptions &options) {
  // Every pass in this stage is nested on gpu::GPUModuleOp.
  auto addGpuModulePass = [&pm](auto pass) {
    pm.addNestedPass<gpu::GPUModuleOp>(std::move(pass));
  };

  const bool startsAtWorkgroup = options.xegpuOpLevel == "workgroup";
  const bool startsAtSubgroup = options.xegpuOpLevel == "subgroup";

  // Workgroup-level input: distribute to subgroups, then propagate "inst"
  // layouts and apply blocking.
  if (startsAtWorkgroup) {
    addGpuModulePass(xegpu::createXeGPUWgToSgDistribute());
    addGpuModulePass(createCSEPass());

    xegpu::XeGPUPropagateLayoutOptions instLayoutOptions;
    instLayoutOptions.layoutKind = "inst";
    addGpuModulePass(xegpu::createXeGPUPropagateLayout(instLayoutOptions));

    addGpuModulePass(xegpu::createXeGPUBlocking());
    addGpuModulePass(createCanonicalizerPass());
    addGpuModulePass(createCSEPass());
  }

  // Subgroup-level input, or workgroup-level input after the passes above:
  // propagate "lane" layouts, distribute the subgroup, clean up and linearize
  // vectors.
  if (startsAtWorkgroup || startsAtSubgroup) {
    xegpu::XeGPUPropagateLayoutOptions laneLayoutOptions;
    laneLayoutOptions.layoutKind = "lane";
    addGpuModulePass(xegpu::createXeGPUPropagateLayout(laneLayoutOptions));

    addGpuModulePass(xegpu::createXeGPUSubgroupDistribute());
    addGpuModulePass(createCanonicalizerPass());
    addGpuModulePass(createCSEPass());
    addGpuModulePass(createLoopInvariantCodeMotionPass());
    addGpuModulePass(createCSEPass());
    addGpuModulePass(xegpu::createXeGPUVectorLinearize());
  }

  // Conversions scheduled regardless of the XeGPU op level.
  addGpuModulePass(createConvertMathToXeVM());
  addGpuModulePass(createConvertXeGPUToXeVMPass());

  ConvertGpuOpsToLLVMSPVOpsOptions llvmSpvOptions;
  llvmSpvOptions.use64bitIndex = options.use64bitIndex;
  addGpuModulePass(createConvertGpuOpsToLLVMSPVOps(llvmSpvOptions));

  addGpuModulePass(createCSEPass());
  addGpuModulePass(createReconcileUnrealizedCastsPass());
}
//===----------------------------------------------------------------------===//
// Post-GPU pipeline for both Host and GPU.
//===----------------------------------------------------------------------===//
/// Appends the passes that follow the GPU-module-specific lowering.
///
/// The stage schedules SCF-to-ControlFlow, strided-metadata expansion, the
/// GPU-to-LLVM conversion (configured with the host/kernel bare-pointer calling
/// convention flags from \p options), affine lowering, the Vector-to-LLVM and
/// generic to-LLVM conversions and unrealized-cast reconciliation. It then
/// adds canonicalization and CSE nested on `gpu::GPUModuleOp`, and finally the
/// GPU module-to-binary pass using `options.binaryFormat` and
/// `options.cmdOptions`.
///
/// \param pm      Pass manager the passes are appended to.
/// \param options Pipeline options supplying calling-convention flags, the
///                binary format and command-line options.
void buildPostGPUCommonPassPipeline(
    OpPassManager &pm, const mlir::gpu::GPUToXeVMPipelineOptions &options) {
  // builtin.module scope passes.
  pm.addPass(createSCFToControlFlowPass());
  pm.addPass(memref::createExpandStridedMetadataPass());

  // GPU-to-LLVM conversion, with calling conventions taken from the options.
  GpuToLLVMConversionPassOptions gpuToLLVMConversionOptions;
  gpuToLLVMConversionOptions.hostBarePtrCallConv = options.hostBarePtrCallConv;
  gpuToLLVMConversionOptions.kernelBarePtrCallConv =
      options.kernelBarePtrCallConv;
  pm.addPass(createGpuToLLVMConversionPass(gpuToLLVMConversionOptions));

  pm.addPass(createLowerAffinePass());
  pm.addPass(createConvertVectorToLLVMPass());
  pm.addPass(createConvertToLLVMPass());
  pm.addPass(createReconcileUnrealizedCastsPass());

  // Cleanup nested on the GPU modules before they are turned into binaries.
  pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass());
  pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());

  // gpu-module-to-binary
  GpuModuleToBinaryPassOptions moduleToBinaryOptions;
  moduleToBinaryOptions.compilationTarget = options.binaryFormat;
  moduleToBinaryOptions.cmdOptions = options.cmdOptions;
  pm.addPass(createGpuModuleToBinaryPass(moduleToBinaryOptions));
}
} // namespace
/// Populates \p pm with the complete GPU-to-XeVM lowering pipeline.
///
/// The pipeline is assembled from three stages, appended in this order:
///  1. the pre-GPU passes common to host and GPU code,
///  2. the passes nested on GPU modules,
///  3. the post-GPU passes common to host and GPU code.
///
/// \param pm      Pass manager that receives the passes.
/// \param options Pipeline options forwarded unchanged to every stage.
void mlir::gpu::buildLowerToXeVMPassPipeline(
    OpPassManager &pm, const GPUToXeVMPipelineOptions &options) {
  using StageBuilder =
      void (*)(OpPassManager &, const GPUToXeVMPipelineOptions &);

  // Passes are appended to `pm` in the order the stages are listed here.
  constexpr StageBuilder stageBuilders[] = {
      buildPreGPUCommonPassPipeline,  // Pre-GPU common pipeline.
      buildGPUPassPipeline,           // GPUModule-specific passes.
      buildPostGPUCommonPassPipeline, // Post-GPU pipeline for host and GPU.
  };

  for (StageBuilder appendStage : stageBuilders)
    appendStage(pm, options);
}
/// Registers the GPU-to-XeVM lowering pipeline under the name
/// "gpu-lower-to-xevm-pipeline", using `buildLowerToXeVMPassPipeline` as the
/// builder and `GPUToXeVMPipelineOptions` as its option set.
void mlir::gpu::registerGPUToXeVMPipeline() {
  // Name under which the pipeline is registered.
  static constexpr char kPipelineArgument[] = "gpu-lower-to-xevm-pipeline";

  // Description string passed to the registration.
  static constexpr char kPipelineDescription[] =
      "The default GPU to XeVM lowering pipeline. It starts by lowering GPU "
      "code to the "
      "specified compilation target (default is fatbin) then lowers the host "
      "code.";

  // Constructing the registration object performs the registration.
  PassPipelineRegistration<GPUToXeVMPipelineOptions>(
      kPipelineArgument, kPipelineDescription, buildLowerToXeVMPassPipeline);
}