| //===- GPUToXeVMPipeline.cpp - Lowering pipeline to XeVM/LLVM -------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements a pass for testing the lowering to XeVM as a generally |
| // usable sink pass. If XeGPU ops are used, it expects the MLIR code to have |
| // XeGPU ops already embedded in gpu code. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "mlir/Conversion/AffineToStandard/AffineToStandard.h" |
| #include "mlir/Conversion/MathToXeVM/MathToXeVM.h" |
| #include "mlir/Conversion/Passes.h" |
| #include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h" |
| #include "mlir/Conversion/VectorToSCF/VectorToSCF.h" |
| #include "mlir/Conversion/XeGPUToXeVM/XeGPUToXeVM.h" |
| #include "mlir/Conversion/XeVMToLLVM/XeVMToLLVM.h" |
| #include "mlir/Dialect/Func/IR/FuncOps.h" |
| #include "mlir/Dialect/GPU/IR/GPUDialect.h" |
| #include "mlir/Dialect/GPU/Pipelines/Passes.h" |
| #include "mlir/Dialect/GPU/Transforms/Passes.h" |
| #include "mlir/Dialect/LLVMIR/Transforms/RequestCWrappers.h" |
| #include "mlir/Dialect/MemRef/Transforms/Passes.h" |
| #include "mlir/Dialect/XeGPU/Transforms/Passes.h" |
| #include "mlir/Pass/PassManager.h" |
| #include "mlir/Pass/PassOptions.h" |
| #include "mlir/Target/LLVM/XeVM/Target.h" |
| #include "mlir/Transforms/Passes.h" |
| |
| using namespace mlir; |
| |
| namespace { |
| //===----------------------------------------------------------------------===// |
| // Pre-GPU common pipeline for both Host and GPU. |
| //===----------------------------------------------------------------------===// |
| void buildPreGPUCommonPassPipeline( |
| OpPassManager &pm, const mlir::gpu::GPUToXeVMPipelineOptions &options) { |
| // builtin.module scope passes. |
| pm.addPass(createCSEPass()); |
| pm.addPass(createConvertVectorToSCFPass()); |
| { |
| GpuXeVMAttachTargetOptions xevmTargetOptions; |
| xevmTargetOptions.moduleMatcher = options.xevmModuleMatcher; |
| xevmTargetOptions.triple = options.zebinTriple; |
| xevmTargetOptions.chip = options.zebinChip; |
| xevmTargetOptions.optLevel = options.optLevel; |
| xevmTargetOptions.cmdOptions = options.cmdOptions; |
| pm.addPass(createGpuXeVMAttachTarget(xevmTargetOptions)); |
| } |
| pm.addPass(createLowerAffinePass()); |
| pm.addNestedPass<func::FuncOp>(createGpuAsyncRegionPass()); |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // GPUModule-specific stuff. |
| //===----------------------------------------------------------------------===// |
| void buildGPUPassPipeline(OpPassManager &pm, |
| const mlir::gpu::GPUToXeVMPipelineOptions &options) { |
| if (options.xegpuOpLevel == "workgroup") { |
| pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUWgToSgDistribute()); |
| pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass()); |
| xegpu::XeGPUPropagateLayoutOptions layoutOptions; |
| layoutOptions.layoutKind = "inst"; |
| pm.addNestedPass<gpu::GPUModuleOp>( |
| xegpu::createXeGPUPropagateLayout(layoutOptions)); |
| pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUBlocking()); |
| pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass()); |
| pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass()); |
| } |
| if (options.xegpuOpLevel == "subgroup" || |
| options.xegpuOpLevel == "workgroup") { |
| xegpu::XeGPUPropagateLayoutOptions layoutOptions; |
| layoutOptions.layoutKind = "lane"; |
| pm.addNestedPass<gpu::GPUModuleOp>( |
| xegpu::createXeGPUPropagateLayout(layoutOptions)); |
| pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUSubgroupDistribute()); |
| pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass()); |
| pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass()); |
| pm.addNestedPass<gpu::GPUModuleOp>(createLoopInvariantCodeMotionPass()); |
| pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass()); |
| pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUVectorLinearize()); |
| } |
| pm.addNestedPass<gpu::GPUModuleOp>(createConvertMathToXeVM()); |
| pm.addNestedPass<gpu::GPUModuleOp>(createConvertXeGPUToXeVMPass()); |
| { |
| ConvertGpuOpsToLLVMSPVOpsOptions gpuToLLVMSPVOptions; |
| gpuToLLVMSPVOptions.use64bitIndex = options.use64bitIndex; |
| pm.addNestedPass<gpu::GPUModuleOp>( |
| createConvertGpuOpsToLLVMSPVOps(gpuToLLVMSPVOptions)); |
| } |
| pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass()); |
| pm.addNestedPass<gpu::GPUModuleOp>(createReconcileUnrealizedCastsPass()); |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Post-GPU pipeline for both Host and GPU. |
| //===----------------------------------------------------------------------===// |
| void buildPostGPUCommonPassPipeline( |
| OpPassManager &pm, const mlir::gpu::GPUToXeVMPipelineOptions &options) { |
| // builtin.module scope passes. |
| pm.addPass(createSCFToControlFlowPass()); |
| pm.addPass(memref::createExpandStridedMetadataPass()); |
| { |
| GpuToLLVMConversionPassOptions gpuToLLVMOptions; |
| gpuToLLVMOptions.hostBarePtrCallConv = options.hostBarePtrCallConv; |
| gpuToLLVMOptions.kernelBarePtrCallConv = options.kernelBarePtrCallConv; |
| pm.addPass(createGpuToLLVMConversionPass(gpuToLLVMOptions)); |
| } |
| pm.addPass(createLowerAffinePass()); |
| pm.addPass(createConvertVectorToLLVMPass()); |
| pm.addPass(createConvertToLLVMPass()); |
| pm.addPass(createReconcileUnrealizedCastsPass()); |
| pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass()); |
| pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass()); |
| // gpu-module-to-binary |
| { |
| GpuModuleToBinaryPassOptions gpuToModuleBinOptions; |
| gpuToModuleBinOptions.compilationTarget = options.binaryFormat; |
| gpuToModuleBinOptions.cmdOptions = options.cmdOptions; |
| pm.addPass(createGpuModuleToBinaryPass(gpuToModuleBinOptions)); |
| } |
| } |
| } // namespace |
| |
| void mlir::gpu::buildLowerToXeVMPassPipeline( |
| OpPassManager &pm, const GPUToXeVMPipelineOptions &options) { |
| // Pre-GPU common pipelines. |
| buildPreGPUCommonPassPipeline(pm, options); |
| |
| // GPUModule-specific stuff. |
| buildGPUPassPipeline(pm, options); |
| |
| // Post-GPU pipeline for both Host and GPU. |
| buildPostGPUCommonPassPipeline(pm, options); |
| } |
| |
| void mlir::gpu::registerGPUToXeVMPipeline() { |
| PassPipelineRegistration<GPUToXeVMPipelineOptions>( |
| "gpu-lower-to-xevm-pipeline", |
| "The default GPU to XeVM lowering pipeline. It starts by lowering GPU " |
| "code to the " |
| "specified compilation target (default is fatbin) then lowers the host " |
| "code.", |
| buildLowerToXeVMPassPipeline); |
| } |