blob: 6b45ed0ae4ced895dac5acc6762be299b8a93c71 [file] [log] [blame]
//===---- XeGPUUtils.cpp - MLIR Utilities for XeGPUOps ------------------===//
//
// Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements utility methods for working with the XeGPU dialect.
//
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/XeGPU/Utils/XeGPUUtils.h"
#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
#include <cstdint>
#include <numeric>
using namespace mlir;
/// Computes the SIMT (per-lane) vector type corresponding to a SIMD value
/// described by \p tdescTy, i.e. the shape of the fragment each lane owns
/// after distributing the tensor descriptor across the subgroup lanes.
///
/// Returns failure() when the descriptor carries no subgroup-level layout
/// (a layout with lane_layout/lane_data and no sg_layout).
FailureOr<VectorType>
mlir::xegpu::getDistributedVectorType(xegpu::TensorDescType tdescTy) {
  auto layout = llvm::dyn_cast_if_present<LayoutAttr>(tdescTy.getLayout());
  // It only works for subgroup level layout, which only has lane_layout
  // and lane_data, and is to distribute a SIMD code into SIMT code.
  if (!layout || !layout.isSgLayout())
    return failure();

  SmallVector<int64_t> laneData(layout.getLaneData().asArrayRef());
  SmallVector<int64_t> laneLayout(layout.getLaneLayout().asArrayRef());
  auto tdescShape = tdescTy.getShape();
  auto elementType = tdescTy.getElementType();

  // Compute the subgroup size as the product of the lane-layout dims,
  // e.g. for a 2D layout, sgSize = laneLayout[0] * laneLayout[1].
  // NOTE: the initial value must be int64_t — with a plain `1` literal
  // std::accumulate folds in `int` (the init type), truncating the product.
  int64_t sgSize = std::accumulate(laneLayout.begin(), laneLayout.end(),
                                   int64_t{1}, std::multiplies<int64_t>());

  // Case 1: regular (scattered) loads/stores. Each lane owns one chunk of
  // `chunk_size` elements.
  auto scatterAttr = tdescTy.getEncodingAsScatterTensorDescAttr();
  if (scatterAttr) {
    int64_t chunkSize = scatterAttr.getChunkSize().getInt();
    // Verify if the first dimension of the tensor descriptor shape is
    // distributable.
    assert(tdescShape[0] == laneLayout[0] &&
           "tensor descriptor shape is not distributable");
    return VectorType::get({chunkSize}, elementType);
  }

  // Case 2: block loads/stores. Each descriptor dim must be evenly
  // divisible by laneLayout * laneData for the distribution to be valid.
  int64_t tensorSize = 1;
  for (auto [tdescDim, laneDim, laneDataDim] :
       llvm::zip_equal(tdescShape, laneLayout, laneData)) {
    assert((tdescDim % (laneDim * laneDataDim) == 0) &&
           "tensor descriptor shape is not distributable");
    tensorSize *= tdescDim;
  }
  // The total element count must also account for array_length.
  tensorSize *= tdescTy.getArrayLength();

  // Each lane receives an equal 1-D slice of the flattened tensor.
  return VectorType::get({tensorSize / sgSize}, elementType);
}
/// Computes the per-lane (SIMT) vector type for \p originalType distributed
/// under \p layout. Implemented by packing the vector's shape into a helper
/// TensorDescType and delegating to the descriptor-based overload.
///
/// Returns failure() for ranks outside [1, 3] or when the delegated overload
/// rejects the layout.
FailureOr<VectorType>
mlir::xegpu::getDistributedVectorType(VectorType originalType,
                                      xegpu::LayoutAttr layout) {
  const int64_t vecRank = originalType.getRank();
  // Only 1-D, 2-D and 3-D vectors are supported.
  if (vecRank < 1 || vecRank > 3)
    return failure();

  ArrayRef<int64_t> descShape = originalType.getShape();
  // A 3-D vector encodes array_length in its leading dimension; for 1-D and
  // 2-D vectors the array_length is implicitly 1.
  int arrayLen = 1;
  if (vecRank == 3) {
    arrayLen = descShape.front();
    descShape = descShape.drop_front();
  }

  // Build a throwaway descriptor carrying the same shape, element type and
  // layout, then reuse the TensorDescType-based distribution logic.
  auto proxyTdescTy = xegpu::TensorDescType::get(
      descShape, originalType.getElementType(), arrayLen,
      /*boundary_check=*/true,
      /*memory_space=*/xegpu::MemorySpace::Global, layout);
  return xegpu::getDistributedVectorType(proxyTdescTy);
}