blob: c39bd06c81cbf85e74a264c199bd06f0085f87d9 [file] [edit]
//===- OpenACCUtilsTiling.cpp - OpenACC Loop Tiling Utilities -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains utility functions for tiling OpenACC loops.
//
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/OpenACC/OpenACCUtilsTiling.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/OpenACC/OpenACC.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/Transforms/RegionUtils.h"
// Produce a tile-size value whose type is `targetType`.
//
// A tile size that folds to a negative integer constant (the -1 encoding used
// for tile(*)) is replaced by a freshly created constant holding
// `defaultTileSize`, built directly in `targetType`. Every other tile size is
// handed to getValueOrCreateCastToIndexLike so it is returned as `targetType`.
static mlir::Value resolveAndCastTileSize(mlir::Value tileSize,
                                          int32_t defaultTileSize,
                                          mlir::Type targetType,
                                          mlir::RewriterBase &rewriter,
                                          mlir::Location loc) {
  auto foldedSize = mlir::getConstantIntValue(tileSize);
  const bool isUnknownTile = foldedSize && *foldedSize < 0;

  // Known (or non-constant) tile size: only a cast may be needed.
  if (!isUnknownTile)
    return mlir::getValueOrCreateCastToIndexLike(rewriter, loc, targetType,
                                                 tileSize);

  // tile(*): substitute the default, already typed as the target type.
  auto defaultAttr = rewriter.getIntegerAttr(targetType, defaultTileSize);
  return mlir::arith::ConstantOp::create(rewriter, loc, targetType,
                                         defaultAttr);
}
// Strip one level of fine-grained parallelism from `loop`.
//
// If the loop carries a vector clause (attribute or operand value), only the
// vector attributes are removed. Otherwise, if it carries a worker clause,
// the worker attributes are removed. At most one of the two is stripped per
// call; a loop with neither is left untouched.
static void removeWorkerVectorFromLoop(mlir::acc::LoopOp loop) {
  const bool carriesVector = loop.hasVector() || loop.getVectorValue();
  if (carriesVector) {
    loop.removeVectorAttr();
    loop.removeVectorOperandsDeviceTypeAttr();
    return;
  }

  const bool carriesWorker = loop.hasWorker() || loop.getWorkerValue();
  if (!carriesWorker)
    return;

  loop.removeWorkerAttr();
  loop.removeWorkerNumOperandsDeviceTypeAttr();
}
// Build a new acc.loop that mirrors `origLoop` but uses the supplied bounds.
//
// The new op takes its result types, gang/worker/vector operands and
// attributes, seq/independent/auto attributes, and cache/private/
// firstprivate/reduction operands from `origLoop`. Lower bounds, upper bounds,
// steps, the inclusive-upper-bound attribute and the combined-construct
// attribute come from the arguments. The collapse attributes are copied from
// `origLoop` only when `preserveCollapse` is true; otherwise they are null.
// The new op is created at the rewriter's current insertion point.
static mlir::acc::LoopOp
createACCLoopFromOriginal(mlir::acc::LoopOp origLoop,
                          mlir::RewriterBase &rewriter, mlir::ValueRange lb,
                          mlir::ValueRange ub, mlir::ValueRange step,
                          mlir::DenseBoolArrayAttr inclusiveUBAttr,
                          mlir::acc::CombinedConstructsTypeAttr combinedAttr,
                          mlir::Location loc, bool preserveCollapse) {
  mlir::ArrayAttr collapseAttr =
      preserveCollapse ? origLoop.getCollapseAttr() : mlir::ArrayAttr{};
  mlir::ArrayAttr collapseDeviceTypeAttr =
      preserveCollapse ? origLoop.getCollapseDeviceTypeAttr()
                       : mlir::ArrayAttr{};

  return mlir::acc::LoopOp::create(
      rewriter, loc, origLoop->getResultTypes(), lb, ub, step, inclusiveUBAttr,
      collapseAttr, collapseDeviceTypeAttr,
      // Gang clause.
      origLoop.getGangOperands(), origLoop.getGangOperandsArgTypeAttr(),
      origLoop.getGangOperandsSegmentsAttr(),
      origLoop.getGangOperandsDeviceTypeAttr(),
      // Worker clause.
      origLoop.getWorkerNumOperands(),
      origLoop.getWorkerNumOperandsDeviceTypeAttr(),
      // Vector clause.
      origLoop.getVectorOperands(), origLoop.getVectorOperandsDeviceTypeAttr(),
      // Parallelism mode and unit attributes.
      origLoop.getSeqAttr(), origLoop.getIndependentAttr(),
      origLoop.getAuto_Attr(), origLoop.getGangAttr(), origLoop.getWorkerAttr(),
      origLoop.getVectorAttr(),
      // Deliberately empty slots (presumably the tile operands, their
      // segments and device types -- confirm against the LoopOp builder).
      mlir::ValueRange{}, mlir::DenseI32ArrayAttr{}, mlir::ArrayAttr{},
      // Data clauses.
      origLoop.getCacheOperands(), origLoop.getPrivateOperands(),
      origLoop.getFirstprivateOperands(), origLoop.getReductionOperands(),
      combinedAttr);
}
// Create a single-IV acc.loop modelled on `inputLoop`.
//
// The new loop is created at the rewriter's current insertion point with the
// given bounds and step, no collapse attributes and no combined-construct
// attribute. If `inputLoop` has a gang clause, the gang attributes are removed
// from the new loop. If `inputLoop` has a vector clause, the worker attributes
// are removed from the new loop. The new loop receives a body block holding
// only an acc.yield and one block argument typed like the first body argument
// of `inputLoop`. On return the rewriter's insertion point has been moved
// into that block.
static mlir::acc::LoopOp
createInnerLoop(mlir::acc::LoopOp inputLoop, mlir::RewriterBase &rewriter,
                mlir::ValueRange lb, mlir::ValueRange ub, mlir::ValueRange step,
                mlir::DenseBoolArrayAttr inclusiveUBAttr, mlir::Location loc) {
  mlir::acc::LoopOp nested = createACCLoopFromOriginal(
      inputLoop, rewriter, lb, ub, step, inclusiveUBAttr,
      mlir::acc::CombinedConstructsTypeAttr{}, loc, /*preserveCollapse*/ false);

  // Decide which parallelism clauses must be removed from the nested loop.
  const bool inputHasGang =
      inputLoop.hasGang() ||
      inputLoop.getGangValue(mlir::acc::GangArgType::Num) ||
      inputLoop.getGangValue(mlir::acc::GangArgType::Dim) ||
      inputLoop.getGangValue(mlir::acc::GangArgType::Static);
  const bool inputHasVector =
      inputLoop.hasVector() || inputLoop.getVectorValue();

  rewriter.startOpModification(nested);
  if (inputHasGang) {
    nested.removeGangAttr();
    nested.removeGangOperandsArgTypeAttr();
    nested.removeGangOperandsSegmentsAttr();
    nested.removeGangOperandsDeviceTypeAttr();
  }
  if (inputHasVector) {
    // When the input loop is vectorized, the worker attributes are removed
    // here.
    nested.removeWorkerAttr();
    nested.removeWorkerNumOperandsDeviceTypeAttr();
  }
  rewriter.finalizeOpModification(nested);

  // Give the loop a body: an entry block terminated by acc.yield, then the IV.
  mlir::Region &nestedRegion = nested.getRegion();
  mlir::Block *entry = rewriter.createBlock(&nestedRegion, nestedRegion.begin());
  rewriter.setInsertionPointToEnd(entry);
  mlir::acc::YieldOp::create(rewriter, loc);

  mlir::Type ivType = inputLoop.getBody().getArgument(0).getType();
  nested.getBody().addArgument(ivType, loc);
  return nested;
}
// Transfer the leading operations of `sourceLoop`'s body to the front of
// `targetLoop`'s body, then rewrite induction-variable uses.
//
// The first `nOps - 1` operations of the source body are spliced to the start
// of the target body. Callers pass the original operation count, so the
// trailing operation (the terminator) stays behind. Afterwards, for every
// index i of `newIVs`, uses of `origIVs[i]` inside `targetLoop`'s region are
// replaced with `newIVs[i]`. `origIVs` must hold at least as many values as
// `newIVs`. `rewriter` is not used.
static void moveOpsAndReplaceIVs(mlir::acc::LoopOp sourceLoop,
                                 mlir::acc::LoopOp targetLoop,
                                 llvm::ArrayRef<mlir::Value> newIVs,
                                 llvm::ArrayRef<mlir::Value> origIVs,
                                 size_t nOps, mlir::RewriterBase &rewriter) {
  auto &sourceOps = sourceLoop.getBody().getOperations();
  auto &targetOps = targetLoop.getBody().getOperations();

  // Half-open range [first, first + nOps - 1) of operations to relocate.
  mlir::Block::iterator first = sourceLoop.getBody().begin();
  mlir::Block::iterator last = std::next(first, nOps - 1);
  targetOps.splice(targetOps.begin(), sourceOps, first, last);

  // Redirect uses of the original IVs to the new IVs within the target loop.
  mlir::Region &targetRegion = targetLoop.getRegion();
  for (size_t idx = 0, count = newIVs.size(); idx < count; ++idx)
    mlir::replaceAllUsesInRegionWith(origIVs[idx], newIVs[idx], targetRegion);
}
// Tile a nest of acc.loop ops (collapsed and/or nested).
//
// `tileLoops` lists the loops to tile from outermost to innermost and must
// not be empty. Each loop may carry several induction variables; dimensions
// are numbered in flattened order (all IVs of the first loop, then all IVs of
// the second, ...). `tileSizes[d]` is the tile size for flattened dimension d;
// a negative constant size is replaced by `defaultTileSize`.
//
// For every tiled dimension d:
//   * the step of the owning tile loop is multiplied by the tile size,
//   * an element loop is created inside the innermost tile loop that runs
//     from the tile IV to min(origUB, tileIV + newStep) (minus one for
//     inclusive upper bounds) with the original step.
// Element loops are nested in dimension order. The original body of the
// innermost tile loop is then moved into the innermost element loop and its
// uses of the original IVs are redirected to the element-loop IVs.
//
// Vector (or else worker) attributes are stripped from the tile loops and
// from every element loop except the outermost one.
//
// Returns the outermost tile loop (`tileLoops[0]`).
mlir::acc::LoopOp
mlir::acc::tileACCLoops(llvm::SmallVector<mlir::acc::LoopOp> &tileLoops,
                        const llvm::SmallVector<mlir::Value> &tileSizes,
                        int32_t defaultTileSize, mlir::RewriterBase &rewriter) {
  // Tile collapsed and/or nested loops
  mlir::acc::LoopOp outerLoop = tileLoops[0];
  const mlir::Location loc = outerLoop.getLoc();
  mlir::acc::LoopOp innerLoop = tileLoops[tileLoops.size() - 1];

  llvm::SmallVector<mlir::Value, 3> origIVs;
  llvm::SmallVector<mlir::Value, 3> origSteps;
  llvm::SmallVector<mlir::Value, 3> origUBs;
  llvm::SmallVector<mlir::Value, 3> newSteps;
  llvm::SmallVector<mlir::Value, 3> newUBs;
  llvm::SmallVector<mlir::Value, 3> newIVs;

  // Record the body size before anything is inserted so that only the
  // original operations (minus the terminator) are moved at the end.
  size_t nOps = innerLoop.getBody().getOperations().size();

  // Extract original inclusiveUBs, one entry per flattened dimension.
  // inclusiveUBs are present on the IR from Fortran frontend for DO loops
  // but might not be present from other frontends (python), so check if the
  // attribute exists and default to exclusive bounds otherwise.
  llvm::SmallVector<bool> inclusiveUBs;
  for (auto tileLoop : tileLoops) {
    auto inclusiveAttr = tileLoop.getInclusiveUpperboundAttr();
    const size_t numSteps = tileLoop.getStep().size();
    for (size_t j = 0; j < numSteps; ++j)
      inclusiveUBs.push_back(inclusiveAttr ? inclusiveAttr.asArrayRef()[j]
                                           : false);
  }

  // Extract original ivs, UBs, steps, and calculate new steps.
  // `flatDim` is the flattened dimension index across all tile loops. It is
  // the index space shared by origIVs/origUBs/origSteps/inclusiveUBs and must
  // also be used for tileSizes; using (loop index + step index) would collide
  // when a multi-IV loop is followed by further nested loops.
  size_t flatDim = 0;
  rewriter.setInsertionPoint(outerLoop);
  for (auto tileLoop : tileLoops) {
    for (auto arg : tileLoop.getBody().getArguments())
      origIVs.push_back(arg);
    for (auto ub : tileLoop.getUpperbound())
      origUBs.push_back(ub);

    llvm::SmallVector<mlir::Value, 3> currentLoopSteps;
    for (mlir::Value step : tileLoop.getStep()) {
      origSteps.push_back(step);
      const size_t dim = flatDim++;
      if (dim >= tileSizes.size()) {
        // Dimension is not tiled: keep its step unchanged.
        currentLoopSteps.push_back(step);
        continue;
      }
      mlir::Value tileSize = resolveAndCastTileSize(
          tileSizes[dim], defaultTileSize, step.getType(), rewriter, loc);
      auto newLoopStep =
          mlir::arith::MulIOp::create(rewriter, loc, step, tileSize);
      currentLoopSteps.push_back(newLoopStep);
      newSteps.push_back(newLoopStep);
    }

    rewriter.startOpModification(tileLoop);
    tileLoop.getStepMutable().clear();
    tileLoop.getStepMutable().append(currentLoopSteps);
    rewriter.finalizeOpModification(tileLoop);
  }

  // Calculate new upper bounds for element loops. The tiled dimensions are
  // exactly the first newSteps.size() flattened dimensions.
  for (size_t i = 0; i < newSteps.size(); i++) {
    rewriter.setInsertionPoint(innerLoop.getBody().getTerminator());
    // UpperBound: min(origUB, origIV+(originalStep*tile_size))
    auto stepped =
        mlir::arith::AddIOp::create(rewriter, loc, origIVs[i], newSteps[i]);
    mlir::Value newUB = stepped;
    if (inclusiveUBs[i]) {
      // Handle InclusiveUB
      // UpperBound: min(origUB, origIV+(originalStep*tile_size - 1))
      auto c1 = mlir::arith::ConstantOp::create(
          rewriter, loc, newSteps[i].getType(),
          rewriter.getIntegerAttr(newSteps[i].getType(), 1));
      newUB = mlir::arith::SubIOp::create(rewriter, loc, stepped, c1);
    }
    newUBs.push_back(
        mlir::arith::MinSIOp::create(rewriter, loc, origUBs[i], newUB));
  }

  // Create and insert nested elementLoopOps before terminator of outer loopOp.
  // Bounded by the number of dimensions actually tiled so that surplus tile
  // sizes cannot index past the collected IVs/bounds.
  mlir::acc::LoopOp currentLoop = innerLoop;
  for (size_t i = 0; i < newUBs.size(); i++) {
    rewriter.setInsertionPoint(currentLoop.getBody().getTerminator());
    mlir::DenseBoolArrayAttr inclusiveUBAttr = mlir::DenseBoolArrayAttr{};
    if (inclusiveUBs[i])
      inclusiveUBAttr = rewriter.getDenseBoolArrayAttr({true});
    mlir::acc::LoopOp elementLoop =
        createInnerLoop(innerLoop, rewriter, mlir::ValueRange{origIVs[i]},
                        mlir::ValueRange{newUBs[i]},
                        mlir::ValueRange{origSteps[i]}, inclusiveUBAttr, loc);
    // Remove vector/worker attributes from inner element loops except
    // outermost element loop
    if (i > 0) {
      rewriter.startOpModification(elementLoop);
      removeWorkerVectorFromLoop(elementLoop);
      rewriter.finalizeOpModification(elementLoop);
    }
    newIVs.push_back(elementLoop.getBody().getArgument(0));
    currentLoop = elementLoop;
  }

  // Remove vector/worker attributes from outer tile loops
  for (auto tileLoop : tileLoops) {
    rewriter.startOpModification(tileLoop);
    removeWorkerVectorFromLoop(tileLoop);
    rewriter.finalizeOpModification(tileLoop);
  }

  // Move ops from inner tile loop to inner element loop and replace IV uses.
  // When no element loop was created the body already lives where it should;
  // splicing a block into itself must be avoided.
  if (!newIVs.empty())
    moveOpsAndReplaceIVs(innerLoop, currentLoop, newIVs, origIVs, nOps,
                         rewriter);
  return outerLoop;
}
// Split a multi-IV acc.loop into a nest of loops suitable for tiling.
//
// The first `collapseCount` dimensions of `origLoop` stay together in a new
// outer loop that keeps the collapse and combined-construct attributes. Each
// dimension in [collapseCount, tileCount) gets its own single-IV loop, nested
// one inside the other. The leading operations of `origLoop`'s body
// (everything but the last one) are moved into the innermost new loop, and
// uses of the original IVs there are replaced by the new loops' IVs.
//
// The outer loop is created at the rewriter's current insertion point;
// `origLoop` itself is not erased here. Returns the new loops from outermost
// to innermost.
llvm::SmallVector<mlir::acc::LoopOp>
mlir::acc::uncollapseLoops(mlir::acc::LoopOp origLoop, unsigned tileCount,
                           unsigned collapseCount,
                           mlir::RewriterBase &rewriter) {
  const mlir::Location loc = origLoop.getLoc();
  llvm::SmallVector<mlir::acc::LoopOp> resultLoops;
  llvm::SmallVector<mlir::Value, 3> replacementIVs;

  // The inclusiveUpperbound attribute might not be set; treat a missing
  // attribute as "exclusive" for every dimension.
  auto isInclusive = [&origLoop](unsigned dim) -> bool {
    auto attr = origLoop.getInclusiveUpperboundAttr();
    return attr ? attr.asArrayRef()[dim] : false;
  };

  // Gather the bounds of the dimensions that remain collapsed together.
  llvm::SmallVector<bool> collapsedInclusive;
  llvm::SmallVector<mlir::Value, 3> collapsedLBs, collapsedUBs, collapsedSteps;
  for (unsigned dim = 0; dim < collapseCount; ++dim) {
    collapsedInclusive.push_back(isInclusive(dim));
    collapsedLBs.push_back(origLoop.getLowerbound()[dim]);
    collapsedUBs.push_back(origLoop.getUpperbound()[dim]);
    collapsedSteps.push_back(origLoop.getStep()[dim]);
  }

  // Outer loop: carries the collapsed dimensions and the original clauses.
  mlir::acc::LoopOp outerLoop = createACCLoopFromOriginal(
      origLoop, rewriter, collapsedLBs, collapsedUBs, collapsedSteps,
      rewriter.getDenseBoolArrayAttr(collapsedInclusive),
      origLoop.getCombinedAttr(), loc, /*preserveCollapse*/ true);
  mlir::Region &outerRegion = outerLoop.getRegion();
  mlir::Block *outerBlock =
      rewriter.createBlock(&outerRegion, outerRegion.begin());
  rewriter.setInsertionPointToEnd(outerBlock);
  mlir::acc::YieldOp::create(rewriter, loc);
  for (unsigned dim = 0; dim < collapseCount; ++dim) {
    mlir::Type ivType = origLoop.getBody().getArgument(dim).getType();
    outerLoop.getBody().addArgument(ivType, loc);
    replacementIVs.push_back(outerLoop.getBody().getArgument(dim));
  }
  resultLoops.push_back(outerLoop);

  // One nested single-IV loop per remaining tiled dimension.
  mlir::acc::LoopOp innermost = outerLoop;
  for (unsigned dim = collapseCount; dim < tileCount; ++dim) {
    rewriter.setInsertionPoint(innermost.getBody().getTerminator());
    const bool inclusive = isInclusive(dim);
    mlir::DenseBoolArrayAttr inclusiveUBAttr =
        rewriter.getDenseBoolArrayAttr({inclusive});
    mlir::acc::LoopOp nested = createInnerLoop(
        origLoop, rewriter, mlir::ValueRange{origLoop.getLowerbound()[dim]},
        mlir::ValueRange{origLoop.getUpperbound()[dim]},
        mlir::ValueRange{origLoop.getStep()[dim]}, inclusiveUBAttr, loc);
    replacementIVs.push_back(nested.getBody().getArgument(0));
    resultLoops.push_back(nested);
    innermost = nested;
  }

  // Move ops from origLoop to innermost loop and replace uses of IVs
  const size_t bodyOpCount = origLoop.getBody().getOperations().size();
  llvm::SmallVector<mlir::Value, 3> formerIVs;
  for (mlir::BlockArgument arg : origLoop.getBody().getArguments())
    formerIVs.push_back(arg);
  moveOpsAndReplaceIVs(origLoop, innermost, replacementIVs, formerIVs,
                       bodyOpCount, rewriter);
  return resultLoops;
}