blob: 3eb655980a391ae7ce3c6e687da0028aec428da0 [file] [log] [blame]
//===-- CUFPredefinedVarToGPU.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/Dialect/FIROpsSupport.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Pass/Pass.h"
namespace fir {
#define GEN_PASS_DEF_CUFPREDEFINEDVARTOGPU
#include "flang/Optimizer/Transforms/Passes.h.inc"
} // namespace fir
using namespace mlir;
namespace {
/// Creates one \p OpTyX, one \p OpTyY and one \p OpTyZ operation, each with an
/// i32 result, at the current insertion point of \p builder and appends the
/// three resulting values to \p values in x, y, z order.
///
/// \param builder        builder used to create every operation.
/// \param loc            location attached to the created operations.
/// \param c1             value added to each result when \p incrementByOne is
///                       set (the caller in this file passes an i32 constant
///                       1).
/// \param values         output list; exactly three values are appended.
/// \param incrementByOne when true, the value appended for each dimension is
///                       `arith.addi(result, c1)` instead of the raw result.
///                       NOTE(review): presumably this turns zero-based
///                       hardware indices into one-based Fortran ones.
template <typename OpTyX, typename OpTyY, typename OpTyZ>
static void createForAllDimensions(mlir::OpBuilder &builder, mlir::Location loc,
                                   mlir::Value c1,
                                   SmallVectorImpl<mlir::Value> &values,
                                   bool incrementByOne = false) {
  auto i32Ty = builder.getI32Type();

  // Records the value for one dimension, applying the optional offset. The
  // offset add is emitted right after the op it adjusts.
  auto record = [&](mlir::Value raw) {
    if (!incrementByOne) {
      values.push_back(raw);
      return;
    }
    values.push_back(mlir::arith::AddIOp::create(builder, loc, raw, c1));
  };

  record(OpTyX::create(builder, loc, i32Ty));
  record(OpTyY::create(builder, loc, i32Ty));
  record(OpTyZ::create(builder, loc, i32Ty));
}
// Module-name component that mangleBuiltin embeds in the mangled symbol name.
static constexpr llvm::StringRef builtinsModuleName = "__fortran_builtins";
// Prefix that mangleBuiltin places directly before the variable name.
static constexpr llvm::StringRef builtinVarPrefix = "__builtin_";
// Unprefixed names of the predefined variables this pass rewrites; each one is
// passed through mangleBuiltin before being compared with a fir.declare name.
static constexpr llvm::StringRef threadidx = "threadidx";
static constexpr llvm::StringRef blockidx = "blockidx";
static constexpr llvm::StringRef blockdim = "blockdim";
static constexpr llvm::StringRef griddim = "griddim";
// Field indices compared against the single field index of a coordinate
// operation; processDeclareOp pairs them with entries 0, 1 and 2 of the
// generated value list.
static constexpr unsigned field_x = 0;
static constexpr unsigned field_y = 1;
static constexpr unsigned field_z = 2;
/// Returns the mangled symbol name of the predefined variable \p varName:
/// `_QM` + builtinsModuleName + `E` + builtinVarPrefix + \p varName.
std::string mangleBuiltin(llvm::StringRef varName) {
  std::string mangled = "_QM";
  mangled.append(builtinsModuleName.data(), builtinsModuleName.size());
  mangled.append("E");
  mangled.append(builtinVarPrefix.data(), builtinVarPrefix.size());
  mangled.append(varName.data(), varName.size());
  return mangled;
}
/// If \p coordOp addresses field \p fieldIdx, replaces the result of every
/// load of that coordinate with \p gpuValue and erases the load. Coordinate
/// operations addressing any other field are left untouched.
///
/// Preconditions (asserted): \p coordOp carries exactly one field index, and
/// every user of its result is a fir::LoadOp.
///
/// \param builder  unused here; kept for a uniform helper signature.
/// \param loc      unused here; kept for a uniform helper signature.
/// \param coordOp  coordinate operation to inspect.
/// \param fieldIdx field index this call is responsible for.
/// \param gpuValue replacement value for the loaded component.
static void processCoordinateOp(mlir::OpBuilder &builder, mlir::Location loc,
                                fir::CoordinateOp coordOp, unsigned fieldIdx,
                                mlir::Value &gpuValue) {
  std::optional<llvm::ArrayRef<int32_t>> fieldIndices =
      coordOp.getFieldIndices();
  assert(fieldIndices && fieldIndices->size() == 1 &&
         "expect only one coordinate");
  if (static_cast<unsigned>((*fieldIndices)[0]) != fieldIdx)
    return;

  // Collect first and erase afterwards: erasing a load removes its use of the
  // coordinate result, which must not happen while that use list is walked.
  llvm::SmallVector<fir::LoadOp> loadsToErase;
  for (mlir::OpOperand &coordUse : coordOp.getResult().getUses()) {
    // cast<> asserts that the user is a load, replacing the former
    // assert(isa<>) + dyn_cast<> pair whose result was dereferenced unchecked.
    auto loadOp = mlir::cast<fir::LoadOp>(coordUse.getOwner());
    loadOp.getResult().replaceAllUsesWith(gpuValue);
    loadsToErase.push_back(loadOp);
  }
  for (fir::LoadOp loadOp : loadsToErase)
    loadOp.erase();
}
/// If \p declareOp declares the variable whose unique name equals
/// \p builtinVar, rewrites the loads of its x/y/z components to use
/// \p gpuValues and schedules the operations that became dead for deletion.
///
/// Only users that are fir::CoordinateOp are rewritten. Any other user is left
/// in place, and in that case the declare operation and the operation defining
/// its memref are kept as well so the remaining user stays valid.
///
/// \param builder     forwarded to processCoordinateOp.
/// \param loc         forwarded to processCoordinateOp.
/// \param declareOp   declare operation to inspect.
/// \param builtinVar  mangled name of the predefined variable to match.
/// \param gpuValues   replacement values; entries 0, 1 and 2 are used for
///                    field_x, field_y and field_z respectively.
/// \param opsToDelete output list of operations the caller must erase, in the
///                    order they are appended (users before their producers).
static void
processDeclareOp(mlir::OpBuilder &builder, mlir::Location loc,
                 fir::DeclareOp declareOp, llvm::StringRef builtinVar,
                 llvm::SmallVectorImpl<mlir::Value> &gpuValues,
                 llvm::SmallVectorImpl<mlir::Operation *> &opsToDelete) {
  if (declareOp.getUniqName() != builtinVar)
    return;
  assert(gpuValues.size() >= 3 && "expect one value per x/y/z component");

  // Stays true only while every user of the declare has been fully replaced.
  bool allUsersRewritten = true;
  for (mlir::OpOperand &use : declareOp.getResult().getUses()) {
    auto coordOp = mlir::dyn_cast<fir::CoordinateOp>(use.getOwner());
    if (!coordOp) {
      // Not a component access: nothing to rewrite, but the declare must
      // survive because this user still needs it.
      allUsersRewritten = false;
      continue;
    }
    processCoordinateOp(builder, loc, coordOp, field_x, gpuValues[0]);
    processCoordinateOp(builder, loc, coordOp, field_y, gpuValues[1]);
    processCoordinateOp(builder, loc, coordOp, field_z, gpuValues[2]);
    // Erasing an operation that still has uses is invalid, so only schedule
    // the coordinate once all of its loads are gone.
    if (coordOp->use_empty())
      opsToDelete.push_back(coordOp.getOperation());
    else
      allUsersRewritten = false;
  }
  if (!allUsersRewritten)
    return;

  opsToDelete.push_back(declareOp.getOperation());
  // The memref producer may only go away if this declare is its sole user;
  // the declare has not been erased yet, hence exactly one use is expected.
  if (mlir::Operation *memrefDef = declareOp.getMemref().getDefiningOp())
    if (memrefDef->hasOneUse())
      opsToDelete.push_back(memrefDef);
}
/// Function pass that replaces loads of the components of the predefined
/// variables threadidx, blockidx, blockdim and griddim with values produced by
/// the corresponding NVVM operations.
///
/// Only functions that have a body and carry a cuf::ProcAttributeAttr equal to
/// Device, Global, GridGlobal or HostDevice are processed.
struct CUFPredefinedVarToGPU
    : public fir::impl::CUFPredefinedVarToGPUBase<CUFPredefinedVarToGPU> {

  void runOnOperation() override {
    func::FuncOp funcOp = getOperation();
    if (funcOp.getBody().empty())
      return;

    auto cudaProcAttr = funcOp->getAttrOfType<cuf::ProcAttributeAttr>(
        cuf::getProcAttrName());
    if (!cudaProcAttr)
      return;
    const cuf::ProcAttribute procKind = cudaProcAttr.getValue();
    const bool isHandledKind = procKind == cuf::ProcAttribute::Device ||
                               procKind == cuf::ProcAttribute::Global ||
                               procKind == cuf::ProcAttribute::GridGlobal ||
                               procKind == cuf::ProcAttribute::HostDevice;
    if (!isHandledKind)
      return;

    // All replacement values are created at the very start of the first block
    // of the function body.
    mlir::Location loc = funcOp.getLoc();
    mlir::OpBuilder builder(funcOp.getContext());
    builder.setInsertionPointToStart(&funcOp.getBody().front());
    auto c1 = mlir::arith::ConstantOp::create(
        builder, loc, builder.getI32Type(), builder.getI32IntegerAttr(1));

    llvm::SmallVector<mlir::Value, 3> threadids, blockids, blockdims, griddims;
    // Thread and block ids get c1 added to them; the dimensions are used
    // as-is.
    createForAllDimensions<mlir::NVVM::ThreadIdXOp, mlir::NVVM::ThreadIdYOp,
                           mlir::NVVM::ThreadIdZOp>(
        builder, loc, c1, threadids, /*incrementByOne=*/true);
    createForAllDimensions<mlir::NVVM::BlockIdXOp, mlir::NVVM::BlockIdYOp,
                           mlir::NVVM::BlockIdZOp>(
        builder, loc, c1, blockids, /*incrementByOne=*/true);
    createForAllDimensions<mlir::NVVM::GridDimXOp, mlir::NVVM::GridDimYOp,
                           mlir::NVVM::GridDimZOp>(builder, loc, c1, griddims);
    createForAllDimensions<mlir::NVVM::BlockDimXOp, mlir::NVVM::BlockDimYOp,
                           mlir::NVVM::BlockDimZOp>(builder, loc, c1,
                                                    blockdims);

    // The mangled names do not depend on the declare being visited, so build
    // them once instead of once per fir.declare.
    const std::string threadIdxName = mangleBuiltin(threadidx);
    const std::string blockIdxName = mangleBuiltin(blockidx);
    const std::string blockDimName = mangleBuiltin(blockdim);
    const std::string gridDimName = mangleBuiltin(griddim);

    // Erasure is deferred until after the traversal so the range of declare
    // operations is not modified while it is being iterated.
    llvm::SmallVector<mlir::Operation *> opsToDelete;
    for (auto declareOp : funcOp.getOps<fir::DeclareOp>()) {
      processDeclareOp(builder, loc, declareOp, threadIdxName, threadids,
                       opsToDelete);
      processDeclareOp(builder, loc, declareOp, blockIdxName, blockids,
                       opsToDelete);
      processDeclareOp(builder, loc, declareOp, blockDimName, blockdims,
                       opsToDelete);
      processDeclareOp(builder, loc, declareOp, gridDimName, griddims,
                       opsToDelete);
    }

    for (mlir::Operation *op : opsToDelete)
      op->erase();
  }
};
} // end anonymous namespace