| //===- SimplifyIntrinsics.cpp -- replace intrinsics with simpler form -----===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| //===----------------------------------------------------------------------===// |
| /// \file |
| /// This pass looks for suitable calls to runtime library for intrinsics that |
| /// can be simplified/specialized and replaces with a specialized function. |
| /// |
| /// For example, SUM(arr) can be specialized as a simple function with one loop, |
| /// compared to the three arguments (plus file & line info) that the runtime |
| /// call has - when the argument is a 1D-array (multiple loops may be needed |
/// for higher dimension arrays, of course).
| /// |
| /// The general idea is that besides making the call simpler, it can also be |
| /// inlined by other passes that run after this pass, which further improves |
| /// performance, particularly when the work done in the function is trivial |
| /// and small in size. |
| //===----------------------------------------------------------------------===// |
| |
| #include "flang/Common/Fortran.h" |
| #include "flang/Optimizer/Builder/BoxValue.h" |
| #include "flang/Optimizer/Builder/FIRBuilder.h" |
| #include "flang/Optimizer/Builder/LowLevelIntrinsics.h" |
| #include "flang/Optimizer/Builder/Todo.h" |
| #include "flang/Optimizer/Dialect/FIROps.h" |
| #include "flang/Optimizer/Dialect/FIRType.h" |
| #include "flang/Optimizer/Dialect/Support/FIRContext.h" |
| #include "flang/Optimizer/HLFIR/HLFIRDialect.h" |
| #include "flang/Optimizer/Transforms/Passes.h" |
| #include "flang/Optimizer/Transforms/Utils.h" |
| #include "flang/Runtime/entry-names.h" |
| #include "mlir/Dialect/LLVMIR/LLVMDialect.h" |
| #include "mlir/IR/Matchers.h" |
| #include "mlir/IR/Operation.h" |
| #include "mlir/Pass/Pass.h" |
| #include "mlir/Transforms/DialectConversion.h" |
| #include "mlir/Transforms/GreedyPatternRewriteDriver.h" |
| #include "mlir/Transforms/RegionUtils.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include <llvm/Support/ErrorHandling.h> |
| #include <mlir/Dialect/Arith/IR/Arith.h> |
| #include <mlir/IR/BuiltinTypes.h> |
| #include <mlir/IR/Location.h> |
| #include <mlir/IR/MLIRContext.h> |
| #include <mlir/IR/Value.h> |
| #include <mlir/Support/LLVM.h> |
| #include <optional> |
| |
| namespace fir { |
| #define GEN_PASS_DEF_SIMPLIFYINTRINSICS |
| #include "flang/Optimizer/Transforms/Passes.h.inc" |
| } // namespace fir |
| |
| #define DEBUG_TYPE "flang-simplify-intrinsics" |
| |
| namespace { |
| |
| class SimplifyIntrinsicsPass |
| : public fir::impl::SimplifyIntrinsicsBase<SimplifyIntrinsicsPass> { |
| using FunctionTypeGeneratorTy = |
| llvm::function_ref<mlir::FunctionType(fir::FirOpBuilder &)>; |
| using FunctionBodyGeneratorTy = |
| llvm::function_ref<void(fir::FirOpBuilder &, mlir::func::FuncOp &)>; |
| using GenReductionBodyTy = llvm::function_ref<void( |
| fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp, unsigned rank, |
| mlir::Type elementType)>; |
| |
| public: |
| using fir::impl::SimplifyIntrinsicsBase< |
| SimplifyIntrinsicsPass>::SimplifyIntrinsicsBase; |
| |
| /// Generate a new function implementing a simplified version |
| /// of a Fortran runtime function defined by \p basename name. |
| /// \p typeGenerator is a callback that generates the new function's type. |
| /// \p bodyGenerator is a callback that generates the new function's body. |
| /// The new function is created in the \p builder's Module. |
| mlir::func::FuncOp getOrCreateFunction(fir::FirOpBuilder &builder, |
| const mlir::StringRef &basename, |
| FunctionTypeGeneratorTy typeGenerator, |
| FunctionBodyGeneratorTy bodyGenerator); |
| void runOnOperation() override; |
| void getDependentDialects(mlir::DialectRegistry ®istry) const override; |
| |
| private: |
| /// Helper functions to replace a reduction type of call with its |
| /// simplified form. The actual function is generated using a callback |
| /// function. |
| /// \p call is the call to be replaced |
| /// \p kindMap is used to create FIROpBuilder |
| /// \p genBodyFunc is the callback that builds the replacement function |
| void simplifyIntOrFloatReduction(fir::CallOp call, |
| const fir::KindMapping &kindMap, |
| GenReductionBodyTy genBodyFunc); |
| void simplifyLogicalDim0Reduction(fir::CallOp call, |
| const fir::KindMapping &kindMap, |
| GenReductionBodyTy genBodyFunc); |
| void simplifyLogicalDim1Reduction(fir::CallOp call, |
| const fir::KindMapping &kindMap, |
| GenReductionBodyTy genBodyFunc); |
| void simplifyMinMaxlocReduction(fir::CallOp call, |
| const fir::KindMapping &kindMap, bool isMax); |
| void simplifyReductionBody(fir::CallOp call, const fir::KindMapping &kindMap, |
| GenReductionBodyTy genBodyFunc, |
| fir::FirOpBuilder &builder, |
| const mlir::StringRef &basename, |
| mlir::Type elementType); |
| }; |
| |
| } // namespace |
| |
| /// Create FirOpBuilder with the provided \p op insertion point |
| /// and \p kindMap additionally inheriting FastMathFlags from \p op. |
| static fir::FirOpBuilder |
| getSimplificationBuilder(mlir::Operation *op, const fir::KindMapping &kindMap) { |
| fir::FirOpBuilder builder{op, kindMap}; |
| auto fmi = mlir::dyn_cast<mlir::arith::ArithFastMathInterface>(*op); |
| if (!fmi) |
| return builder; |
| |
| // Regardless of what default FastMathFlags are used by FirOpBuilder, |
| // override them with FastMathFlags attached to the operation. |
| builder.setFastMathFlags(fmi.getFastMathFlagsAttr().getValue()); |
| return builder; |
| } |
| |
| /// Generate function type for the simplified version of RTNAME(Sum) and |
| /// similar functions with a fir.box<none> type returning \p elementType. |
| static mlir::FunctionType genNoneBoxType(fir::FirOpBuilder &builder, |
| const mlir::Type &elementType) { |
| mlir::Type boxType = fir::BoxType::get(builder.getNoneType()); |
| return mlir::FunctionType::get(builder.getContext(), {boxType}, |
| {elementType}); |
| } |
| |
| template <typename Op> |
| Op expectOp(mlir::Value val) { |
| if (Op op = mlir::dyn_cast_or_null<Op>(val.getDefiningOp())) |
| return op; |
| LLVM_DEBUG(llvm::dbgs() << "Didn't find expected " << Op::getOperationName() |
| << '\n'); |
| return nullptr; |
| } |
| |
| template <typename Op> |
| static mlir::Value findDefSingle(fir::ConvertOp op) { |
| if (auto defOp = expectOp<Op>(op->getOperand(0))) { |
| return defOp.getResult(); |
| } |
| return {}; |
| } |
| |
| template <typename... Ops> |
| static mlir::Value findDef(fir::ConvertOp op) { |
| mlir::Value defOp; |
| // Loop over the operation types given to see if any match, exiting once |
| // a match is found. Cast to void is needed to avoid compiler complaining |
| // that the result of expression is unused |
| (void)((defOp = findDefSingle<Ops>(op), (defOp)) || ...); |
| return defOp; |
| } |
| |
| static bool isOperandAbsent(mlir::Value val) { |
| if (auto op = expectOp<fir::ConvertOp>(val)) { |
| assert(op->getOperands().size() != 0); |
| return mlir::isa_and_nonnull<fir::AbsentOp>( |
| op->getOperand(0).getDefiningOp()); |
| } |
| return false; |
| } |
| |
| static bool isTrueOrNotConstant(mlir::Value val) { |
| if (auto op = expectOp<mlir::arith::ConstantOp>(val)) { |
| return !mlir::matchPattern(val, mlir::m_Zero()); |
| } |
| return true; |
| } |
| |
| static bool isZero(mlir::Value val) { |
| if (auto op = expectOp<fir::ConvertOp>(val)) { |
| assert(op->getOperands().size() != 0); |
| if (mlir::Operation *defOp = op->getOperand(0).getDefiningOp()) |
| return mlir::matchPattern(defOp, mlir::m_Zero()); |
| } |
| return false; |
| } |
| |
| static mlir::Value findBoxDef(mlir::Value val) { |
| if (auto op = expectOp<fir::ConvertOp>(val)) { |
| assert(op->getOperands().size() != 0); |
| return findDef<fir::EmboxOp, fir::ReboxOp>(op); |
| } |
| return {}; |
| } |
| |
| static mlir::Value findMaskDef(mlir::Value val) { |
| if (auto op = expectOp<fir::ConvertOp>(val)) { |
| assert(op->getOperands().size() != 0); |
| return findDef<fir::EmboxOp, fir::ReboxOp, fir::AbsentOp>(op); |
| } |
| return {}; |
| } |
| |
| static unsigned getDimCount(mlir::Value val) { |
| // In order to find the dimensions count, we look for EmboxOp/ReboxOp |
| // and take the count from its *result* type. Note that in case |
| // of sliced emboxing the operand and the result of EmboxOp/ReboxOp |
| // have different types. |
| // Actually, we can take the box type from the operand of |
| // the first ConvertOp that has non-opaque box type that we meet |
| // going through the ConvertOp chain. |
| if (mlir::Value emboxVal = findBoxDef(val)) |
| if (auto boxTy = mlir::dyn_cast<fir::BoxType>(emboxVal.getType())) |
| if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(boxTy.getEleTy())) |
| return seqTy.getDimension(); |
| return 0; |
| } |
| |
| /// Given the call operation's box argument \p val, discover |
| /// the element type of the underlying array object. |
| /// \returns the element type or std::nullopt if the type cannot |
| /// be reliably found. |
| /// We expect that the argument is a result of fir.convert |
| /// with the destination type of !fir.box<none>. |
| static std::optional<mlir::Type> getArgElementType(mlir::Value val) { |
| mlir::Operation *defOp; |
| do { |
| defOp = val.getDefiningOp(); |
| // Analyze only sequences of convert operations. |
| if (!mlir::isa<fir::ConvertOp>(defOp)) |
| return std::nullopt; |
| val = defOp->getOperand(0); |
| // The convert operation is expected to convert from one |
| // box type to another box type. |
| auto boxType = mlir::cast<fir::BoxType>(val.getType()); |
| auto elementType = fir::unwrapSeqOrBoxedSeqType(boxType); |
| if (!mlir::isa<mlir::NoneType>(elementType)) |
| return elementType; |
| } while (true); |
| } |
| |
| using BodyOpGeneratorTy = llvm::function_ref<mlir::Value( |
| fir::FirOpBuilder &, mlir::Location, const mlir::Type &, mlir::Value, |
| mlir::Value)>; |
| using ContinueLoopGenTy = llvm::function_ref<llvm::SmallVector<mlir::Value>( |
| fir::FirOpBuilder &, mlir::Location, mlir::Value)>; |
| |
| /// Generate the reduction loop into \p funcOp. |
| /// |
| /// \p initVal is a function, called to get the initial value for |
| /// the reduction value |
| /// \p genBody is called to fill in the actual reduciton operation |
| /// for example add for SUM, MAX for MAXVAL, etc. |
| /// \p rank is the rank of the input argument. |
| /// \p elementType is the type of the elements in the input array, |
| /// which may be different to the return type. |
| /// \p loopCond is called to generate the condition to continue or |
| /// not for IterWhile loops |
| /// \p unorderedOrInitalLoopCond contains either a boolean or bool |
| /// mlir constant, and controls the inital value for while loops |
| /// or if DoLoop is ordered/unordered. |
| |
| template <typename OP, typename T, int resultIndex> |
| static void |
| genReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp, |
| fir::InitValGeneratorTy initVal, ContinueLoopGenTy loopCond, |
| T unorderedOrInitialLoopCond, BodyOpGeneratorTy genBody, |
| unsigned rank, mlir::Type elementType, mlir::Location loc) { |
| |
| mlir::IndexType idxTy = builder.getIndexType(); |
| |
| mlir::Block::BlockArgListType args = funcOp.front().getArguments(); |
| mlir::Value arg = args[0]; |
| |
| mlir::Value zeroIdx = builder.createIntegerConstant(loc, idxTy, 0); |
| |
| fir::SequenceType::Shape flatShape(rank, |
| fir::SequenceType::getUnknownExtent()); |
| mlir::Type arrTy = fir::SequenceType::get(flatShape, elementType); |
| mlir::Type boxArrTy = fir::BoxType::get(arrTy); |
| mlir::Value array = builder.create<fir::ConvertOp>(loc, boxArrTy, arg); |
| mlir::Type resultType = funcOp.getResultTypes()[0]; |
| mlir::Value init = initVal(builder, loc, resultType); |
| |
| llvm::SmallVector<mlir::Value, Fortran::common::maxRank> bounds; |
| |
| assert(rank > 0 && "rank cannot be zero"); |
| mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); |
| |
| // Compute all the upper bounds before the loop nest. |
| // It is not strictly necessary for performance, since the loop nest |
| // does not have any store operations and any LICM optimization |
| // should be able to optimize the redundancy. |
| for (unsigned i = 0; i < rank; ++i) { |
| mlir::Value dimIdx = builder.createIntegerConstant(loc, idxTy, i); |
| auto dims = |
| builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, array, dimIdx); |
| mlir::Value len = dims.getResult(1); |
| // We use C indexing here, so len-1 as loopcount |
| mlir::Value loopCount = builder.create<mlir::arith::SubIOp>(loc, len, one); |
| bounds.push_back(loopCount); |
| } |
| // Create a loop nest consisting of OP operations. |
| // Collect the loops' induction variables into indices array, |
| // which will be used in the innermost loop to load the input |
| // array's element. |
| // The loops are generated such that the innermost loop processes |
| // the 0 dimension. |
| llvm::SmallVector<mlir::Value, Fortran::common::maxRank> indices; |
| for (unsigned i = rank; 0 < i; --i) { |
| mlir::Value step = one; |
| mlir::Value loopCount = bounds[i - 1]; |
| auto loop = builder.create<OP>(loc, zeroIdx, loopCount, step, |
| unorderedOrInitialLoopCond, |
| /*finalCountValue=*/false, init); |
| init = loop.getRegionIterArgs()[resultIndex]; |
| indices.push_back(loop.getInductionVar()); |
| // Set insertion point to the loop body so that the next loop |
| // is inserted inside the current one. |
| builder.setInsertionPointToStart(loop.getBody()); |
| } |
| |
| // Reverse the indices such that they are ordered as: |
| // <dim-0-idx, dim-1-idx, ...> |
| std::reverse(indices.begin(), indices.end()); |
| // We are in the innermost loop: generate the reduction body. |
| mlir::Type eleRefTy = builder.getRefType(elementType); |
| mlir::Value addr = |
| builder.create<fir::CoordinateOp>(loc, eleRefTy, array, indices); |
| mlir::Value elem = builder.create<fir::LoadOp>(loc, addr); |
| mlir::Value reductionVal = genBody(builder, loc, elementType, elem, init); |
| // Generate vector with condition to continue while loop at [0] and result |
| // from current loop at [1] for IterWhileOp loops, just result at [0] for |
| // DoLoopOp loops. |
| llvm::SmallVector<mlir::Value> results = loopCond(builder, loc, reductionVal); |
| |
| // Unwind the loop nest and insert ResultOp on each level |
| // to return the updated value of the reduction to the enclosing |
| // loops. |
| for (unsigned i = 0; i < rank; ++i) { |
| auto result = builder.create<fir::ResultOp>(loc, results); |
| // Proceed to the outer loop. |
| auto loop = mlir::cast<OP>(result->getParentOp()); |
| results = loop.getResults(); |
| // Set insertion point after the loop operation that we have |
| // just processed. |
| builder.setInsertionPointAfter(loop.getOperation()); |
| } |
| // End of loop nest. The insertion point is after the outermost loop. |
| // Return the reduction value from the function. |
| builder.create<mlir::func::ReturnOp>(loc, results[resultIndex]); |
| } |
| |
| static llvm::SmallVector<mlir::Value> nopLoopCond(fir::FirOpBuilder &builder, |
| mlir::Location loc, |
| mlir::Value reductionVal) { |
| return {reductionVal}; |
| } |
| |
| /// Generate function body of the simplified version of RTNAME(Sum) |
| /// with signature provided by \p funcOp. The caller is responsible |
| /// for saving/restoring the original insertion point of \p builder. |
| /// \p funcOp is expected to be empty on entry to this function. |
| /// \p rank specifies the rank of the input argument. |
| static void genRuntimeSumBody(fir::FirOpBuilder &builder, |
| mlir::func::FuncOp &funcOp, unsigned rank, |
| mlir::Type elementType) { |
| // function RTNAME(Sum)<T>x<rank>_simplified(arr) |
| // T, dimension(:) :: arr |
| // T sum = 0 |
| // integer iter |
| // do iter = 0, extent(arr) |
| // sum = sum + arr[iter] |
| // end do |
| // RTNAME(Sum)<T>x<rank>_simplified = sum |
| // end function RTNAME(Sum)<T>x<rank>_simplified |
| auto zero = [](fir::FirOpBuilder builder, mlir::Location loc, |
| mlir::Type elementType) { |
| if (auto ty = mlir::dyn_cast<mlir::FloatType>(elementType)) { |
| const llvm::fltSemantics &sem = ty.getFloatSemantics(); |
| return builder.createRealConstant(loc, elementType, |
| llvm::APFloat::getZero(sem)); |
| } |
| return builder.createIntegerConstant(loc, elementType, 0); |
| }; |
| |
| auto genBodyOp = [](fir::FirOpBuilder builder, mlir::Location loc, |
| mlir::Type elementType, mlir::Value elem1, |
| mlir::Value elem2) -> mlir::Value { |
| if (mlir::isa<mlir::FloatType>(elementType)) |
| return builder.create<mlir::arith::AddFOp>(loc, elem1, elem2); |
| if (mlir::isa<mlir::IntegerType>(elementType)) |
| return builder.create<mlir::arith::AddIOp>(loc, elem1, elem2); |
| |
| llvm_unreachable("unsupported type"); |
| return {}; |
| }; |
| |
| mlir::Location loc = mlir::UnknownLoc::get(builder.getContext()); |
| builder.setInsertionPointToEnd(funcOp.addEntryBlock()); |
| |
| genReductionLoop<fir::DoLoopOp, bool, 0>(builder, funcOp, zero, nopLoopCond, |
| false, genBodyOp, rank, elementType, |
| loc); |
| } |
| |
| static void genRuntimeMaxvalBody(fir::FirOpBuilder &builder, |
| mlir::func::FuncOp &funcOp, unsigned rank, |
| mlir::Type elementType) { |
| auto init = [](fir::FirOpBuilder builder, mlir::Location loc, |
| mlir::Type elementType) { |
| if (auto ty = mlir::dyn_cast<mlir::FloatType>(elementType)) { |
| const llvm::fltSemantics &sem = ty.getFloatSemantics(); |
| return builder.createRealConstant( |
| loc, elementType, llvm::APFloat::getLargest(sem, /*Negative=*/true)); |
| } |
| unsigned bits = elementType.getIntOrFloatBitWidth(); |
| int64_t minInt = llvm::APInt::getSignedMinValue(bits).getSExtValue(); |
| return builder.createIntegerConstant(loc, elementType, minInt); |
| }; |
| |
| auto genBodyOp = [](fir::FirOpBuilder builder, mlir::Location loc, |
| mlir::Type elementType, mlir::Value elem1, |
| mlir::Value elem2) -> mlir::Value { |
| if (mlir::isa<mlir::FloatType>(elementType)) { |
| // arith.maxf later converted to llvm.intr.maxnum does not work |
| // correctly for NaNs and -0.0 (see maxnum/minnum pattern matching |
| // in LLVM's InstCombine pass). Moreover, llvm.intr.maxnum |
| // for F128 operands is lowered into fmaxl call by LLVM. |
| // This libm function may not work properly for F128 arguments |
| // on targets where long double is not F128. It is an LLVM issue, |
| // but we just use normal select here to resolve all the cases. |
| auto compare = builder.create<mlir::arith::CmpFOp>( |
| loc, mlir::arith::CmpFPredicate::OGT, elem1, elem2); |
| return builder.create<mlir::arith::SelectOp>(loc, compare, elem1, elem2); |
| } |
| if (mlir::isa<mlir::IntegerType>(elementType)) |
| return builder.create<mlir::arith::MaxSIOp>(loc, elem1, elem2); |
| |
| llvm_unreachable("unsupported type"); |
| return {}; |
| }; |
| |
| mlir::Location loc = mlir::UnknownLoc::get(builder.getContext()); |
| builder.setInsertionPointToEnd(funcOp.addEntryBlock()); |
| |
| genReductionLoop<fir::DoLoopOp, bool, 0>(builder, funcOp, init, nopLoopCond, |
| false, genBodyOp, rank, elementType, |
| loc); |
| } |
| |
| static void genRuntimeCountBody(fir::FirOpBuilder &builder, |
| mlir::func::FuncOp &funcOp, unsigned rank, |
| mlir::Type elementType) { |
| auto zero = [](fir::FirOpBuilder builder, mlir::Location loc, |
| mlir::Type elementType) { |
| unsigned bits = elementType.getIntOrFloatBitWidth(); |
| int64_t zeroInt = llvm::APInt::getZero(bits).getSExtValue(); |
| return builder.createIntegerConstant(loc, elementType, zeroInt); |
| }; |
| |
| auto genBodyOp = [](fir::FirOpBuilder builder, mlir::Location loc, |
| mlir::Type elementType, mlir::Value elem1, |
| mlir::Value elem2) -> mlir::Value { |
| auto zero32 = builder.createIntegerConstant(loc, elementType, 0); |
| auto zero64 = builder.createIntegerConstant(loc, builder.getI64Type(), 0); |
| auto one64 = builder.createIntegerConstant(loc, builder.getI64Type(), 1); |
| |
| auto compare = builder.create<mlir::arith::CmpIOp>( |
| loc, mlir::arith::CmpIPredicate::eq, elem1, zero32); |
| auto select = |
| builder.create<mlir::arith::SelectOp>(loc, compare, zero64, one64); |
| return builder.create<mlir::arith::AddIOp>(loc, select, elem2); |
| }; |
| |
| // Count always gets I32 for elementType as it converts logical input to |
| // logical<4> before passing to the function. |
| mlir::Location loc = mlir::UnknownLoc::get(builder.getContext()); |
| builder.setInsertionPointToEnd(funcOp.addEntryBlock()); |
| |
| genReductionLoop<fir::DoLoopOp, bool, 0>(builder, funcOp, zero, nopLoopCond, |
| false, genBodyOp, rank, elementType, |
| loc); |
| } |
| |
| static void genRuntimeAnyBody(fir::FirOpBuilder &builder, |
| mlir::func::FuncOp &funcOp, unsigned rank, |
| mlir::Type elementType) { |
| auto zero = [](fir::FirOpBuilder builder, mlir::Location loc, |
| mlir::Type elementType) { |
| return builder.createIntegerConstant(loc, elementType, 0); |
| }; |
| |
| auto genBodyOp = [](fir::FirOpBuilder builder, mlir::Location loc, |
| mlir::Type elementType, mlir::Value elem1, |
| mlir::Value elem2) -> mlir::Value { |
| auto zero = builder.createIntegerConstant(loc, elementType, 0); |
| return builder.create<mlir::arith::CmpIOp>( |
| loc, mlir::arith::CmpIPredicate::ne, elem1, zero); |
| }; |
| |
| auto continueCond = [](fir::FirOpBuilder builder, mlir::Location loc, |
| mlir::Value reductionVal) { |
| auto one1 = builder.createIntegerConstant(loc, builder.getI1Type(), 1); |
| auto eor = builder.create<mlir::arith::XOrIOp>(loc, reductionVal, one1); |
| llvm::SmallVector<mlir::Value> results = {eor, reductionVal}; |
| return results; |
| }; |
| |
| mlir::Location loc = mlir::UnknownLoc::get(builder.getContext()); |
| builder.setInsertionPointToEnd(funcOp.addEntryBlock()); |
| mlir::Value ok = builder.createBool(loc, true); |
| |
| genReductionLoop<fir::IterWhileOp, mlir::Value, 1>( |
| builder, funcOp, zero, continueCond, ok, genBodyOp, rank, elementType, |
| loc); |
| } |
| |
| static void genRuntimeAllBody(fir::FirOpBuilder &builder, |
| mlir::func::FuncOp &funcOp, unsigned rank, |
| mlir::Type elementType) { |
| auto one = [](fir::FirOpBuilder builder, mlir::Location loc, |
| mlir::Type elementType) { |
| return builder.createIntegerConstant(loc, elementType, 1); |
| }; |
| |
| auto genBodyOp = [](fir::FirOpBuilder builder, mlir::Location loc, |
| mlir::Type elementType, mlir::Value elem1, |
| mlir::Value elem2) -> mlir::Value { |
| auto zero = builder.createIntegerConstant(loc, elementType, 0); |
| return builder.create<mlir::arith::CmpIOp>( |
| loc, mlir::arith::CmpIPredicate::ne, elem1, zero); |
| }; |
| |
| auto continueCond = [](fir::FirOpBuilder builder, mlir::Location loc, |
| mlir::Value reductionVal) { |
| llvm::SmallVector<mlir::Value> results = {reductionVal, reductionVal}; |
| return results; |
| }; |
| |
| mlir::Location loc = mlir::UnknownLoc::get(builder.getContext()); |
| builder.setInsertionPointToEnd(funcOp.addEntryBlock()); |
| mlir::Value ok = builder.createBool(loc, true); |
| |
| genReductionLoop<fir::IterWhileOp, mlir::Value, 1>( |
| builder, funcOp, one, continueCond, ok, genBodyOp, rank, elementType, |
| loc); |
| } |
| |
| static mlir::FunctionType genRuntimeMinlocType(fir::FirOpBuilder &builder, |
| unsigned int rank) { |
| mlir::Type boxType = fir::BoxType::get(builder.getNoneType()); |
| mlir::Type boxRefType = builder.getRefType(boxType); |
| |
| return mlir::FunctionType::get(builder.getContext(), |
| {boxRefType, boxType, boxType}, {}); |
| } |
| |
| // Produces a loop nest for a Minloc intrinsic. |
| void fir::genMinMaxlocReductionLoop( |
| fir::FirOpBuilder &builder, mlir::Value array, |
| fir::InitValGeneratorTy initVal, fir::MinlocBodyOpGeneratorTy genBody, |
| fir::AddrGeneratorTy getAddrFn, unsigned rank, mlir::Type elementType, |
| mlir::Location loc, mlir::Type maskElemType, mlir::Value resultArr, |
| bool maskMayBeLogicalScalar) { |
| mlir::IndexType idxTy = builder.getIndexType(); |
| |
| mlir::Value zeroIdx = builder.createIntegerConstant(loc, idxTy, 0); |
| |
| fir::SequenceType::Shape flatShape(rank, |
| fir::SequenceType::getUnknownExtent()); |
| mlir::Type arrTy = fir::SequenceType::get(flatShape, elementType); |
| mlir::Type boxArrTy = fir::BoxType::get(arrTy); |
| array = builder.create<fir::ConvertOp>(loc, boxArrTy, array); |
| |
| mlir::Type resultElemType = hlfir::getFortranElementType(resultArr.getType()); |
| mlir::Value flagSet = builder.createIntegerConstant(loc, resultElemType, 1); |
| mlir::Value zero = builder.createIntegerConstant(loc, resultElemType, 0); |
| mlir::Value flagRef = builder.createTemporary(loc, resultElemType); |
| builder.create<fir::StoreOp>(loc, zero, flagRef); |
| |
| mlir::Value init = initVal(builder, loc, elementType); |
| llvm::SmallVector<mlir::Value, Fortran::common::maxRank> bounds; |
| |
| assert(rank > 0 && "rank cannot be zero"); |
| mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); |
| |
| // Compute all the upper bounds before the loop nest. |
| // It is not strictly necessary for performance, since the loop nest |
| // does not have any store operations and any LICM optimization |
| // should be able to optimize the redundancy. |
| for (unsigned i = 0; i < rank; ++i) { |
| mlir::Value dimIdx = builder.createIntegerConstant(loc, idxTy, i); |
| auto dims = |
| builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, array, dimIdx); |
| mlir::Value len = dims.getResult(1); |
| // We use C indexing here, so len-1 as loopcount |
| mlir::Value loopCount = builder.create<mlir::arith::SubIOp>(loc, len, one); |
| bounds.push_back(loopCount); |
| } |
| // Create a loop nest consisting of OP operations. |
| // Collect the loops' induction variables into indices array, |
| // which will be used in the innermost loop to load the input |
| // array's element. |
| // The loops are generated such that the innermost loop processes |
| // the 0 dimension. |
| llvm::SmallVector<mlir::Value, Fortran::common::maxRank> indices; |
| for (unsigned i = rank; 0 < i; --i) { |
| mlir::Value step = one; |
| mlir::Value loopCount = bounds[i - 1]; |
| auto loop = |
| builder.create<fir::DoLoopOp>(loc, zeroIdx, loopCount, step, false, |
| /*finalCountValue=*/false, init); |
| init = loop.getRegionIterArgs()[0]; |
| indices.push_back(loop.getInductionVar()); |
| // Set insertion point to the loop body so that the next loop |
| // is inserted inside the current one. |
| builder.setInsertionPointToStart(loop.getBody()); |
| } |
| |
| // Reverse the indices such that they are ordered as: |
| // <dim-0-idx, dim-1-idx, ...> |
| std::reverse(indices.begin(), indices.end()); |
| mlir::Value reductionVal = |
| genBody(builder, loc, elementType, array, flagRef, init, indices); |
| |
| // Unwind the loop nest and insert ResultOp on each level |
| // to return the updated value of the reduction to the enclosing |
| // loops. |
| for (unsigned i = 0; i < rank; ++i) { |
| auto result = builder.create<fir::ResultOp>(loc, reductionVal); |
| // Proceed to the outer loop. |
| auto loop = mlir::cast<fir::DoLoopOp>(result->getParentOp()); |
| reductionVal = loop.getResult(0); |
| // Set insertion point after the loop operation that we have |
| // just processed. |
| builder.setInsertionPointAfter(loop.getOperation()); |
| } |
| // End of loop nest. The insertion point is after the outermost loop. |
| if (maskMayBeLogicalScalar) { |
| if (fir::IfOp ifOp = |
| mlir::dyn_cast<fir::IfOp>(builder.getBlock()->getParentOp())) { |
| builder.create<fir::ResultOp>(loc, reductionVal); |
| builder.setInsertionPointAfter(ifOp); |
| // Redefine flagSet to escape scope of ifOp |
| flagSet = builder.createIntegerConstant(loc, resultElemType, 1); |
| reductionVal = ifOp.getResult(0); |
| } |
| } |
| } |
| |
| static void genRuntimeMinMaxlocBody(fir::FirOpBuilder &builder, |
| mlir::func::FuncOp &funcOp, bool isMax, |
| unsigned rank, int maskRank, |
| mlir::Type elementType, |
| mlir::Type maskElemType, |
| mlir::Type resultElemTy, bool isDim) { |
| auto init = [isMax](fir::FirOpBuilder builder, mlir::Location loc, |
| mlir::Type elementType) { |
| if (auto ty = mlir::dyn_cast<mlir::FloatType>(elementType)) { |
| const llvm::fltSemantics &sem = ty.getFloatSemantics(); |
| llvm::APFloat limit = llvm::APFloat::getInf(sem, /*Negative=*/isMax); |
| return builder.createRealConstant(loc, elementType, limit); |
| } |
| unsigned bits = elementType.getIntOrFloatBitWidth(); |
| int64_t initValue = (isMax ? llvm::APInt::getSignedMinValue(bits) |
| : llvm::APInt::getSignedMaxValue(bits)) |
| .getSExtValue(); |
| return builder.createIntegerConstant(loc, elementType, initValue); |
| }; |
| |
| mlir::Location loc = mlir::UnknownLoc::get(builder.getContext()); |
| builder.setInsertionPointToEnd(funcOp.addEntryBlock()); |
| |
| mlir::Value mask = funcOp.front().getArgument(2); |
| |
| // Set up result array in case of early exit / 0 length array |
| mlir::IndexType idxTy = builder.getIndexType(); |
| mlir::Type resultTy = fir::SequenceType::get(rank, resultElemTy); |
| mlir::Type resultHeapTy = fir::HeapType::get(resultTy); |
| mlir::Type resultBoxTy = fir::BoxType::get(resultHeapTy); |
| |
| mlir::Value returnValue = builder.createIntegerConstant(loc, resultElemTy, 0); |
| mlir::Value resultArrSize = builder.createIntegerConstant(loc, idxTy, rank); |
| |
| mlir::Value resultArrInit = builder.create<fir::AllocMemOp>(loc, resultTy); |
| mlir::Value resultArrShape = builder.create<fir::ShapeOp>(loc, resultArrSize); |
| mlir::Value resultArr = builder.create<fir::EmboxOp>( |
| loc, resultBoxTy, resultArrInit, resultArrShape); |
| |
| mlir::Type resultRefTy = builder.getRefType(resultElemTy); |
| |
| if (maskRank > 0) { |
| fir::SequenceType::Shape flatShape(rank, |
| fir::SequenceType::getUnknownExtent()); |
| mlir::Type maskTy = fir::SequenceType::get(flatShape, maskElemType); |
| mlir::Type boxMaskTy = fir::BoxType::get(maskTy); |
| mask = builder.create<fir::ConvertOp>(loc, boxMaskTy, mask); |
| } |
| |
| for (unsigned int i = 0; i < rank; ++i) { |
| mlir::Value index = builder.createIntegerConstant(loc, idxTy, i); |
| mlir::Value resultElemAddr = |
| builder.create<fir::CoordinateOp>(loc, resultRefTy, resultArr, index); |
| builder.create<fir::StoreOp>(loc, returnValue, resultElemAddr); |
| } |
| |
| auto genBodyOp = |
| [&rank, &resultArr, isMax, &mask, &maskElemType, &maskRank]( |
| fir::FirOpBuilder builder, mlir::Location loc, mlir::Type elementType, |
| mlir::Value array, mlir::Value flagRef, mlir::Value reduction, |
| const llvm::SmallVectorImpl<mlir::Value> &indices) -> mlir::Value { |
| // We are in the innermost loop: generate the reduction body. |
| if (maskRank > 0) { |
| mlir::Type logicalRef = builder.getRefType(maskElemType); |
| mlir::Value maskAddr = |
| builder.create<fir::CoordinateOp>(loc, logicalRef, mask, indices); |
| mlir::Value maskElem = builder.create<fir::LoadOp>(loc, maskAddr); |
| |
| // fir::IfOp requires argument to be I1 - won't accept logical or any |
| // other Integer. |
| mlir::Type ifCompatType = builder.getI1Type(); |
| mlir::Value ifCompatElem = |
| builder.create<fir::ConvertOp>(loc, ifCompatType, maskElem); |
| |
| llvm::SmallVector<mlir::Type> resultsTy = {elementType, elementType}; |
| fir::IfOp ifOp = builder.create<fir::IfOp>(loc, elementType, ifCompatElem, |
| /*withElseRegion=*/true); |
| builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); |
| } |
| |
| // Set flag that mask was true at some point |
| mlir::Value flagSet = builder.createIntegerConstant( |
| loc, mlir::cast<fir::ReferenceType>(flagRef.getType()).getEleTy(), 1); |
| mlir::Value isFirst = builder.create<fir::LoadOp>(loc, flagRef); |
| mlir::Type eleRefTy = builder.getRefType(elementType); |
| mlir::Value addr = |
| builder.create<fir::CoordinateOp>(loc, eleRefTy, array, indices); |
| mlir::Value elem = builder.create<fir::LoadOp>(loc, addr); |
| |
| mlir::Value cmp; |
| if (mlir::isa<mlir::FloatType>(elementType)) { |
| // For FP reductions we want the first smallest value to be used, that |
| // is not NaN. An OGT/OLT condition will usually work for this unless all |
| // the values are NaN or Inf. This follows the same logic as |
| // NumericCompare for Minloc/Maxloc in extrema.cpp. |
| cmp = builder.create<mlir::arith::CmpFOp>( |
| loc, |
| isMax ? mlir::arith::CmpFPredicate::OGT |
| : mlir::arith::CmpFPredicate::OLT, |
| elem, reduction); |
| |
| mlir::Value cmpNan = builder.create<mlir::arith::CmpFOp>( |
| loc, mlir::arith::CmpFPredicate::UNE, reduction, reduction); |
| mlir::Value cmpNan2 = builder.create<mlir::arith::CmpFOp>( |
| loc, mlir::arith::CmpFPredicate::OEQ, elem, elem); |
| cmpNan = builder.create<mlir::arith::AndIOp>(loc, cmpNan, cmpNan2); |
| cmp = builder.create<mlir::arith::OrIOp>(loc, cmp, cmpNan); |
| } else if (mlir::isa<mlir::IntegerType>(elementType)) { |
| cmp = builder.create<mlir::arith::CmpIOp>( |
| loc, |
| isMax ? mlir::arith::CmpIPredicate::sgt |
| : mlir::arith::CmpIPredicate::slt, |
| elem, reduction); |
| } else { |
| llvm_unreachable("unsupported type"); |
| } |
| |
| // The condition used for the loop is isFirst || <the condition above>. |
| isFirst = builder.create<fir::ConvertOp>(loc, cmp.getType(), isFirst); |
| isFirst = builder.create<mlir::arith::XOrIOp>( |
| loc, isFirst, builder.createIntegerConstant(loc, cmp.getType(), 1)); |
| cmp = builder.create<mlir::arith::OrIOp>(loc, cmp, isFirst); |
| fir::IfOp ifOp = builder.create<fir::IfOp>(loc, elementType, cmp, |
| /*withElseRegion*/ true); |
| |
| builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); |
| builder.create<fir::StoreOp>(loc, flagSet, flagRef); |
| mlir::Type resultElemTy = hlfir::getFortranElementType(resultArr.getType()); |
| mlir::Type returnRefTy = builder.getRefType(resultElemTy); |
| mlir::IndexType idxTy = builder.getIndexType(); |
| |
| mlir::Value one = builder.createIntegerConstant(loc, resultElemTy, 1); |
| |
| for (unsigned int i = 0; i < rank; ++i) { |
| mlir::Value index = builder.createIntegerConstant(loc, idxTy, i); |
| mlir::Value resultElemAddr = |
| builder.create<fir::CoordinateOp>(loc, returnRefTy, resultArr, index); |
| mlir::Value convert = |
| builder.create<fir::ConvertOp>(loc, resultElemTy, indices[i]); |
| mlir::Value fortranIndex = |
| builder.create<mlir::arith::AddIOp>(loc, convert, one); |
| builder.create<fir::StoreOp>(loc, fortranIndex, resultElemAddr); |
| } |
| builder.create<fir::ResultOp>(loc, elem); |
| builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); |
| builder.create<fir::ResultOp>(loc, reduction); |
| builder.setInsertionPointAfter(ifOp); |
| mlir::Value reductionVal = ifOp.getResult(0); |
| |
| // Close the mask if needed |
| if (maskRank > 0) { |
| fir::IfOp ifOp = |
| mlir::dyn_cast<fir::IfOp>(builder.getBlock()->getParentOp()); |
| builder.create<fir::ResultOp>(loc, reductionVal); |
| builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); |
| builder.create<fir::ResultOp>(loc, reduction); |
| reductionVal = ifOp.getResult(0); |
| builder.setInsertionPointAfter(ifOp); |
| } |
| |
| return reductionVal; |
| }; |
| |
| // if mask is a logical scalar, we can check its value before the main loop |
| // and either ignore the fact it is there or exit early. |
| if (maskRank == 0) { |
| mlir::Type logical = builder.getI1Type(); |
| mlir::IndexType idxTy = builder.getIndexType(); |
| |
| fir::SequenceType::Shape singleElement(1, 1); |
| mlir::Type arrTy = fir::SequenceType::get(singleElement, logical); |
| mlir::Type boxArrTy = fir::BoxType::get(arrTy); |
| mlir::Value array = builder.create<fir::ConvertOp>(loc, boxArrTy, mask); |
| |
| mlir::Value indx = builder.createIntegerConstant(loc, idxTy, 0); |
| mlir::Type logicalRefTy = builder.getRefType(logical); |
| mlir::Value condAddr = |
| builder.create<fir::CoordinateOp>(loc, logicalRefTy, array, indx); |
| mlir::Value cond = builder.create<fir::LoadOp>(loc, condAddr); |
| |
| fir::IfOp ifOp = builder.create<fir::IfOp>(loc, elementType, cond, |
| /*withElseRegion=*/true); |
| |
| builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); |
| mlir::Value basicValue; |
| if (mlir::isa<mlir::IntegerType>(elementType)) { |
| basicValue = builder.createIntegerConstant(loc, elementType, 0); |
| } else { |
| basicValue = builder.createRealConstant(loc, elementType, 0); |
| } |
| builder.create<fir::ResultOp>(loc, basicValue); |
| |
| builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); |
| } |
| auto getAddrFn = [](fir::FirOpBuilder builder, mlir::Location loc, |
| const mlir::Type &resultElemType, mlir::Value resultArr, |
| mlir::Value index) { |
| mlir::Type resultRefTy = builder.getRefType(resultElemType); |
| return builder.create<fir::CoordinateOp>(loc, resultRefTy, resultArr, |
| index); |
| }; |
| |
| genMinMaxlocReductionLoop(builder, funcOp.front().getArgument(1), init, |
| genBodyOp, getAddrFn, rank, elementType, loc, |
| maskElemType, resultArr, maskRank == 0); |
| |
| // Store newly created output array to the reference passed in |
| if (isDim) { |
| mlir::Type resultBoxTy = |
| fir::BoxType::get(fir::HeapType::get(resultElemTy)); |
| mlir::Value outputArr = builder.create<fir::ConvertOp>( |
| loc, builder.getRefType(resultBoxTy), funcOp.front().getArgument(0)); |
| mlir::Value resultArrScalar = builder.create<fir::ConvertOp>( |
| loc, fir::HeapType::get(resultElemTy), resultArrInit); |
| mlir::Value resultBox = |
| builder.create<fir::EmboxOp>(loc, resultBoxTy, resultArrScalar); |
| builder.create<fir::StoreOp>(loc, resultBox, outputArr); |
| } else { |
| fir::SequenceType::Shape resultShape(1, rank); |
| mlir::Type outputArrTy = fir::SequenceType::get(resultShape, resultElemTy); |
| mlir::Type outputHeapTy = fir::HeapType::get(outputArrTy); |
| mlir::Type outputBoxTy = fir::BoxType::get(outputHeapTy); |
| mlir::Type outputRefTy = builder.getRefType(outputBoxTy); |
| mlir::Value outputArr = builder.create<fir::ConvertOp>( |
| loc, outputRefTy, funcOp.front().getArgument(0)); |
| builder.create<fir::StoreOp>(loc, resultArr, outputArr); |
| } |
| |
| builder.create<mlir::func::ReturnOp>(loc); |
| } |
| |
| /// Generate function type for the simplified version of RTNAME(DotProduct) |
| /// operating on the given \p elementType. |
| static mlir::FunctionType genRuntimeDotType(fir::FirOpBuilder &builder, |
| const mlir::Type &elementType) { |
| mlir::Type boxType = fir::BoxType::get(builder.getNoneType()); |
| return mlir::FunctionType::get(builder.getContext(), {boxType, boxType}, |
| {elementType}); |
| } |
| |
| /// Generate function body of the simplified version of RTNAME(DotProduct) |
| /// with signature provided by \p funcOp. The caller is responsible |
| /// for saving/restoring the original insertion point of \p builder. |
| /// \p funcOp is expected to be empty on entry to this function. |
| /// \p arg1ElementTy and \p arg2ElementTy specify elements types |
| /// of the underlying array objects - they are used to generate proper |
| /// element accesses. |
| static void genRuntimeDotBody(fir::FirOpBuilder &builder, |
| mlir::func::FuncOp &funcOp, |
| mlir::Type arg1ElementTy, |
| mlir::Type arg2ElementTy) { |
| // function RTNAME(DotProduct)<T>_simplified(arr1, arr2) |
| // T, dimension(:) :: arr1, arr2 |
| // T product = 0 |
| // integer iter |
| // do iter = 0, extent(arr1) |
| // product = product + arr1[iter] * arr2[iter] |
| // end do |
| // RTNAME(ADotProduct)<T>_simplified = product |
| // end function RTNAME(DotProduct)<T>_simplified |
| auto loc = mlir::UnknownLoc::get(builder.getContext()); |
| mlir::Type resultElementType = funcOp.getResultTypes()[0]; |
| builder.setInsertionPointToEnd(funcOp.addEntryBlock()); |
| |
| mlir::IndexType idxTy = builder.getIndexType(); |
| |
| mlir::Value zero = |
| mlir::isa<mlir::FloatType>(resultElementType) |
| ? builder.createRealConstant(loc, resultElementType, 0.0) |
| : builder.createIntegerConstant(loc, resultElementType, 0); |
| |
| mlir::Block::BlockArgListType args = funcOp.front().getArguments(); |
| mlir::Value arg1 = args[0]; |
| mlir::Value arg2 = args[1]; |
| |
| mlir::Value zeroIdx = builder.createIntegerConstant(loc, idxTy, 0); |
| |
| fir::SequenceType::Shape flatShape = {fir::SequenceType::getUnknownExtent()}; |
| mlir::Type arrTy1 = fir::SequenceType::get(flatShape, arg1ElementTy); |
| mlir::Type boxArrTy1 = fir::BoxType::get(arrTy1); |
| mlir::Value array1 = builder.create<fir::ConvertOp>(loc, boxArrTy1, arg1); |
| mlir::Type arrTy2 = fir::SequenceType::get(flatShape, arg2ElementTy); |
| mlir::Type boxArrTy2 = fir::BoxType::get(arrTy2); |
| mlir::Value array2 = builder.create<fir::ConvertOp>(loc, boxArrTy2, arg2); |
| // This version takes the loop trip count from the first argument. |
| // If the first argument's box has unknown (at compilation time) |
| // extent, then it may be better to take the extent from the second |
| // argument - so that after inlining the loop may be better optimized, e.g. |
| // fully unrolled. This requires generating two versions of the simplified |
| // function and some analysis at the call site to choose which version |
| // is more profitable to call. |
| // Note that we can assume that both arguments have the same extent. |
| auto dims = |
| builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, array1, zeroIdx); |
| mlir::Value len = dims.getResult(1); |
| mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); |
| mlir::Value step = one; |
| |
| // We use C indexing here, so len-1 as loopcount |
| mlir::Value loopCount = builder.create<mlir::arith::SubIOp>(loc, len, one); |
| auto loop = builder.create<fir::DoLoopOp>(loc, zeroIdx, loopCount, step, |
| /*unordered=*/false, |
| /*finalCountValue=*/false, zero); |
| mlir::Value sumVal = loop.getRegionIterArgs()[0]; |
| |
| // Begin loop code |
| mlir::OpBuilder::InsertPoint loopEndPt = builder.saveInsertionPoint(); |
| builder.setInsertionPointToStart(loop.getBody()); |
| |
| mlir::Type eleRef1Ty = builder.getRefType(arg1ElementTy); |
| mlir::Value index = loop.getInductionVar(); |
| mlir::Value addr1 = |
| builder.create<fir::CoordinateOp>(loc, eleRef1Ty, array1, index); |
| mlir::Value elem1 = builder.create<fir::LoadOp>(loc, addr1); |
| // Convert to the result type. |
| elem1 = builder.create<fir::ConvertOp>(loc, resultElementType, elem1); |
| |
| mlir::Type eleRef2Ty = builder.getRefType(arg2ElementTy); |
| mlir::Value addr2 = |
| builder.create<fir::CoordinateOp>(loc, eleRef2Ty, array2, index); |
| mlir::Value elem2 = builder.create<fir::LoadOp>(loc, addr2); |
| // Convert to the result type. |
| elem2 = builder.create<fir::ConvertOp>(loc, resultElementType, elem2); |
| |
| if (mlir::isa<mlir::FloatType>(resultElementType)) |
| sumVal = builder.create<mlir::arith::AddFOp>( |
| loc, builder.create<mlir::arith::MulFOp>(loc, elem1, elem2), sumVal); |
| else if (mlir::isa<mlir::IntegerType>(resultElementType)) |
| sumVal = builder.create<mlir::arith::AddIOp>( |
| loc, builder.create<mlir::arith::MulIOp>(loc, elem1, elem2), sumVal); |
| else |
| llvm_unreachable("unsupported type"); |
| |
| builder.create<fir::ResultOp>(loc, sumVal); |
| // End of loop. |
| builder.restoreInsertionPoint(loopEndPt); |
| |
| mlir::Value resultVal = loop.getResult(0); |
| builder.create<mlir::func::ReturnOp>(loc, resultVal); |
| } |
| |
| mlir::func::FuncOp SimplifyIntrinsicsPass::getOrCreateFunction( |
| fir::FirOpBuilder &builder, const mlir::StringRef &baseName, |
| FunctionTypeGeneratorTy typeGenerator, |
| FunctionBodyGeneratorTy bodyGenerator) { |
| // WARNING: if the function generated here changes its signature |
| // or behavior (the body code), we should probably embed some |
| // versioning information into its name, otherwise libraries |
| // statically linked with older versions of Flang may stop |
| // working with object files created with newer Flang. |
| // We can also avoid this by using internal linkage, but |
| // this may increase the size of final executable/shared library. |
| std::string replacementName = mlir::Twine{baseName, "_simplified"}.str(); |
| // If we already have a function, just return it. |
| mlir::func::FuncOp newFunc = builder.getNamedFunction(replacementName); |
| mlir::FunctionType fType = typeGenerator(builder); |
| if (newFunc) { |
| assert(newFunc.getFunctionType() == fType && |
| "type mismatch for simplified function"); |
| return newFunc; |
| } |
| |
| // Need to build the function! |
| auto loc = mlir::UnknownLoc::get(builder.getContext()); |
| newFunc = builder.createFunction(loc, replacementName, fType); |
| auto inlineLinkage = mlir::LLVM::linkage::Linkage::LinkonceODR; |
| auto linkage = |
| mlir::LLVM::LinkageAttr::get(builder.getContext(), inlineLinkage); |
| newFunc->setAttr("llvm.linkage", linkage); |
| |
| // Save the position of the original call. |
| mlir::OpBuilder::InsertPoint insertPt = builder.saveInsertionPoint(); |
| |
| bodyGenerator(builder, newFunc); |
| |
| // Now back to where we were adding code earlier... |
| builder.restoreInsertionPoint(insertPt); |
| |
| return newFunc; |
| } |
| |
| void SimplifyIntrinsicsPass::simplifyIntOrFloatReduction( |
| fir::CallOp call, const fir::KindMapping &kindMap, |
| GenReductionBodyTy genBodyFunc) { |
| // args[1] and args[2] are source filename and line number, ignored. |
| mlir::Operation::operand_range args = call.getArgs(); |
| |
| const mlir::Value &dim = args[3]; |
| const mlir::Value &mask = args[4]; |
| // dim is zero when it is absent, which is an implementation |
| // detail in the runtime library. |
| |
| bool dimAndMaskAbsent = isZero(dim) && isOperandAbsent(mask); |
| unsigned rank = getDimCount(args[0]); |
| |
| // Rank is set to 0 for assumed shape arrays, don't simplify |
| // in these cases |
| if (!(dimAndMaskAbsent && rank > 0)) |
| return; |
| |
| mlir::Type resultType = call.getResult(0).getType(); |
| |
| if (!mlir::isa<mlir::FloatType>(resultType) && |
| !mlir::isa<mlir::IntegerType>(resultType)) |
| return; |
| |
| auto argType = getArgElementType(args[0]); |
| if (!argType) |
| return; |
| assert(*argType == resultType && |
| "Argument/result types mismatch in reduction"); |
| |
| mlir::SymbolRefAttr callee = call.getCalleeAttr(); |
| |
| fir::FirOpBuilder builder{getSimplificationBuilder(call, kindMap)}; |
| std::string fmfString{builder.getFastMathFlagsString()}; |
| std::string funcName = |
| (mlir::Twine{callee.getLeafReference().getValue(), "x"} + |
| mlir::Twine{rank} + |
| // We must mangle the generated function name with FastMathFlags |
| // value. |
| (fmfString.empty() ? mlir::Twine{} : mlir::Twine{"_", fmfString})) |
| .str(); |
| |
| simplifyReductionBody(call, kindMap, genBodyFunc, builder, funcName, |
| resultType); |
| } |
| |
| void SimplifyIntrinsicsPass::simplifyLogicalDim0Reduction( |
| fir::CallOp call, const fir::KindMapping &kindMap, |
| GenReductionBodyTy genBodyFunc) { |
| |
| mlir::Operation::operand_range args = call.getArgs(); |
| const mlir::Value &dim = args[3]; |
| unsigned rank = getDimCount(args[0]); |
| |
| // getDimCount returns a rank of 0 for assumed shape arrays, don't simplify in |
| // these cases. |
| if (!(isZero(dim) && rank > 0)) |
| return; |
| |
| mlir::Value inputBox = findBoxDef(args[0]); |
| |
| mlir::Type elementType = hlfir::getFortranElementType(inputBox.getType()); |
| mlir::SymbolRefAttr callee = call.getCalleeAttr(); |
| |
| fir::FirOpBuilder builder{getSimplificationBuilder(call, kindMap)}; |
| |
| // Treating logicals as integers makes things a lot easier |
| fir::LogicalType logicalType = { |
| mlir::dyn_cast<fir::LogicalType>(elementType)}; |
| fir::KindTy kind = logicalType.getFKind(); |
| mlir::Type intElementType = builder.getIntegerType(kind * 8); |
| |
| // Mangle kind into function name as it is not done by default |
| std::string funcName = |
| (mlir::Twine{callee.getLeafReference().getValue(), "Logical"} + |
| mlir::Twine{kind} + "x" + mlir::Twine{rank}) |
| .str(); |
| |
| simplifyReductionBody(call, kindMap, genBodyFunc, builder, funcName, |
| intElementType); |
| } |
| |
| void SimplifyIntrinsicsPass::simplifyLogicalDim1Reduction( |
| fir::CallOp call, const fir::KindMapping &kindMap, |
| GenReductionBodyTy genBodyFunc) { |
| |
| mlir::Operation::operand_range args = call.getArgs(); |
| mlir::SymbolRefAttr callee = call.getCalleeAttr(); |
| mlir::StringRef funcNameBase = callee.getLeafReference().getValue(); |
| unsigned rank = getDimCount(args[0]); |
| |
| // getDimCount returns a rank of 0 for assumed shape arrays, don't simplify in |
| // these cases. We check for Dim at the end as some logical functions (Any, |
| // All) set dim to 1 instead of 0 when the argument is not present. |
| if (funcNameBase.ends_with("Dim") || !(rank > 0)) |
| return; |
| |
| mlir::Value inputBox = findBoxDef(args[0]); |
| mlir::Type elementType = hlfir::getFortranElementType(inputBox.getType()); |
| |
| fir::FirOpBuilder builder{getSimplificationBuilder(call, kindMap)}; |
| |
| // Treating logicals as integers makes things a lot easier |
| fir::LogicalType logicalType = { |
| mlir::dyn_cast<fir::LogicalType>(elementType)}; |
| fir::KindTy kind = logicalType.getFKind(); |
| mlir::Type intElementType = builder.getIntegerType(kind * 8); |
| |
| // Mangle kind into function name as it is not done by default |
| std::string funcName = |
| (mlir::Twine{callee.getLeafReference().getValue(), "Logical"} + |
| mlir::Twine{kind} + "x" + mlir::Twine{rank}) |
| .str(); |
| |
| simplifyReductionBody(call, kindMap, genBodyFunc, builder, funcName, |
| intElementType); |
| } |
| |
| void SimplifyIntrinsicsPass::simplifyMinMaxlocReduction( |
| fir::CallOp call, const fir::KindMapping &kindMap, bool isMax) { |
| |
| mlir::Operation::operand_range args = call.getArgs(); |
| |
| mlir::SymbolRefAttr callee = call.getCalleeAttr(); |
| mlir::StringRef funcNameBase = callee.getLeafReference().getValue(); |
| bool isDim = funcNameBase.ends_with("Dim"); |
| mlir::Value back = args[isDim ? 7 : 6]; |
| if (isTrueOrNotConstant(back)) |
| return; |
| |
| mlir::Value mask = args[isDim ? 6 : 5]; |
| mlir::Value maskDef = findMaskDef(mask); |
| |
| // maskDef is set to NULL when the defining op is not one we accept. |
| // This tends to be because it is a selectOp, in which case let the |
| // runtime deal with it. |
| if (maskDef == NULL) |
| return; |
| |
| unsigned rank = getDimCount(args[1]); |
| if ((isDim && rank != 1) || !(rank > 0)) |
| return; |
| |
| fir::FirOpBuilder builder{getSimplificationBuilder(call, kindMap)}; |
| mlir::Location loc = call.getLoc(); |
| auto inputBox = findBoxDef(args[1]); |
| mlir::Type inputType = hlfir::getFortranElementType(inputBox.getType()); |
| |
| if (mlir::isa<fir::CharacterType>(inputType)) |
| return; |
| |
| int maskRank; |
| fir::KindTy kind = 0; |
| mlir::Type logicalElemType = builder.getI1Type(); |
| if (isOperandAbsent(mask)) { |
| maskRank = -1; |
| } else { |
| maskRank = getDimCount(mask); |
| mlir::Type maskElemTy = hlfir::getFortranElementType(maskDef.getType()); |
| fir::LogicalType logicalFirType = { |
| mlir::dyn_cast<fir::LogicalType>(maskElemTy)}; |
| kind = logicalFirType.getFKind(); |
| // Convert fir::LogicalType to mlir::Type |
| logicalElemType = logicalFirType; |
| } |
| |
| mlir::Operation *outputDef = args[0].getDefiningOp(); |
| mlir::Value outputAlloc = outputDef->getOperand(0); |
| mlir::Type outType = hlfir::getFortranElementType(outputAlloc.getType()); |
| |
| std::string fmfString{builder.getFastMathFlagsString()}; |
| std::string funcName = |
| (mlir::Twine{callee.getLeafReference().getValue(), "x"} + |
| mlir::Twine{rank} + |
| (maskRank >= 0 |
| ? "_Logical" + mlir::Twine{kind} + "x" + mlir::Twine{maskRank} |
| : "") + |
| "_") |
| .str(); |
| |
| llvm::raw_string_ostream nameOS(funcName); |
| outType.print(nameOS); |
| if (isDim) |
| nameOS << '_' << inputType; |
| nameOS << '_' << fmfString; |
| |
| auto typeGenerator = [rank](fir::FirOpBuilder &builder) { |
| return genRuntimeMinlocType(builder, rank); |
| }; |
| auto bodyGenerator = [rank, maskRank, inputType, logicalElemType, outType, |
| isMax, isDim](fir::FirOpBuilder &builder, |
| mlir::func::FuncOp &funcOp) { |
| genRuntimeMinMaxlocBody(builder, funcOp, isMax, rank, maskRank, inputType, |
| logicalElemType, outType, isDim); |
| }; |
| |
| mlir::func::FuncOp newFunc = |
| getOrCreateFunction(builder, funcName, typeGenerator, bodyGenerator); |
| builder.create<fir::CallOp>(loc, newFunc, |
| mlir::ValueRange{args[0], args[1], mask}); |
| call->dropAllReferences(); |
| call->erase(); |
| } |
| |
| void SimplifyIntrinsicsPass::simplifyReductionBody( |
| fir::CallOp call, const fir::KindMapping &kindMap, |
| GenReductionBodyTy genBodyFunc, fir::FirOpBuilder &builder, |
| const mlir::StringRef &funcName, mlir::Type elementType) { |
| |
| mlir::Operation::operand_range args = call.getArgs(); |
| |
| mlir::Type resultType = call.getResult(0).getType(); |
| unsigned rank = getDimCount(args[0]); |
| |
| mlir::Location loc = call.getLoc(); |
| |
| auto typeGenerator = [&resultType](fir::FirOpBuilder &builder) { |
| return genNoneBoxType(builder, resultType); |
| }; |
| auto bodyGenerator = [&rank, &genBodyFunc, |
| &elementType](fir::FirOpBuilder &builder, |
| mlir::func::FuncOp &funcOp) { |
| genBodyFunc(builder, funcOp, rank, elementType); |
| }; |
| // Mangle the function name with the rank value as "x<rank>". |
| mlir::func::FuncOp newFunc = |
| getOrCreateFunction(builder, funcName, typeGenerator, bodyGenerator); |
| auto newCall = |
| builder.create<fir::CallOp>(loc, newFunc, mlir::ValueRange{args[0]}); |
| call->replaceAllUsesWith(newCall.getResults()); |
| call->dropAllReferences(); |
| call->erase(); |
| } |
| |
| void SimplifyIntrinsicsPass::runOnOperation() { |
| LLVM_DEBUG(llvm::dbgs() << "=== Begin " DEBUG_TYPE " ===\n"); |
| mlir::ModuleOp module = getOperation(); |
| fir::KindMapping kindMap = fir::getKindMapping(module); |
| module.walk([&](mlir::Operation *op) { |
| if (auto call = mlir::dyn_cast<fir::CallOp>(op)) { |
| if (mlir::SymbolRefAttr callee = call.getCalleeAttr()) { |
| mlir::StringRef funcName = callee.getLeafReference().getValue(); |
| // Replace call to runtime function for SUM when it has single |
| // argument (no dim or mask argument) for 1D arrays with either |
| // Integer4 or Real8 types. Other forms are ignored. |
| // The new function is added to the module. |
| // |
| // Prototype for runtime call (from sum.cpp): |
| // RTNAME(Sum<T>)(const Descriptor &x, const char *source, int line, |
| // int dim, const Descriptor *mask) |
| // |
| if (funcName.starts_with(RTNAME_STRING(Sum))) { |
| simplifyIntOrFloatReduction(call, kindMap, genRuntimeSumBody); |
| return; |
| } |
| if (funcName.starts_with(RTNAME_STRING(DotProduct))) { |
| LLVM_DEBUG(llvm::dbgs() << "Handling " << funcName << "\n"); |
| LLVM_DEBUG(llvm::dbgs() << "Call operation:\n"; op->dump(); |
| llvm::dbgs() << "\n"); |
| mlir::Operation::operand_range args = call.getArgs(); |
| const mlir::Value &v1 = args[0]; |
| const mlir::Value &v2 = args[1]; |
| mlir::Location loc = call.getLoc(); |
| fir::FirOpBuilder builder{getSimplificationBuilder(op, kindMap)}; |
| // Stringize the builder's FastMathFlags flags for mangling |
| // the generated function name. |
| std::string fmfString{builder.getFastMathFlagsString()}; |
| |
| mlir::Type type = call.getResult(0).getType(); |
| if (!mlir::isa<mlir::FloatType>(type) && |
| !mlir::isa<mlir::IntegerType>(type)) |
| return; |
| |
| // Try to find the element types of the boxed arguments. |
| auto arg1Type = getArgElementType(v1); |
| auto arg2Type = getArgElementType(v2); |
| |
| if (!arg1Type || !arg2Type) |
| return; |
| |
| // Support only floating point and integer arguments |
| // now (e.g. logical is skipped here). |
| if (!mlir::isa<mlir::FloatType, mlir::IntegerType>(*arg1Type)) |
| return; |
| if (!mlir::isa<mlir::FloatType, mlir::IntegerType>(*arg2Type)) |
| return; |
| |
| auto typeGenerator = [&type](fir::FirOpBuilder &builder) { |
| return genRuntimeDotType(builder, type); |
| }; |
| auto bodyGenerator = [&arg1Type, |
| &arg2Type](fir::FirOpBuilder &builder, |
| mlir::func::FuncOp &funcOp) { |
| genRuntimeDotBody(builder, funcOp, *arg1Type, *arg2Type); |
| }; |
| |
| // Suffix the function name with the element types |
| // of the arguments. |
| std::string typedFuncName(funcName); |
| llvm::raw_string_ostream nameOS(typedFuncName); |
| // We must mangle the generated function name with FastMathFlags |
| // value. |
| if (!fmfString.empty()) |
| nameOS << '_' << fmfString; |
| nameOS << '_'; |
| arg1Type->print(nameOS); |
| nameOS << '_'; |
| arg2Type->print(nameOS); |
| |
| mlir::func::FuncOp newFunc = getOrCreateFunction( |
| builder, typedFuncName, typeGenerator, bodyGenerator); |
| auto newCall = builder.create<fir::CallOp>(loc, newFunc, |
| mlir::ValueRange{v1, v2}); |
| call->replaceAllUsesWith(newCall.getResults()); |
| call->dropAllReferences(); |
| call->erase(); |
| |
| LLVM_DEBUG(llvm::dbgs() << "Replaced with:\n"; newCall.dump(); |
| llvm::dbgs() << "\n"); |
| return; |
| } |
| if (funcName.starts_with(RTNAME_STRING(Maxval))) { |
| simplifyIntOrFloatReduction(call, kindMap, genRuntimeMaxvalBody); |
| return; |
| } |
| if (funcName.starts_with(RTNAME_STRING(Count))) { |
| simplifyLogicalDim0Reduction(call, kindMap, genRuntimeCountBody); |
| return; |
| } |
| if (funcName.starts_with(RTNAME_STRING(Any))) { |
| simplifyLogicalDim1Reduction(call, kindMap, genRuntimeAnyBody); |
| return; |
| } |
| if (funcName.ends_with(RTNAME_STRING(All))) { |
| simplifyLogicalDim1Reduction(call, kindMap, genRuntimeAllBody); |
| return; |
| } |
| if (funcName.starts_with(RTNAME_STRING(Minloc))) { |
| simplifyMinMaxlocReduction(call, kindMap, false); |
| return; |
| } |
| if (funcName.starts_with(RTNAME_STRING(Maxloc))) { |
| simplifyMinMaxlocReduction(call, kindMap, true); |
| return; |
| } |
| } |
| } |
| }); |
| LLVM_DEBUG(llvm::dbgs() << "=== End " DEBUG_TYPE " ===\n"); |
| } |
| |
| void SimplifyIntrinsicsPass::getDependentDialects( |
| mlir::DialectRegistry ®istry) const { |
| // LLVM::LinkageAttr creation requires that LLVM dialect is loaded. |
| registry.insert<mlir::LLVM::LLVMDialect>(); |
| } |