| //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements a translation between the MLIR OpenMP dialect and LLVM |
| // IR. |
| // |
| //===----------------------------------------------------------------------===// |
| #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h" |
| #include "mlir/Dialect/OpenMP/OpenMPDialect.h" |
| #include "mlir/IR/BlockAndValueMapping.h" |
| #include "mlir/IR/Operation.h" |
| #include "mlir/Support/LLVM.h" |
| #include "mlir/Target/LLVMIR/ModuleTranslation.h" |
| |
| #include "llvm/ADT/SetVector.h" |
| #include "llvm/ADT/TypeSwitch.h" |
| #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" |
| #include "llvm/IR/IRBuilder.h" |
| |
| using namespace mlir; |
| |
| namespace { |
| /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the |
| /// insertion points for allocas. |
| class OpenMPAllocaStackFrame |
| : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> { |
| public: |
| explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP) |
| : allocaInsertPoint(allocaIP) {} |
| llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint; |
| }; |
| |
| /// ModuleTranslation stack frame containing the partial mapping between MLIR |
| /// values and their LLVM IR equivalents. |
| class OpenMPVarMappingStackFrame |
| : public LLVM::ModuleTranslation::StackFrameBase< |
| OpenMPVarMappingStackFrame> { |
| public: |
| explicit OpenMPVarMappingStackFrame( |
| const DenseMap<Value, llvm::Value *> &mapping) |
| : mapping(mapping) {} |
| |
| DenseMap<Value, llvm::Value *> mapping; |
| }; |
| } // namespace |
| |
| /// Find the insertion point for allocas given the current insertion point for |
| /// normal operations in the builder. |
| static llvm::OpenMPIRBuilder::InsertPointTy |
| findAllocaInsertPoint(llvm::IRBuilderBase &builder, |
| const LLVM::ModuleTranslation &moduleTranslation) { |
| // If there is an alloca insertion point on stack, i.e. we are in a nested |
| // operation and a specific point was provided by some surrounding operation, |
| // use it. |
| llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint; |
| WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>( |
| [&](const OpenMPAllocaStackFrame &frame) { |
| allocaInsertPoint = frame.allocaInsertPoint; |
| return WalkResult::interrupt(); |
| }); |
| if (walkResult.wasInterrupted()) |
| return allocaInsertPoint; |
| |
| // Otherwise, insert to the entry block of the surrounding function. |
| llvm::BasicBlock &funcEntryBlock = |
| builder.GetInsertBlock()->getParent()->getEntryBlock(); |
| return llvm::OpenMPIRBuilder::InsertPointTy( |
| &funcEntryBlock, funcEntryBlock.getFirstInsertionPt()); |
| } |
| |
| /// Converts the given region that appears within an OpenMP dialect operation to |
| /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the |
| /// region, and a branch from any block with an successor-less OpenMP terminator |
| /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes |
| /// of the continuation block if provided. |
| static void convertOmpOpRegions( |
| Region ®ion, StringRef blockName, llvm::BasicBlock &sourceBlock, |
| llvm::BasicBlock &continuationBlock, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus, |
| SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) { |
| llvm::LLVMContext &llvmContext = builder.getContext(); |
| for (Block &bb : region) { |
| llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create( |
| llvmContext, blockName, builder.GetInsertBlock()->getParent(), |
| builder.GetInsertBlock()->getNextNode()); |
| moduleTranslation.mapBlock(&bb, llvmBB); |
| } |
| |
| llvm::Instruction *sourceTerminator = sourceBlock.getTerminator(); |
| |
| // Terminators (namely YieldOp) may be forwarding values to the region that |
| // need to be available in the continuation block. Collect the types of these |
| // operands in preparation of creating PHI nodes. |
| SmallVector<llvm::Type *> continuationBlockPHITypes; |
| bool operandsProcessed = false; |
| unsigned numYields = 0; |
| for (Block &bb : region.getBlocks()) { |
| if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) { |
| if (!operandsProcessed) { |
| for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) { |
| continuationBlockPHITypes.push_back( |
| moduleTranslation.convertType(yield->getOperand(i).getType())); |
| } |
| operandsProcessed = true; |
| } else { |
| assert(continuationBlockPHITypes.size() == yield->getNumOperands() && |
| "mismatching number of values yielded from the region"); |
| for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) { |
| llvm::Type *operandType = |
| moduleTranslation.convertType(yield->getOperand(i).getType()); |
| (void)operandType; |
| assert(continuationBlockPHITypes[i] == operandType && |
| "values of mismatching types yielded from the region"); |
| } |
| } |
| numYields++; |
| } |
| } |
| |
| // Insert PHI nodes in the continuation block for any values forwarded by the |
| // terminators in this region. |
| if (!continuationBlockPHITypes.empty()) |
| assert( |
| continuationBlockPHIs && |
| "expected continuation block PHIs if converted regions yield values"); |
| if (continuationBlockPHIs) { |
| llvm::IRBuilderBase::InsertPointGuard guard(builder); |
| continuationBlockPHIs->reserve(continuationBlockPHITypes.size()); |
| builder.SetInsertPoint(&continuationBlock, continuationBlock.begin()); |
| for (llvm::Type *ty : continuationBlockPHITypes) |
| continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields)); |
| } |
| |
| // Convert blocks one by one in topological order to ensure |
| // defs are converted before uses. |
| SetVector<Block *> blocks = |
| LLVM::detail::getTopologicallySortedBlocks(region); |
| for (Block *bb : blocks) { |
| llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb); |
| // Retarget the branch of the entry block to the entry block of the |
| // converted region (regions are single-entry). |
| if (bb->isEntryBlock()) { |
| assert(sourceTerminator->getNumSuccessors() == 1 && |
| "provided entry block has multiple successors"); |
| assert(sourceTerminator->getSuccessor(0) == &continuationBlock && |
| "ContinuationBlock is not the successor of the entry block"); |
| sourceTerminator->setSuccessor(0, llvmBB); |
| } |
| |
| llvm::IRBuilderBase::InsertPointGuard guard(builder); |
| if (failed( |
| moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) { |
| bodyGenStatus = failure(); |
| return; |
| } |
| |
| // Special handling for `omp.yield` and `omp.terminator` (we may have more |
| // than one): they return the control to the parent OpenMP dialect operation |
| // so replace them with the branch to the continuation block. We handle this |
| // here to avoid relying inter-function communication through the |
| // ModuleTranslation class to set up the correct insertion point. This is |
| // also consistent with MLIR's idiom of handling special region terminators |
| // in the same code that handles the region-owning operation. |
| Operation *terminator = bb->getTerminator(); |
| if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) { |
| builder.CreateBr(&continuationBlock); |
| |
| for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i) |
| (*continuationBlockPHIs)[i]->addIncoming( |
| moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB); |
| } |
| } |
| // After all blocks have been traversed and values mapped, connect the PHI |
| // nodes to the results of preceding blocks. |
| LLVM::detail::connectPHINodes(region, moduleTranslation); |
| |
| // Remove the blocks and values defined in this region from the mapping since |
| // they are not visible outside of this region. This allows the same region to |
| // be converted several times, that is cloned, without clashes, and slightly |
| // speeds up the lookups. |
| moduleTranslation.forgetMapping(region); |
| } |
| |
| /// Converts the OpenMP parallel operation to LLVM IR. |
| static LogicalResult |
| convertOmpParallel(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| // TODO: support error propagation in OpenMPIRBuilder and use it instead of |
| // relying on captured variables. |
| LogicalResult bodyGenStatus = success(); |
| |
| auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, |
| llvm::BasicBlock &continuationBlock) { |
| // Save the alloca insertion point on ModuleTranslation stack for use in |
| // nested regions. |
| LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame( |
| moduleTranslation, allocaIP); |
| |
| // ParallelOp has only one region associated with it. |
| auto ®ion = cast<omp::ParallelOp>(opInst).getRegion(); |
| convertOmpOpRegions(region, "omp.par.region", *codeGenIP.getBlock(), |
| continuationBlock, builder, moduleTranslation, |
| bodyGenStatus); |
| }; |
| |
| // TODO: Perform appropriate actions according to the data-sharing |
| // attribute (shared, private, firstprivate, ...) of variables. |
| // Currently defaults to shared. |
| auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, |
| llvm::Value &, llvm::Value &vPtr, |
| llvm::Value *&replacementValue) -> InsertPointTy { |
| replacementValue = &vPtr; |
| |
| return codeGenIP; |
| }; |
| |
| // TODO: Perform finalization actions for variables. This has to be |
| // called for variables which have destructors/finalizers. |
| auto finiCB = [&](InsertPointTy codeGenIP) {}; |
| |
| llvm::Value *ifCond = nullptr; |
| if (auto ifExprVar = cast<omp::ParallelOp>(opInst).if_expr_var()) |
| ifCond = moduleTranslation.lookupValue(ifExprVar); |
| llvm::Value *numThreads = nullptr; |
| if (auto numThreadsVar = cast<omp::ParallelOp>(opInst).num_threads_var()) |
| numThreads = moduleTranslation.lookupValue(numThreadsVar); |
| llvm::omp::ProcBindKind pbKind = llvm::omp::OMP_PROC_BIND_default; |
| if (auto bind = cast<omp::ParallelOp>(opInst).proc_bind_val()) |
| pbKind = llvm::omp::getProcBindKind(bind.getValue()); |
| // TODO: Is the Parallel construct cancellable? |
| bool isCancellable = false; |
| |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc( |
| builder.saveIP(), builder.getCurrentDebugLocation()); |
| builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createParallel( |
| ompLoc, findAllocaInsertPoint(builder, moduleTranslation), bodyGenCB, |
| privCB, finiCB, ifCond, numThreads, pbKind, isCancellable)); |
| |
| return bodyGenStatus; |
| } |
| |
| /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| // TODO: support error propagation in OpenMPIRBuilder and use it instead of |
| // relying on captured variables. |
| LogicalResult bodyGenStatus = success(); |
| |
| auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, |
| llvm::BasicBlock &continuationBlock) { |
| // MasterOp has only one region associated with it. |
| auto ®ion = cast<omp::MasterOp>(opInst).getRegion(); |
| convertOmpOpRegions(region, "omp.master.region", *codeGenIP.getBlock(), |
| continuationBlock, builder, moduleTranslation, |
| bodyGenStatus); |
| }; |
| |
| // TODO: Perform finalization actions for variables. This has to be |
| // called for variables which have destructors/finalizers. |
| auto finiCB = [&](InsertPointTy codeGenIP) {}; |
| |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc( |
| builder.saveIP(), builder.getCurrentDebugLocation()); |
| builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster( |
| ompLoc, bodyGenCB, finiCB)); |
| return success(); |
| } |
| |
| /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| auto criticalOp = cast<omp::CriticalOp>(opInst); |
| // TODO: support error propagation in OpenMPIRBuilder and use it instead of |
| // relying on captured variables. |
| LogicalResult bodyGenStatus = success(); |
| |
| auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, |
| llvm::BasicBlock &continuationBlock) { |
| // CriticalOp has only one region associated with it. |
| auto ®ion = cast<omp::CriticalOp>(opInst).getRegion(); |
| convertOmpOpRegions(region, "omp.critical.region", *codeGenIP.getBlock(), |
| continuationBlock, builder, moduleTranslation, |
| bodyGenStatus); |
| }; |
| |
| // TODO: Perform finalization actions for variables. This has to be |
| // called for variables which have destructors/finalizers. |
| auto finiCB = [&](InsertPointTy codeGenIP) {}; |
| |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc( |
| builder.saveIP(), builder.getCurrentDebugLocation()); |
| llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext(); |
| llvm::Constant *hint = nullptr; |
| |
| // If it has a name, it probably has a hint too. |
| if (criticalOp.nameAttr()) { |
| // The verifiers in OpenMP Dialect guarentee that all the pointers are |
| // non-null |
| auto symbolRef = criticalOp.nameAttr().cast<SymbolRefAttr>(); |
| auto criticalDeclareOp = |
| SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp, |
| symbolRef); |
| hint = llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), |
| static_cast<int>(criticalDeclareOp.hint())); |
| } |
| builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical( |
| ompLoc, bodyGenCB, finiCB, criticalOp.name().getValueOr(""), hint)); |
| return success(); |
| } |
| |
| /// Returns a reduction declaration that corresponds to the given reduction |
| /// operation in the given container. Currently only supports reductions inside |
| /// WsLoopOp but can be easily extended. |
| static omp::ReductionDeclareOp findReductionDecl(omp::WsLoopOp container, |
| omp::ReductionOp reduction) { |
| SymbolRefAttr reductionSymbol; |
| for (unsigned i = 0, e = container.getNumReductionVars(); i < e; ++i) { |
| if (container.reduction_vars()[i] != reduction.accumulator()) |
| continue; |
| reductionSymbol = (*container.reductions())[i].cast<SymbolRefAttr>(); |
| break; |
| } |
| assert(reductionSymbol && |
| "reduction operation must be associated with a declaration"); |
| |
| return SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>( |
| container, reductionSymbol); |
| } |
| |
| /// Populates `reductions` with reduction declarations used in the given loop. |
| static void |
| collectReductionDecls(omp::WsLoopOp loop, |
| SmallVectorImpl<omp::ReductionDeclareOp> &reductions) { |
| Optional<ArrayAttr> attr = loop.reductions(); |
| if (!attr) |
| return; |
| |
| reductions.reserve(reductions.size() + loop.getNumReductionVars()); |
| for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) { |
| reductions.push_back( |
| SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>( |
| loop, symbolRef)); |
| } |
| } |
| |
| /// Translates the blocks contained in the given region and appends them to at |
| /// the current insertion point of `builder`. The operations of the entry block |
| /// are appended to the current insertion block, which is not expected to have a |
| /// terminator. If set, `continuationBlockArgs` is populated with translated |
| /// values that correspond to the values omp.yield'ed from the region. |
| static LogicalResult inlineConvertOmpRegions( |
| Region ®ion, StringRef blockName, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation, |
| SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) { |
| if (region.empty()) |
| return success(); |
| |
| // Special case for single-block regions that don't create additional blocks: |
| // insert operations without creating additional blocks. |
| if (llvm::hasSingleElement(region)) { |
| moduleTranslation.mapBlock(®ion.front(), builder.GetInsertBlock()); |
| if (failed(moduleTranslation.convertBlock( |
| region.front(), /*ignoreArguments=*/true, builder))) |
| return failure(); |
| |
| // The continuation arguments are simply the translated terminator operands. |
| if (continuationBlockArgs) |
| llvm::append_range( |
| *continuationBlockArgs, |
| moduleTranslation.lookupValues(region.front().back().getOperands())); |
| |
| // Drop the mapping that is no longer necessary so that the same region can |
| // be processed multiple times. |
| moduleTranslation.forgetMapping(region); |
| return success(); |
| } |
| |
| // Create the continuation block manually instead of calling splitBlock |
| // because the current insertion block may not have a terminator. |
| llvm::BasicBlock *continuationBlock = |
| llvm::BasicBlock::Create(builder.getContext(), blockName + ".cont", |
| builder.GetInsertBlock()->getParent(), |
| builder.GetInsertBlock()->getNextNode()); |
| builder.CreateBr(continuationBlock); |
| |
| LogicalResult bodyGenStatus = success(); |
| SmallVector<llvm::PHINode *> phis; |
| convertOmpOpRegions(region, blockName, *builder.GetInsertBlock(), |
| *continuationBlock, builder, moduleTranslation, |
| bodyGenStatus, &phis); |
| if (failed(bodyGenStatus)) |
| return failure(); |
| if (continuationBlockArgs) |
| llvm::append_range(*continuationBlockArgs, phis); |
| builder.SetInsertPoint(continuationBlock, |
| continuationBlock->getFirstInsertionPt()); |
| return success(); |
| } |
| |
| namespace { |
| /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to |
| /// store lambdas with capture. |
| using OwningReductionGen = std::function<llvm::OpenMPIRBuilder::InsertPointTy( |
| llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *, |
| llvm::Value *&)>; |
| using OwningAtomicReductionGen = |
| std::function<llvm::OpenMPIRBuilder::InsertPointTy( |
| llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *)>; |
| } // namespace |
| |
| /// Create an OpenMPIRBuilder-compatible reduction generator for the given |
| /// reduction declaration. The generator uses `builder` but ignores its |
| /// insertion point. |
| static OwningReductionGen |
| makeReductionGen(omp::ReductionDeclareOp decl, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| // The lambda is mutable because we need access to non-const methods of decl |
| // (which aren't actually mutating it), and we must capture decl by-value to |
| // avoid the dangling reference after the parent function returns. |
| OwningReductionGen gen = |
| [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, |
| llvm::Value *lhs, llvm::Value *rhs, |
| llvm::Value *&result) mutable { |
| Region &reductionRegion = decl.reductionRegion(); |
| moduleTranslation.mapValue(reductionRegion.front().getArgument(0), lhs); |
| moduleTranslation.mapValue(reductionRegion.front().getArgument(1), rhs); |
| builder.restoreIP(insertPoint); |
| SmallVector<llvm::Value *> phis; |
| if (failed(inlineConvertOmpRegions(reductionRegion, |
| "omp.reduction.nonatomic.body", |
| builder, moduleTranslation, &phis))) |
| return llvm::OpenMPIRBuilder::InsertPointTy(); |
| assert(phis.size() == 1); |
| result = phis[0]; |
| return builder.saveIP(); |
| }; |
| return gen; |
| } |
| |
| /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the |
| /// given reduction declaration. The generator uses `builder` but ignores its |
| /// insertion point. Returns null if there is no atomic region available in the |
| /// reduction declaration. |
| static OwningAtomicReductionGen |
| makeAtomicReductionGen(omp::ReductionDeclareOp decl, |
| llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| if (decl.atomicReductionRegion().empty()) |
| return OwningAtomicReductionGen(); |
| |
| // The lambda is mutable because we need access to non-const methods of decl |
| // (which aren't actually mutating it), and we must capture decl by-value to |
| // avoid the dangling reference after the parent function returns. |
| OwningAtomicReductionGen atomicGen = |
| [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, |
| llvm::Value *lhs, llvm::Value *rhs) mutable { |
| Region &atomicRegion = decl.atomicReductionRegion(); |
| moduleTranslation.mapValue(atomicRegion.front().getArgument(0), lhs); |
| moduleTranslation.mapValue(atomicRegion.front().getArgument(1), rhs); |
| builder.restoreIP(insertPoint); |
| SmallVector<llvm::Value *> phis; |
| if (failed(inlineConvertOmpRegions(atomicRegion, |
| "omp.reduction.atomic.body", builder, |
| moduleTranslation, &phis))) |
| return llvm::OpenMPIRBuilder::InsertPointTy(); |
| assert(phis.empty()); |
| return builder.saveIP(); |
| }; |
| return atomicGen; |
| } |
| |
| /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| auto orderedOp = cast<omp::OrderedOp>(opInst); |
| |
| omp::ClauseDepend dependType = |
| *omp::symbolizeClauseDepend(orderedOp.depend_type_valAttr().getValue()); |
| bool isDependSource = dependType == omp::ClauseDepend::dependsource; |
| unsigned numLoops = orderedOp.num_loops_val().getValue(); |
| SmallVector<llvm::Value *> vecValues = |
| moduleTranslation.lookupValues(orderedOp.depend_vec_vars()); |
| |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc( |
| builder.saveIP(), builder.getCurrentDebugLocation()); |
| size_t indexVecValues = 0; |
| while (indexVecValues < vecValues.size()) { |
| SmallVector<llvm::Value *> storeValues; |
| storeValues.reserve(numLoops); |
| for (unsigned i = 0; i < numLoops; i++) { |
| storeValues.push_back(vecValues[indexVecValues]); |
| indexVecValues++; |
| } |
| builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend( |
| ompLoc, findAllocaInsertPoint(builder, moduleTranslation), numLoops, |
| storeValues, ".cnt.addr", isDependSource)); |
| } |
| return success(); |
| } |
| |
| /// Converts an OpenMP 'ordered_region' operation into LLVM IR using |
| /// OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst); |
| |
| // TODO: The code generation for ordered simd directive is not supported yet. |
| if (orderedRegionOp.simd()) |
| return failure(); |
| |
| // TODO: support error propagation in OpenMPIRBuilder and use it instead of |
| // relying on captured variables. |
| LogicalResult bodyGenStatus = success(); |
| |
| auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, |
| llvm::BasicBlock &continuationBlock) { |
| // OrderedOp has only one region associated with it. |
| auto ®ion = cast<omp::OrderedRegionOp>(opInst).getRegion(); |
| convertOmpOpRegions(region, "omp.ordered.region", *codeGenIP.getBlock(), |
| continuationBlock, builder, moduleTranslation, |
| bodyGenStatus); |
| }; |
| |
| // TODO: Perform finalization actions for variables. This has to be |
| // called for variables which have destructors/finalizers. |
| auto finiCB = [&](InsertPointTy codeGenIP) {}; |
| |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc( |
| builder.saveIP(), builder.getCurrentDebugLocation()); |
| builder.restoreIP( |
| moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd( |
| ompLoc, bodyGenCB, finiCB, !orderedRegionOp.simd())); |
| return bodyGenStatus; |
| } |
| |
| /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| auto loop = cast<omp::WsLoopOp>(opInst); |
| // TODO: this should be in the op verifier instead. |
| if (loop.lowerBound().empty()) |
| return failure(); |
| |
| // Static is the default. |
| omp::ClauseScheduleKind schedule = omp::ClauseScheduleKind::Static; |
| if (loop.schedule_val().hasValue()) |
| schedule = |
| *omp::symbolizeClauseScheduleKind(loop.schedule_val().getValue()); |
| |
| // Find the loop configuration. |
| llvm::Value *step = moduleTranslation.lookupValue(loop.step()[0]); |
| llvm::Type *ivType = step->getType(); |
| llvm::Value *chunk = |
| loop.schedule_chunk_var() |
| ? moduleTranslation.lookupValue(loop.schedule_chunk_var()) |
| : llvm::ConstantInt::get(ivType, 1); |
| |
| SmallVector<omp::ReductionDeclareOp> reductionDecls; |
| collectReductionDecls(loop, reductionDecls); |
| llvm::OpenMPIRBuilder::InsertPointTy allocaIP = |
| findAllocaInsertPoint(builder, moduleTranslation); |
| |
| // Allocate space for privatized reduction variables. |
| SmallVector<llvm::Value *> privateReductionVariables; |
| DenseMap<Value, llvm::Value *> reductionVariableMap; |
| unsigned numReductions = loop.getNumReductionVars(); |
| privateReductionVariables.reserve(numReductions); |
| if (numReductions != 0) { |
| llvm::IRBuilderBase::InsertPointGuard guard(builder); |
| builder.restoreIP(allocaIP); |
| for (unsigned i = 0; i < numReductions; ++i) { |
| auto reductionType = |
| loop.reduction_vars()[i].getType().cast<LLVM::LLVMPointerType>(); |
| llvm::Value *var = builder.CreateAlloca( |
| moduleTranslation.convertType(reductionType.getElementType())); |
| privateReductionVariables.push_back(var); |
| reductionVariableMap.try_emplace(loop.reduction_vars()[i], var); |
| } |
| } |
| |
| // Store the mapping between reduction variables and their private copies on |
| // ModuleTranslation stack. It can be then recovered when translating |
| // omp.reduce operations in a separate call. |
| LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard( |
| moduleTranslation, reductionVariableMap); |
| |
| // Before the loop, store the initial values of reductions into reduction |
| // variables. Although this could be done after allocas, we don't want to mess |
| // up with the alloca insertion point. |
| for (unsigned i = 0; i < numReductions; ++i) { |
| SmallVector<llvm::Value *> phis; |
| if (failed(inlineConvertOmpRegions(reductionDecls[i].initializerRegion(), |
| "omp.reduction.neutral", builder, |
| moduleTranslation, &phis))) |
| return failure(); |
| assert(phis.size() == 1 && "expected one value to be yielded from the " |
| "reduction neutral element declaration region"); |
| builder.CreateStore(phis[0], privateReductionVariables[i]); |
| } |
| |
| // Set up the source location value for OpenMP runtime. |
| llvm::DISubprogram *subprogram = |
| builder.GetInsertBlock()->getParent()->getSubprogram(); |
| const llvm::DILocation *diLoc = |
| moduleTranslation.translateLoc(opInst.getLoc(), subprogram); |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder.saveIP(), |
| llvm::DebugLoc(diLoc)); |
| |
| // Generator of the canonical loop body. |
| // TODO: support error propagation in OpenMPIRBuilder and use it instead of |
| // relying on captured variables. |
| SmallVector<llvm::CanonicalLoopInfo *> loopInfos; |
| SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints; |
| LogicalResult bodyGenStatus = success(); |
| auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { |
| // Make sure further conversions know about the induction variable. |
| moduleTranslation.mapValue( |
| loop.getRegion().front().getArgument(loopInfos.size()), iv); |
| |
| // Capture the body insertion point for use in nested loops. BodyIP of the |
| // CanonicalLoopInfo always points to the beginning of the entry block of |
| // the body. |
| bodyInsertPoints.push_back(ip); |
| |
| if (loopInfos.size() != loop.getNumLoops() - 1) |
| return; |
| |
| // Convert the body of the loop. |
| llvm::BasicBlock *entryBlock = ip.getBlock(); |
| llvm::BasicBlock *exitBlock = |
| entryBlock->splitBasicBlock(ip.getPoint(), "omp.wsloop.exit"); |
| convertOmpOpRegions(loop.region(), "omp.wsloop.region", *entryBlock, |
| *exitBlock, builder, moduleTranslation, bodyGenStatus); |
| }; |
| |
| // Delegate actual loop construction to the OpenMP IRBuilder. |
| // TODO: this currently assumes WsLoop is semantically similar to SCF loop, |
| // i.e. it has a positive step, uses signed integer semantics. Reconsider |
| // this code when WsLoop clearly supports more cases. |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) { |
| llvm::Value *lowerBound = |
| moduleTranslation.lookupValue(loop.lowerBound()[i]); |
| llvm::Value *upperBound = |
| moduleTranslation.lookupValue(loop.upperBound()[i]); |
| llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]); |
| |
| // Make sure loop trip count are emitted in the preheader of the outermost |
| // loop at the latest so that they are all available for the new collapsed |
| // loop will be created below. |
| llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc; |
| llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP; |
| if (i != 0) { |
| loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(), |
| llvm::DebugLoc(diLoc)); |
| computeIP = loopInfos.front()->getPreheaderIP(); |
| } |
| loopInfos.push_back(ompBuilder->createCanonicalLoop( |
| loc, bodyGen, lowerBound, upperBound, step, |
| /*IsSigned=*/true, loop.inclusive(), computeIP)); |
| |
| if (failed(bodyGenStatus)) |
| return failure(); |
| } |
| |
| // Collapse loops. Store the insertion point because LoopInfos may get |
| // invalidated. |
| llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP(); |
| llvm::CanonicalLoopInfo *loopInfo = |
| ompBuilder->collapseLoops(diLoc, loopInfos, {}); |
| |
| allocaIP = findAllocaInsertPoint(builder, moduleTranslation); |
| |
| bool isSimd = loop.simd_modifier(); |
| |
| if (schedule == omp::ClauseScheduleKind::Static) { |
| ompBuilder->applyStaticWorkshareLoop(ompLoc.DL, loopInfo, allocaIP, |
| !loop.nowait(), chunk); |
| } else { |
| llvm::omp::OMPScheduleType schedType; |
| switch (schedule) { |
| case omp::ClauseScheduleKind::Dynamic: |
| schedType = llvm::omp::OMPScheduleType::DynamicChunked; |
| break; |
| case omp::ClauseScheduleKind::Guided: |
| if (isSimd) |
| schedType = llvm::omp::OMPScheduleType::GuidedSimd; |
| else |
| schedType = llvm::omp::OMPScheduleType::GuidedChunked; |
| break; |
| case omp::ClauseScheduleKind::Auto: |
| schedType = llvm::omp::OMPScheduleType::Auto; |
| break; |
| case omp::ClauseScheduleKind::Runtime: |
| if (isSimd) |
| schedType = llvm::omp::OMPScheduleType::RuntimeSimd; |
| else |
| schedType = llvm::omp::OMPScheduleType::Runtime; |
| break; |
| default: |
| llvm_unreachable("Unknown schedule value"); |
| break; |
| } |
| |
| if (loop.schedule_modifier().hasValue()) { |
| omp::ScheduleModifier modifier = |
| *omp::symbolizeScheduleModifier(loop.schedule_modifier().getValue()); |
| switch (modifier) { |
| case omp::ScheduleModifier::monotonic: |
| schedType |= llvm::omp::OMPScheduleType::ModifierMonotonic; |
| break; |
| case omp::ScheduleModifier::nonmonotonic: |
| schedType |= llvm::omp::OMPScheduleType::ModifierNonmonotonic; |
| break; |
| default: |
| // Nothing to do here. |
| break; |
| } |
| } |
| afterIP = ompBuilder->applyDynamicWorkshareLoop( |
| ompLoc.DL, loopInfo, allocaIP, schedType, !loop.nowait(), chunk); |
| } |
| |
| // Continue building IR after the loop. Note that the LoopInfo returned by |
| // `collapseLoops` points inside the outermost loop and is intended for |
| // potential further loop transformations. Use the insertion point stored |
| // before collapsing loops instead. |
| builder.restoreIP(afterIP); |
| |
| // Process the reductions if required. |
| if (numReductions == 0) |
| return success(); |
| |
| // Create the reduction generators. We need to own them here because |
| // ReductionInfo only accepts references to the generators. |
| SmallVector<OwningReductionGen> owningReductionGens; |
| SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens; |
| for (unsigned i = 0; i < numReductions; ++i) { |
| owningReductionGens.push_back( |
| makeReductionGen(reductionDecls[i], builder, moduleTranslation)); |
| owningAtomicReductionGens.push_back( |
| makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation)); |
| } |
| |
| // Collect the reduction information. |
| SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos; |
| reductionInfos.reserve(numReductions); |
| for (unsigned i = 0; i < numReductions; ++i) { |
| llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr; |
| if (owningAtomicReductionGens[i]) |
| atomicGen = owningAtomicReductionGens[i]; |
| reductionInfos.push_back( |
| {moduleTranslation.lookupValue(loop.reduction_vars()[i]), |
| privateReductionVariables[i], owningReductionGens[i], atomicGen}); |
| } |
| |
| // The call to createReductions below expects the block to have a |
| // terminator. Create an unreachable instruction to serve as terminator |
| // and remove it later. |
| llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable(); |
| builder.SetInsertPoint(tempTerminator); |
| llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint = |
| ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos, |
| loop.nowait()); |
| if (!contInsertPoint.getBlock()) |
| return loop->emitOpError() << "failed to convert reductions"; |
| auto nextInsertionPoint = |
| ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for); |
| tempTerminator->eraseFromParent(); |
| builder.restoreIP(nextInsertionPoint); |
| |
| return success(); |
| } |
| |
| /// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the |
| /// mapping between reduction variables and their private equivalents to have |
| /// been stored on the ModuleTranslation stack. Currently only supports |
| /// reduction within WsLoopOp, but can be easily extended. |
| static LogicalResult |
| convertOmpReductionOp(omp::ReductionOp reductionOp, |
| llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| // Find the declaration that corresponds to the reduction op. |
| auto reductionContainer = reductionOp->getParentOfType<omp::WsLoopOp>(); |
| omp::ReductionDeclareOp declaration = |
| findReductionDecl(reductionContainer, reductionOp); |
| assert(declaration && "could not find reduction declaration"); |
| |
| // Retrieve the mapping between reduction variables and their private |
| // equivalents. |
| const DenseMap<Value, llvm::Value *> *reductionVariableMap = nullptr; |
| moduleTranslation.stackWalk<OpenMPVarMappingStackFrame>( |
| [&](const OpenMPVarMappingStackFrame &frame) { |
| reductionVariableMap = &frame.mapping; |
| return WalkResult::interrupt(); |
| }); |
| assert(reductionVariableMap && "couldn't find private reduction variables"); |
| |
| // Translate the reduction operation by emitting the body of the corresponding |
| // reduction declaration. |
| Region &reductionRegion = declaration.reductionRegion(); |
| llvm::Value *privateReductionVar = |
| reductionVariableMap->lookup(reductionOp.accumulator()); |
| llvm::Value *reductionVal = builder.CreateLoad( |
| moduleTranslation.convertType(reductionOp.operand().getType()), |
| privateReductionVar); |
| |
| moduleTranslation.mapValue(reductionRegion.front().getArgument(0), |
| reductionVal); |
| moduleTranslation.mapValue( |
| reductionRegion.front().getArgument(1), |
| moduleTranslation.lookupValue(reductionOp.operand())); |
| |
| SmallVector<llvm::Value *> phis; |
| if (failed(inlineConvertOmpRegions(reductionRegion, "omp.reduction.body", |
| builder, moduleTranslation, &phis))) |
| return failure(); |
| assert(phis.size() == 1 && "expected one value to be yielded from " |
| "the reduction body declaration region"); |
| builder.CreateStore(phis[0], privateReductionVar); |
| return success(); |
| } |
| |
| namespace { |
| |
| /// Implementation of the dialect interface that converts operations belonging |
| /// to the OpenMP dialect to LLVM IR. |
| class OpenMPDialectLLVMIRTranslationInterface |
| : public LLVMTranslationDialectInterface { |
| public: |
| using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface; |
| |
| /// Translates the given operation to LLVM IR using the provided IR builder |
| /// and saving the state in `moduleTranslation`. |
| LogicalResult |
| convertOperation(Operation *op, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) const final; |
| }; |
| |
| } // end namespace |
| |
| /// Given an OpenMP MLIR operation, create the corresponding LLVM IR |
| /// (including OpenMP runtime calls). |
| LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( |
| Operation *op, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) const { |
| |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| |
| return llvm::TypeSwitch<Operation *, LogicalResult>(op) |
| .Case([&](omp::BarrierOp) { |
| ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier); |
| return success(); |
| }) |
| .Case([&](omp::TaskwaitOp) { |
| ompBuilder->createTaskwait(builder.saveIP()); |
| return success(); |
| }) |
| .Case([&](omp::TaskyieldOp) { |
| ompBuilder->createTaskyield(builder.saveIP()); |
| return success(); |
| }) |
| .Case([&](omp::FlushOp) { |
| // No support in Openmp runtime function (__kmpc_flush) to accept |
| // the argument list. |
| // OpenMP standard states the following: |
| // "An implementation may implement a flush with a list by ignoring |
| // the list, and treating it the same as a flush without a list." |
| // |
| // The argument list is discarded so that, flush with a list is treated |
| // same as a flush without a list. |
| ompBuilder->createFlush(builder.saveIP()); |
| return success(); |
| }) |
| .Case([&](omp::ParallelOp) { |
| return convertOmpParallel(*op, builder, moduleTranslation); |
| }) |
| .Case([&](omp::ReductionOp reductionOp) { |
| return convertOmpReductionOp(reductionOp, builder, moduleTranslation); |
| }) |
| .Case([&](omp::MasterOp) { |
| return convertOmpMaster(*op, builder, moduleTranslation); |
| }) |
| .Case([&](omp::CriticalOp) { |
| return convertOmpCritical(*op, builder, moduleTranslation); |
| }) |
| .Case([&](omp::OrderedRegionOp) { |
| return convertOmpOrderedRegion(*op, builder, moduleTranslation); |
| }) |
| .Case([&](omp::OrderedOp) { |
| return convertOmpOrdered(*op, builder, moduleTranslation); |
| }) |
| .Case([&](omp::WsLoopOp) { |
| return convertOmpWsLoop(*op, builder, moduleTranslation); |
| }) |
| .Case<omp::YieldOp, omp::TerminatorOp, omp::ReductionDeclareOp, |
| omp::CriticalDeclareOp>([](auto op) { |
| // `yield` and `terminator` can be just omitted. The block structure |
| // was created in the region that handles their parent operation. |
| // `reduction.declare` will be used by reductions and is not |
| // converted directly, skip it. |
| // `critical.declare` is only used to declare names of critical |
| // sections which will be used by `critical` ops and hence can be |
| // ignored for lowering. The OpenMP IRBuilder will create unique |
| // name for critical section names. |
| return success(); |
| }) |
| .Default([&](Operation *inst) { |
| return inst->emitError("unsupported OpenMP operation: ") |
| << inst->getName(); |
| }); |
| } |
| |
| void mlir::registerOpenMPDialectTranslation(DialectRegistry ®istry) { |
| registry.insert<omp::OpenMPDialect>(); |
| registry.addDialectInterface<omp::OpenMPDialect, |
| OpenMPDialectLLVMIRTranslationInterface>(); |
| } |
| |
| void mlir::registerOpenMPDialectTranslation(MLIRContext &context) { |
| DialectRegistry registry; |
| registerOpenMPDialectTranslation(registry); |
| context.appendDialectRegistry(registry); |
| } |