| //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements a translation between the MLIR OpenMP dialect and LLVM |
| // IR. |
| // |
| //===----------------------------------------------------------------------===// |
| #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h" |
| #include "mlir/Analysis/TopologicalSortUtils.h" |
| #include "mlir/Dialect/LLVMIR/LLVMDialect.h" |
| #include "mlir/Dialect/LLVMIR/LLVMTypes.h" |
| #include "mlir/Dialect/OpenMP/OpenMPDialect.h" |
| #include "mlir/Dialect/OpenMP/OpenMPInterfaces.h" |
| #include "mlir/IR/IRMapping.h" |
| #include "mlir/IR/Operation.h" |
| #include "mlir/Support/LLVM.h" |
| #include "mlir/Target/LLVMIR/Dialect/OpenMPCommon.h" |
| #include "mlir/Target/LLVMIR/ModuleTranslation.h" |
| #include "mlir/Transforms/RegionUtils.h" |
| |
| #include "llvm/ADT/ArrayRef.h" |
| #include "llvm/ADT/SetVector.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/ADT/TypeSwitch.h" |
| #include "llvm/Frontend/OpenMP/OMPConstants.h" |
| #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" |
| #include "llvm/IR/Constants.h" |
| #include "llvm/IR/DebugInfoMetadata.h" |
| #include "llvm/IR/DerivedTypes.h" |
| #include "llvm/IR/IRBuilder.h" |
| #include "llvm/IR/MDBuilder.h" |
| #include "llvm/IR/ReplaceConstant.h" |
| #include "llvm/Support/FileSystem.h" |
| #include "llvm/TargetParser/Triple.h" |
| #include "llvm/Transforms/Utils/ModuleUtils.h" |
| |
| #include <any> |
| #include <cstdint> |
| #include <iterator> |
| #include <numeric> |
| #include <optional> |
| #include <utility> |
| |
| using namespace mlir; |
| |
| namespace { |
| static llvm::omp::ScheduleKind |
| convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) { |
| if (!schedKind.has_value()) |
| return llvm::omp::OMP_SCHEDULE_Default; |
| switch (schedKind.value()) { |
| case omp::ClauseScheduleKind::Static: |
| return llvm::omp::OMP_SCHEDULE_Static; |
| case omp::ClauseScheduleKind::Dynamic: |
| return llvm::omp::OMP_SCHEDULE_Dynamic; |
| case omp::ClauseScheduleKind::Guided: |
| return llvm::omp::OMP_SCHEDULE_Guided; |
| case omp::ClauseScheduleKind::Auto: |
| return llvm::omp::OMP_SCHEDULE_Auto; |
| case omp::ClauseScheduleKind::Runtime: |
| return llvm::omp::OMP_SCHEDULE_Runtime; |
| } |
| llvm_unreachable("unhandled schedule clause argument"); |
| } |
| |
| /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the |
| /// insertion points for allocas. |
| class OpenMPAllocaStackFrame |
| : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> { |
| public: |
| MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame) |
| |
| explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP) |
| : allocaInsertPoint(allocaIP) {} |
| llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint; |
| }; |
| |
| /// Stack frame to hold a \see llvm::CanonicalLoopInfo representing the |
| /// collapsed canonical loop information corresponding to an \c omp.loop_nest |
| /// operation. |
| class OpenMPLoopInfoStackFrame |
| : public LLVM::ModuleTranslation::StackFrameBase<OpenMPLoopInfoStackFrame> { |
| public: |
| MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPLoopInfoStackFrame) |
| llvm::CanonicalLoopInfo *loopInfo = nullptr; |
| }; |
| |
| /// Custom error class to signal translation errors that don't need reporting, |
| /// since encountering them will have already triggered relevant error messages. |
| /// |
| /// Its purpose is to serve as the glue between MLIR failures represented as |
| /// \see LogicalResult instances and \see llvm::Error instances used to |
| /// propagate errors through the \see llvm::OpenMPIRBuilder. Generally, when an |
| /// error of the first type is raised, a message is emitted directly (the \see |
| /// LogicalResult itself does not hold any information). If we need to forward |
| /// this error condition as an \see llvm::Error while avoiding triggering some |
| /// redundant error reporting later on, we need a custom \see llvm::ErrorInfo |
| /// class to just signal this situation has happened. |
| /// |
| /// For example, this class should be used to trigger errors from within |
| /// callbacks passed to the \see OpenMPIRBuilder when they were triggered by the |
| /// translation of their own regions. This unclutters the error log from |
| /// redundant messages. |
| class PreviouslyReportedError |
| : public llvm::ErrorInfo<PreviouslyReportedError> { |
| public: |
| void log(raw_ostream &) const override { |
| // Do not log anything. |
| } |
| |
| std::error_code convertToErrorCode() const override { |
| llvm_unreachable( |
| "PreviouslyReportedError doesn't support ECError conversion"); |
| } |
| |
| // Used by ErrorInfo::classID. |
| static char ID; |
| }; |
| |
| char PreviouslyReportedError::ID = 0; |
| |
| } // namespace |
| |
| /// Looks up from the operation from and returns the PrivateClauseOp with |
| /// name symbolName |
| static omp::PrivateClauseOp findPrivatizer(Operation *from, |
| SymbolRefAttr symbolName) { |
| omp::PrivateClauseOp privatizer = |
| SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(from, |
| symbolName); |
| assert(privatizer && "privatizer not found in the symbol table"); |
| return privatizer; |
| } |
| |
| /// Check whether translation to LLVM IR for the given operation is currently |
| /// supported. If not, descriptive diagnostics will be emitted to let users know |
| /// this is a not-yet-implemented feature. |
| /// |
| /// \returns success if no unimplemented features are needed to translate the |
| /// given operation. |
| static LogicalResult checkImplementationStatus(Operation &op) { |
| auto todo = [&op](StringRef clauseName) { |
| return op.emitError() << "not yet implemented: Unhandled clause " |
| << clauseName << " in " << op.getName() |
| << " operation"; |
| }; |
| |
| auto checkAllocate = [&todo](auto op, LogicalResult &result) { |
| if (!op.getAllocateVars().empty() || !op.getAllocatorVars().empty()) |
| result = todo("allocate"); |
| }; |
| auto checkBare = [&todo](auto op, LogicalResult &result) { |
| if (op.getBare()) |
| result = todo("ompx_bare"); |
| }; |
| auto checkCancelDirective = [&todo](auto op, LogicalResult &result) { |
| omp::ClauseCancellationConstructType cancelledDirective = |
| op.getCancelDirective(); |
| if (cancelledDirective == omp::ClauseCancellationConstructType::Taskgroup) |
| result = todo("cancel directive construct type not yet supported"); |
| }; |
| auto checkDepend = [&todo](auto op, LogicalResult &result) { |
| if (!op.getDependVars().empty() || op.getDependKinds()) |
| result = todo("depend"); |
| }; |
| auto checkDevice = [&todo](auto op, LogicalResult &result) { |
| if (op.getDevice()) |
| result = todo("device"); |
| }; |
| auto checkDistSchedule = [&todo](auto op, LogicalResult &result) { |
| if (op.getDistScheduleChunkSize()) |
| result = todo("dist_schedule with chunk_size"); |
| }; |
| auto checkHint = [](auto op, LogicalResult &) { |
| if (op.getHint()) |
| op.emitWarning("hint clause discarded"); |
| }; |
| auto checkInReduction = [&todo](auto op, LogicalResult &result) { |
| if (!op.getInReductionVars().empty() || op.getInReductionByref() || |
| op.getInReductionSyms()) |
| result = todo("in_reduction"); |
| }; |
| auto checkIsDevicePtr = [&todo](auto op, LogicalResult &result) { |
| if (!op.getIsDevicePtrVars().empty()) |
| result = todo("is_device_ptr"); |
| }; |
| auto checkLinear = [&todo](auto op, LogicalResult &result) { |
| if (!op.getLinearVars().empty() || !op.getLinearStepVars().empty()) |
| result = todo("linear"); |
| }; |
| auto checkNowait = [&todo](auto op, LogicalResult &result) { |
| if (op.getNowait()) |
| result = todo("nowait"); |
| }; |
| auto checkOrder = [&todo](auto op, LogicalResult &result) { |
| if (op.getOrder() || op.getOrderMod()) |
| result = todo("order"); |
| }; |
| auto checkParLevelSimd = [&todo](auto op, LogicalResult &result) { |
| if (op.getParLevelSimd()) |
| result = todo("parallelization-level"); |
| }; |
| auto checkPriority = [&todo](auto op, LogicalResult &result) { |
| if (op.getPriority()) |
| result = todo("priority"); |
| }; |
| auto checkPrivate = [&todo](auto op, LogicalResult &result) { |
| if constexpr (std::is_same_v<std::decay_t<decltype(op)>, omp::TargetOp>) { |
| // Privatization is supported only for included target tasks. |
| if (!op.getPrivateVars().empty() && op.getNowait()) |
| result = todo("privatization for deferred target tasks"); |
| } else { |
| if (!op.getPrivateVars().empty() || op.getPrivateSyms()) |
| result = todo("privatization"); |
| } |
| }; |
| auto checkReduction = [&todo](auto op, LogicalResult &result) { |
| if (isa<omp::TeamsOp>(op) || isa<omp::SimdOp>(op)) |
| if (!op.getReductionVars().empty() || op.getReductionByref() || |
| op.getReductionSyms()) |
| result = todo("reduction"); |
| if (op.getReductionMod() && |
| op.getReductionMod().value() != omp::ReductionModifier::defaultmod) |
| result = todo("reduction with modifier"); |
| }; |
| auto checkTaskReduction = [&todo](auto op, LogicalResult &result) { |
| if (!op.getTaskReductionVars().empty() || op.getTaskReductionByref() || |
| op.getTaskReductionSyms()) |
| result = todo("task_reduction"); |
| }; |
| auto checkUntied = [&todo](auto op, LogicalResult &result) { |
| if (op.getUntied()) |
| result = todo("untied"); |
| }; |
| |
| LogicalResult result = success(); |
| llvm::TypeSwitch<Operation &>(op) |
| .Case([&](omp::CancelOp op) { checkCancelDirective(op, result); }) |
| .Case([&](omp::DistributeOp op) { |
| checkAllocate(op, result); |
| checkDistSchedule(op, result); |
| checkOrder(op, result); |
| }) |
| .Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); }) |
| .Case([&](omp::SectionsOp op) { |
| checkAllocate(op, result); |
| checkPrivate(op, result); |
| checkReduction(op, result); |
| }) |
| .Case([&](omp::SingleOp op) { |
| checkAllocate(op, result); |
| checkPrivate(op, result); |
| }) |
| .Case([&](omp::TeamsOp op) { |
| checkAllocate(op, result); |
| checkPrivate(op, result); |
| }) |
| .Case([&](omp::TaskOp op) { |
| checkAllocate(op, result); |
| checkInReduction(op, result); |
| }) |
| .Case([&](omp::TaskgroupOp op) { |
| checkAllocate(op, result); |
| checkTaskReduction(op, result); |
| }) |
| .Case([&](omp::TaskwaitOp op) { |
| checkDepend(op, result); |
| checkNowait(op, result); |
| }) |
| .Case([&](omp::TaskloopOp op) { |
| // TODO: Add other clauses check |
| checkUntied(op, result); |
| checkPriority(op, result); |
| }) |
| .Case([&](omp::WsloopOp op) { |
| checkAllocate(op, result); |
| checkLinear(op, result); |
| checkOrder(op, result); |
| checkReduction(op, result); |
| }) |
| .Case([&](omp::ParallelOp op) { |
| checkAllocate(op, result); |
| checkReduction(op, result); |
| }) |
| .Case([&](omp::SimdOp op) { |
| checkLinear(op, result); |
| checkReduction(op, result); |
| }) |
| .Case<omp::AtomicReadOp, omp::AtomicWriteOp, omp::AtomicUpdateOp, |
| omp::AtomicCaptureOp>([&](auto op) { checkHint(op, result); }) |
| .Case<omp::TargetEnterDataOp, omp::TargetExitDataOp, omp::TargetUpdateOp>( |
| [&](auto op) { checkDepend(op, result); }) |
| .Case([&](omp::TargetOp op) { |
| checkAllocate(op, result); |
| checkBare(op, result); |
| checkDevice(op, result); |
| checkInReduction(op, result); |
| checkIsDevicePtr(op, result); |
| checkPrivate(op, result); |
| }) |
| .Default([](Operation &) { |
| // Assume all clauses for an operation can be translated unless they are |
| // checked above. |
| }); |
| return result; |
| } |
| |
| static LogicalResult handleError(llvm::Error error, Operation &op) { |
| LogicalResult result = success(); |
| if (error) { |
| llvm::handleAllErrors( |
| std::move(error), |
| [&](const PreviouslyReportedError &) { result = failure(); }, |
| [&](const llvm::ErrorInfoBase &err) { |
| result = op.emitError(err.message()); |
| }); |
| } |
| return result; |
| } |
| |
| template <typename T> |
| static LogicalResult handleError(llvm::Expected<T> &result, Operation &op) { |
| if (!result) |
| return handleError(result.takeError(), op); |
| |
| return success(); |
| } |
| |
| /// Find the insertion point for allocas given the current insertion point for |
| /// normal operations in the builder. |
| static llvm::OpenMPIRBuilder::InsertPointTy |
| findAllocaInsertPoint(llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| // If there is an alloca insertion point on stack, i.e. we are in a nested |
| // operation and a specific point was provided by some surrounding operation, |
| // use it. |
| llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint; |
| WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>( |
| [&](OpenMPAllocaStackFrame &frame) { |
| allocaInsertPoint = frame.allocaInsertPoint; |
| return WalkResult::interrupt(); |
| }); |
| if (walkResult.wasInterrupted()) |
| return allocaInsertPoint; |
| |
| // Otherwise, insert to the entry block of the surrounding function. |
| // If the current IRBuilder InsertPoint is the function's entry, it cannot |
| // also be used for alloca insertion which would result in insertion order |
| // confusion. Create a new BasicBlock for the Builder and use the entry block |
| // for the allocs. |
| // TODO: Create a dedicated alloca BasicBlock at function creation such that |
| // we do not need to move the current InertPoint here. |
| if (builder.GetInsertBlock() == |
| &builder.GetInsertBlock()->getParent()->getEntryBlock()) { |
| assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() && |
| "Assuming end of basic block"); |
| llvm::BasicBlock *entryBB = llvm::BasicBlock::Create( |
| builder.getContext(), "entry", builder.GetInsertBlock()->getParent(), |
| builder.GetInsertBlock()->getNextNode()); |
| builder.CreateBr(entryBB); |
| builder.SetInsertPoint(entryBB); |
| } |
| |
| llvm::BasicBlock &funcEntryBlock = |
| builder.GetInsertBlock()->getParent()->getEntryBlock(); |
| return llvm::OpenMPIRBuilder::InsertPointTy( |
| &funcEntryBlock, funcEntryBlock.getFirstInsertionPt()); |
| } |
| |
| /// Find the loop information structure for the loop nest being translated. It |
| /// will return a `null` value unless called from the translation function for |
| /// a loop wrapper operation after successfully translating its body. |
| static llvm::CanonicalLoopInfo * |
| findCurrentLoopInfo(LLVM::ModuleTranslation &moduleTranslation) { |
| llvm::CanonicalLoopInfo *loopInfo = nullptr; |
| moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>( |
| [&](OpenMPLoopInfoStackFrame &frame) { |
| loopInfo = frame.loopInfo; |
| return WalkResult::interrupt(); |
| }); |
| return loopInfo; |
| } |
| |
| /// Converts the given region that appears within an OpenMP dialect operation to |
| /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the |
| /// region, and a branch from any block with an successor-less OpenMP terminator |
| /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes |
| /// of the continuation block if provided. |
| static llvm::Expected<llvm::BasicBlock *> convertOmpOpRegions( |
| Region ®ion, StringRef blockName, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation, |
| SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) { |
| bool isLoopWrapper = isa<omp::LoopWrapperInterface>(region.getParentOp()); |
| |
| llvm::BasicBlock *continuationBlock = |
| splitBB(builder, true, "omp.region.cont"); |
| llvm::BasicBlock *sourceBlock = builder.GetInsertBlock(); |
| |
| llvm::LLVMContext &llvmContext = builder.getContext(); |
| for (Block &bb : region) { |
| llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create( |
| llvmContext, blockName, builder.GetInsertBlock()->getParent(), |
| builder.GetInsertBlock()->getNextNode()); |
| moduleTranslation.mapBlock(&bb, llvmBB); |
| } |
| |
| llvm::Instruction *sourceTerminator = sourceBlock->getTerminator(); |
| |
| // Terminators (namely YieldOp) may be forwarding values to the region that |
| // need to be available in the continuation block. Collect the types of these |
| // operands in preparation of creating PHI nodes. This is skipped for loop |
| // wrapper operations, for which we know in advance they have no terminators. |
| SmallVector<llvm::Type *> continuationBlockPHITypes; |
| unsigned numYields = 0; |
| |
| if (!isLoopWrapper) { |
| bool operandsProcessed = false; |
| for (Block &bb : region.getBlocks()) { |
| if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) { |
| if (!operandsProcessed) { |
| for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) { |
| continuationBlockPHITypes.push_back( |
| moduleTranslation.convertType(yield->getOperand(i).getType())); |
| } |
| operandsProcessed = true; |
| } else { |
| assert(continuationBlockPHITypes.size() == yield->getNumOperands() && |
| "mismatching number of values yielded from the region"); |
| for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) { |
| llvm::Type *operandType = |
| moduleTranslation.convertType(yield->getOperand(i).getType()); |
| (void)operandType; |
| assert(continuationBlockPHITypes[i] == operandType && |
| "values of mismatching types yielded from the region"); |
| } |
| } |
| numYields++; |
| } |
| } |
| } |
| |
| // Insert PHI nodes in the continuation block for any values forwarded by the |
| // terminators in this region. |
| if (!continuationBlockPHITypes.empty()) |
| assert( |
| continuationBlockPHIs && |
| "expected continuation block PHIs if converted regions yield values"); |
| if (continuationBlockPHIs) { |
| llvm::IRBuilderBase::InsertPointGuard guard(builder); |
| continuationBlockPHIs->reserve(continuationBlockPHITypes.size()); |
| builder.SetInsertPoint(continuationBlock, continuationBlock->begin()); |
| for (llvm::Type *ty : continuationBlockPHITypes) |
| continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields)); |
| } |
| |
| // Convert blocks one by one in topological order to ensure |
| // defs are converted before uses. |
| SetVector<Block *> blocks = getBlocksSortedByDominance(region); |
| for (Block *bb : blocks) { |
| llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb); |
| // Retarget the branch of the entry block to the entry block of the |
| // converted region (regions are single-entry). |
| if (bb->isEntryBlock()) { |
| assert(sourceTerminator->getNumSuccessors() == 1 && |
| "provided entry block has multiple successors"); |
| assert(sourceTerminator->getSuccessor(0) == continuationBlock && |
| "ContinuationBlock is not the successor of the entry block"); |
| sourceTerminator->setSuccessor(0, llvmBB); |
| } |
| |
| llvm::IRBuilderBase::InsertPointGuard guard(builder); |
| if (failed( |
| moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) |
| return llvm::make_error<PreviouslyReportedError>(); |
| |
| // Create a direct branch here for loop wrappers to prevent their lack of a |
| // terminator from causing a crash below. |
| if (isLoopWrapper) { |
| builder.CreateBr(continuationBlock); |
| continue; |
| } |
| |
| // Special handling for `omp.yield` and `omp.terminator` (we may have more |
| // than one): they return the control to the parent OpenMP dialect operation |
| // so replace them with the branch to the continuation block. We handle this |
| // here to avoid relying inter-function communication through the |
| // ModuleTranslation class to set up the correct insertion point. This is |
| // also consistent with MLIR's idiom of handling special region terminators |
| // in the same code that handles the region-owning operation. |
| Operation *terminator = bb->getTerminator(); |
| if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) { |
| builder.CreateBr(continuationBlock); |
| |
| for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i) |
| (*continuationBlockPHIs)[i]->addIncoming( |
| moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB); |
| } |
| } |
| // After all blocks have been traversed and values mapped, connect the PHI |
| // nodes to the results of preceding blocks. |
| LLVM::detail::connectPHINodes(region, moduleTranslation); |
| |
| // Remove the blocks and values defined in this region from the mapping since |
| // they are not visible outside of this region. This allows the same region to |
| // be converted several times, that is cloned, without clashes, and slightly |
| // speeds up the lookups. |
| moduleTranslation.forgetMapping(region); |
| |
| return continuationBlock; |
| } |
| |
| /// Convert ProcBindKind from MLIR-generated enum to LLVM enum. |
| static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) { |
| switch (kind) { |
| case omp::ClauseProcBindKind::Close: |
| return llvm::omp::ProcBindKind::OMP_PROC_BIND_close; |
| case omp::ClauseProcBindKind::Master: |
| return llvm::omp::ProcBindKind::OMP_PROC_BIND_master; |
| case omp::ClauseProcBindKind::Primary: |
| return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary; |
| case omp::ClauseProcBindKind::Spread: |
| return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread; |
| } |
| llvm_unreachable("Unknown ClauseProcBindKind kind"); |
| } |
| |
| /// Maps block arguments from \p blockArgIface (which are MLIR values) to the |
| /// corresponding LLVM values of \p the interface's operands. This is useful |
| /// when an OpenMP region with entry block arguments is converted to LLVM. In |
| /// this case the block arguments are (part of) of the OpenMP region's entry |
| /// arguments and the operands are (part of) of the operands to the OpenMP op |
| /// containing the region. |
| static void forwardArgs(LLVM::ModuleTranslation &moduleTranslation, |
| omp::BlockArgOpenMPOpInterface blockArgIface) { |
| llvm::SmallVector<std::pair<Value, BlockArgument>> blockArgsPairs; |
| blockArgIface.getBlockArgsPairs(blockArgsPairs); |
| for (auto [var, arg] : blockArgsPairs) |
| moduleTranslation.mapValue(arg, moduleTranslation.lookupValue(var)); |
| } |
| |
| /// Helper function to map block arguments defined by ignored loop wrappers to |
| /// LLVM values and prevent any uses of those from triggering null pointer |
| /// dereferences. |
| /// |
| /// This must be called after block arguments of parent wrappers have already |
| /// been mapped to LLVM IR values. |
| static LogicalResult |
| convertIgnoredWrapper(omp::LoopWrapperInterface opInst, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| // Map block arguments directly to the LLVM value associated to the |
| // corresponding operand. This is semantically equivalent to this wrapper not |
| // being present. |
| return llvm::TypeSwitch<Operation *, LogicalResult>(opInst) |
| .Case([&](omp::SimdOp op) { |
| forwardArgs(moduleTranslation, |
| cast<omp::BlockArgOpenMPOpInterface>(*op)); |
| op.emitWarning() << "simd information on composite construct discarded"; |
| return success(); |
| }) |
| .Default([&](Operation *op) { |
| return op->emitError() << "cannot ignore wrapper"; |
| }); |
| } |
| |
| /// Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| auto maskedOp = cast<omp::MaskedOp>(opInst); |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| |
| if (failed(checkImplementationStatus(opInst))) |
| return failure(); |
| |
| auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { |
| // MaskedOp has only one region associated with it. |
| auto ®ion = maskedOp.getRegion(); |
| builder.restoreIP(codeGenIP); |
| return convertOmpOpRegions(region, "omp.masked.region", builder, |
| moduleTranslation) |
| .takeError(); |
| }; |
| |
| // TODO: Perform finalization actions for variables. This has to be |
| // called for variables which have destructors/finalizers. |
| auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); }; |
| |
| llvm::Value *filterVal = nullptr; |
| if (auto filterVar = maskedOp.getFilteredThreadId()) { |
| filterVal = moduleTranslation.lookupValue(filterVar); |
| } else { |
| llvm::LLVMContext &llvmContext = builder.getContext(); |
| filterVal = |
| llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), /*V=*/0); |
| } |
| assert(filterVal != nullptr); |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = |
| moduleTranslation.getOpenMPBuilder()->createMasked(ompLoc, bodyGenCB, |
| finiCB, filterVal); |
| |
| if (failed(handleError(afterIP, opInst))) |
| return failure(); |
| |
| builder.restoreIP(*afterIP); |
| return success(); |
| } |
| |
| /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| auto masterOp = cast<omp::MasterOp>(opInst); |
| |
| if (failed(checkImplementationStatus(opInst))) |
| return failure(); |
| |
| auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { |
| // MasterOp has only one region associated with it. |
| auto ®ion = masterOp.getRegion(); |
| builder.restoreIP(codeGenIP); |
| return convertOmpOpRegions(region, "omp.master.region", builder, |
| moduleTranslation) |
| .takeError(); |
| }; |
| |
| // TODO: Perform finalization actions for variables. This has to be |
| // called for variables which have destructors/finalizers. |
| auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); }; |
| |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = |
| moduleTranslation.getOpenMPBuilder()->createMaster(ompLoc, bodyGenCB, |
| finiCB); |
| |
| if (failed(handleError(afterIP, opInst))) |
| return failure(); |
| |
| builder.restoreIP(*afterIP); |
| return success(); |
| } |
| |
| /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| auto criticalOp = cast<omp::CriticalOp>(opInst); |
| |
| if (failed(checkImplementationStatus(opInst))) |
| return failure(); |
| |
| auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { |
| // CriticalOp has only one region associated with it. |
| auto ®ion = cast<omp::CriticalOp>(opInst).getRegion(); |
| builder.restoreIP(codeGenIP); |
| return convertOmpOpRegions(region, "omp.critical.region", builder, |
| moduleTranslation) |
| .takeError(); |
| }; |
| |
| // TODO: Perform finalization actions for variables. This has to be |
| // called for variables which have destructors/finalizers. |
| auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); }; |
| |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext(); |
| llvm::Constant *hint = nullptr; |
| |
| // If it has a name, it probably has a hint too. |
| if (criticalOp.getNameAttr()) { |
| // The verifiers in OpenMP Dialect guarentee that all the pointers are |
| // non-null |
| auto symbolRef = cast<SymbolRefAttr>(criticalOp.getNameAttr()); |
| auto criticalDeclareOp = |
| SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp, |
| symbolRef); |
| hint = |
| llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), |
| static_cast<int>(criticalDeclareOp.getHint())); |
| } |
| llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = |
| moduleTranslation.getOpenMPBuilder()->createCritical( |
| ompLoc, bodyGenCB, finiCB, criticalOp.getName().value_or(""), hint); |
| |
| if (failed(handleError(afterIP, opInst))) |
| return failure(); |
| |
| builder.restoreIP(*afterIP); |
| return success(); |
| } |
| |
| /// A util to collect info needed to convert delayed privatizers from MLIR to |
| /// LLVM. |
| struct PrivateVarsInfo { |
| template <typename OP> |
| PrivateVarsInfo(OP op) |
| : blockArgs( |
| cast<omp::BlockArgOpenMPOpInterface>(*op).getPrivateBlockArgs()) { |
| mlirVars.reserve(blockArgs.size()); |
| llvmVars.reserve(blockArgs.size()); |
| collectPrivatizationDecls<OP>(op); |
| |
| for (mlir::Value privateVar : op.getPrivateVars()) |
| mlirVars.push_back(privateVar); |
| } |
| |
| MutableArrayRef<BlockArgument> blockArgs; |
| SmallVector<mlir::Value> mlirVars; |
| SmallVector<llvm::Value *> llvmVars; |
| SmallVector<omp::PrivateClauseOp> privatizers; |
| |
| private: |
| /// Populates `privatizations` with privatization declarations used for the |
| /// given op. |
| template <class OP> |
| void collectPrivatizationDecls(OP op) { |
| std::optional<ArrayAttr> attr = op.getPrivateSyms(); |
| if (!attr) |
| return; |
| |
| privatizers.reserve(privatizers.size() + attr->size()); |
| for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) { |
| privatizers.push_back(findPrivatizer(op, symbolRef)); |
| } |
| } |
| }; |
| |
| /// Populates `reductions` with reduction declarations used in the given op. |
| template <typename T> |
| static void |
| collectReductionDecls(T op, |
| SmallVectorImpl<omp::DeclareReductionOp> &reductions) { |
| std::optional<ArrayAttr> attr = op.getReductionSyms(); |
| if (!attr) |
| return; |
| |
| reductions.reserve(reductions.size() + op.getNumReductionVars()); |
| for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) { |
| reductions.push_back( |
| SymbolTable::lookupNearestSymbolFrom<omp::DeclareReductionOp>( |
| op, symbolRef)); |
| } |
| } |
| |
| /// Translates the blocks contained in the given region and appends them to at |
| /// the current insertion point of `builder`. The operations of the entry block |
| /// are appended to the current insertion block. If set, `continuationBlockArgs` |
| /// is populated with translated values that correspond to the values |
| /// omp.yield'ed from the region. |
| static LogicalResult inlineConvertOmpRegions( |
| Region ®ion, StringRef blockName, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation, |
| SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) { |
| if (region.empty()) |
| return success(); |
| |
| // Special case for single-block regions that don't create additional blocks: |
| // insert operations without creating additional blocks. |
| if (llvm::hasSingleElement(region)) { |
| llvm::Instruction *potentialTerminator = |
| builder.GetInsertBlock()->empty() ? nullptr |
| : &builder.GetInsertBlock()->back(); |
| |
| if (potentialTerminator && potentialTerminator->isTerminator()) |
| potentialTerminator->removeFromParent(); |
| moduleTranslation.mapBlock(®ion.front(), builder.GetInsertBlock()); |
| |
| if (failed(moduleTranslation.convertBlock( |
| region.front(), /*ignoreArguments=*/true, builder))) |
| return failure(); |
| |
| // The continuation arguments are simply the translated terminator operands. |
| if (continuationBlockArgs) |
| llvm::append_range( |
| *continuationBlockArgs, |
| moduleTranslation.lookupValues(region.front().back().getOperands())); |
| |
| // Drop the mapping that is no longer necessary so that the same region can |
| // be processed multiple times. |
| moduleTranslation.forgetMapping(region); |
| |
| if (potentialTerminator && potentialTerminator->isTerminator()) { |
| llvm::BasicBlock *block = builder.GetInsertBlock(); |
| if (block->empty()) { |
| // this can happen for really simple reduction init regions e.g. |
| // %0 = llvm.mlir.constant(0 : i32) : i32 |
| // omp.yield(%0 : i32) |
| // because the llvm.mlir.constant (MLIR op) isn't converted into any |
| // llvm op |
| potentialTerminator->insertInto(block, block->begin()); |
| } else { |
| potentialTerminator->insertAfter(&block->back()); |
| } |
| } |
| |
| return success(); |
| } |
| |
| SmallVector<llvm::PHINode *> phis; |
| llvm::Expected<llvm::BasicBlock *> continuationBlock = |
| convertOmpOpRegions(region, blockName, builder, moduleTranslation, &phis); |
| |
| if (failed(handleError(continuationBlock, *region.getParentOp()))) |
| return failure(); |
| |
| if (continuationBlockArgs) |
| llvm::append_range(*continuationBlockArgs, phis); |
| builder.SetInsertPoint(*continuationBlock, |
| (*continuationBlock)->getFirstInsertionPt()); |
| return success(); |
| } |
| |
| namespace { |
| /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to |
| /// store lambdas with capture. |
| using OwningReductionGen = |
| std::function<llvm::OpenMPIRBuilder::InsertPointOrErrorTy( |
| llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *, |
| llvm::Value *&)>; |
| using OwningAtomicReductionGen = |
| std::function<llvm::OpenMPIRBuilder::InsertPointOrErrorTy( |
| llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *, |
| llvm::Value *)>; |
| } // namespace |
| |
| /// Create an OpenMPIRBuilder-compatible reduction generator for the given |
| /// reduction declaration. The generator uses `builder` but ignores its |
| /// insertion point. |
| static OwningReductionGen |
| makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| // The lambda is mutable because we need access to non-const methods of decl |
| // (which aren't actually mutating it), and we must capture decl by-value to |
| // avoid the dangling reference after the parent function returns. |
| OwningReductionGen gen = |
| [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, |
| llvm::Value *lhs, llvm::Value *rhs, |
| llvm::Value *&result) mutable |
| -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy { |
| moduleTranslation.mapValue(decl.getReductionLhsArg(), lhs); |
| moduleTranslation.mapValue(decl.getReductionRhsArg(), rhs); |
| builder.restoreIP(insertPoint); |
| SmallVector<llvm::Value *> phis; |
| if (failed(inlineConvertOmpRegions(decl.getReductionRegion(), |
| "omp.reduction.nonatomic.body", builder, |
| moduleTranslation, &phis))) |
| return llvm::createStringError( |
| "failed to inline `combiner` region of `omp.declare_reduction`"); |
| result = llvm::getSingleElement(phis); |
| return builder.saveIP(); |
| }; |
| return gen; |
| } |
| |
| /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the |
| /// given reduction declaration. The generator uses `builder` but ignores its |
| /// insertion point. Returns null if there is no atomic region available in the |
| /// reduction declaration. |
| static OwningAtomicReductionGen |
| makeAtomicReductionGen(omp::DeclareReductionOp decl, |
| llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| if (decl.getAtomicReductionRegion().empty()) |
| return OwningAtomicReductionGen(); |
| |
| // The lambda is mutable because we need access to non-const methods of decl |
| // (which aren't actually mutating it), and we must capture decl by-value to |
| // avoid the dangling reference after the parent function returns. |
| OwningAtomicReductionGen atomicGen = |
| [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *, |
| llvm::Value *lhs, llvm::Value *rhs) mutable |
| -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy { |
| moduleTranslation.mapValue(decl.getAtomicReductionLhsArg(), lhs); |
| moduleTranslation.mapValue(decl.getAtomicReductionRhsArg(), rhs); |
| builder.restoreIP(insertPoint); |
| SmallVector<llvm::Value *> phis; |
| if (failed(inlineConvertOmpRegions(decl.getAtomicReductionRegion(), |
| "omp.reduction.atomic.body", builder, |
| moduleTranslation, &phis))) |
| return llvm::createStringError( |
| "failed to inline `atomic` region of `omp.declare_reduction`"); |
| assert(phis.empty()); |
| return builder.saveIP(); |
| }; |
| return atomicGen; |
| } |
| |
| /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| auto orderedOp = cast<omp::OrderedOp>(opInst); |
| |
| if (failed(checkImplementationStatus(opInst))) |
| return failure(); |
| |
| omp::ClauseDepend dependType = *orderedOp.getDoacrossDependType(); |
| bool isDependSource = dependType == omp::ClauseDepend::dependsource; |
| unsigned numLoops = *orderedOp.getDoacrossNumLoops(); |
| SmallVector<llvm::Value *> vecValues = |
| moduleTranslation.lookupValues(orderedOp.getDoacrossDependVars()); |
| |
| size_t indexVecValues = 0; |
| while (indexVecValues < vecValues.size()) { |
| SmallVector<llvm::Value *> storeValues; |
| storeValues.reserve(numLoops); |
| for (unsigned i = 0; i < numLoops; i++) { |
| storeValues.push_back(vecValues[indexVecValues]); |
| indexVecValues++; |
| } |
| llvm::OpenMPIRBuilder::InsertPointTy allocaIP = |
| findAllocaInsertPoint(builder, moduleTranslation); |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend( |
| ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource)); |
| } |
| return success(); |
| } |
| |
| /// Converts an OpenMP 'ordered_region' operation into LLVM IR using |
| /// OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst); |
| |
| if (failed(checkImplementationStatus(opInst))) |
| return failure(); |
| |
| auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { |
| // OrderedOp has only one region associated with it. |
| auto ®ion = cast<omp::OrderedRegionOp>(opInst).getRegion(); |
| builder.restoreIP(codeGenIP); |
| return convertOmpOpRegions(region, "omp.ordered.region", builder, |
| moduleTranslation) |
| .takeError(); |
| }; |
| |
| // TODO: Perform finalization actions for variables. This has to be |
| // called for variables which have destructors/finalizers. |
| auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); }; |
| |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = |
| moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd( |
| ompLoc, bodyGenCB, finiCB, !orderedRegionOp.getParLevelSimd()); |
| |
| if (failed(handleError(afterIP, opInst))) |
| return failure(); |
| |
| builder.restoreIP(*afterIP); |
| return success(); |
| } |
| |
| namespace { |
| /// Contains the arguments for an LLVM store operation |
| struct DeferredStore { |
| DeferredStore(llvm::Value *value, llvm::Value *address) |
| : value(value), address(address) {} |
| |
| llvm::Value *value; |
| llvm::Value *address; |
| }; |
| } // namespace |
| |
| /// Allocate space for privatized reduction variables. |
| /// `deferredStores` contains information to create store operations which needs |
| /// to be inserted after all allocas |
| template <typename T> |
| static LogicalResult |
| allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs, |
| llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation, |
| const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, |
| SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls, |
| SmallVectorImpl<llvm::Value *> &privateReductionVariables, |
| DenseMap<Value, llvm::Value *> &reductionVariableMap, |
| SmallVectorImpl<DeferredStore> &deferredStores, |
| llvm::ArrayRef<bool> isByRefs) { |
| llvm::IRBuilderBase::InsertPointGuard guard(builder); |
| builder.SetInsertPoint(allocaIP.getBlock()->getTerminator()); |
| |
| // delay creating stores until after all allocas |
| deferredStores.reserve(loop.getNumReductionVars()); |
| |
| for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) { |
| Region &allocRegion = reductionDecls[i].getAllocRegion(); |
| if (isByRefs[i]) { |
| if (allocRegion.empty()) |
| continue; |
| |
| SmallVector<llvm::Value *, 1> phis; |
| if (failed(inlineConvertOmpRegions(allocRegion, "omp.reduction.alloc", |
| builder, moduleTranslation, &phis))) |
| return loop.emitError( |
| "failed to inline `alloc` region of `omp.declare_reduction`"); |
| |
| assert(phis.size() == 1 && "expected one allocation to be yielded"); |
| builder.SetInsertPoint(allocaIP.getBlock()->getTerminator()); |
| |
| // Allocate reduction variable (which is a pointer to the real reduction |
| // variable allocated in the inlined region) |
| llvm::Value *var = builder.CreateAlloca( |
| moduleTranslation.convertType(reductionDecls[i].getType())); |
| |
| llvm::Type *ptrTy = builder.getPtrTy(); |
| llvm::Value *castVar = |
| builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy); |
| llvm::Value *castPhi = |
| builder.CreatePointerBitCastOrAddrSpaceCast(phis[0], ptrTy); |
| |
| deferredStores.emplace_back(castPhi, castVar); |
| |
| privateReductionVariables[i] = castVar; |
| moduleTranslation.mapValue(reductionArgs[i], castPhi); |
| reductionVariableMap.try_emplace(loop.getReductionVars()[i], castPhi); |
| } else { |
| assert(allocRegion.empty() && |
| "allocaction is implicit for by-val reduction"); |
| llvm::Value *var = builder.CreateAlloca( |
| moduleTranslation.convertType(reductionDecls[i].getType())); |
| |
| llvm::Type *ptrTy = builder.getPtrTy(); |
| llvm::Value *castVar = |
| builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy); |
| |
| moduleTranslation.mapValue(reductionArgs[i], castVar); |
| privateReductionVariables[i] = castVar; |
| reductionVariableMap.try_emplace(loop.getReductionVars()[i], castVar); |
| } |
| } |
| |
| return success(); |
| } |
| |
| /// Map input arguments to reduction initialization region |
| template <typename T> |
| static void |
| mapInitializationArgs(T loop, LLVM::ModuleTranslation &moduleTranslation, |
| SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls, |
| DenseMap<Value, llvm::Value *> &reductionVariableMap, |
| unsigned i) { |
| // map input argument to the initialization region |
| mlir::omp::DeclareReductionOp &reduction = reductionDecls[i]; |
| Region &initializerRegion = reduction.getInitializerRegion(); |
| Block &entry = initializerRegion.front(); |
| |
| mlir::Value mlirSource = loop.getReductionVars()[i]; |
| llvm::Value *llvmSource = moduleTranslation.lookupValue(mlirSource); |
| assert(llvmSource && "lookup reduction var"); |
| moduleTranslation.mapValue(reduction.getInitializerMoldArg(), llvmSource); |
| |
| if (entry.getNumArguments() > 1) { |
| llvm::Value *allocation = |
| reductionVariableMap.lookup(loop.getReductionVars()[i]); |
| moduleTranslation.mapValue(reduction.getInitializerAllocArg(), allocation); |
| } |
| } |
| |
| static void |
| setInsertPointForPossiblyEmptyBlock(llvm::IRBuilderBase &builder, |
| llvm::BasicBlock *block = nullptr) { |
| if (block == nullptr) |
| block = builder.GetInsertBlock(); |
| |
| if (block->empty() || block->getTerminator() == nullptr) |
| builder.SetInsertPoint(block); |
| else |
| builder.SetInsertPoint(block->getTerminator()); |
| } |
| |
| /// Inline reductions' `init` regions. This functions assumes that the |
| /// `builder`'s insertion point is where the user wants the `init` regions to be |
| /// inlined; i.e. it does not try to find a proper insertion location for the |
| /// `init` regions. It also leaves the `builder's insertions point in a state |
| /// where the user can continue the code-gen directly afterwards. |
| template <typename OP> |
| static LogicalResult |
| initReductionVars(OP op, ArrayRef<BlockArgument> reductionArgs, |
| llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation, |
| llvm::BasicBlock *latestAllocaBlock, |
| SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls, |
| SmallVectorImpl<llvm::Value *> &privateReductionVariables, |
| DenseMap<Value, llvm::Value *> &reductionVariableMap, |
| llvm::ArrayRef<bool> isByRef, |
| SmallVectorImpl<DeferredStore> &deferredStores) { |
| if (op.getNumReductionVars() == 0) |
| return success(); |
| |
| llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init"); |
| auto allocaIP = llvm::IRBuilderBase::InsertPoint( |
| latestAllocaBlock, latestAllocaBlock->getTerminator()->getIterator()); |
| builder.restoreIP(allocaIP); |
| SmallVector<llvm::Value *> byRefVars(op.getNumReductionVars()); |
| |
| for (unsigned i = 0; i < op.getNumReductionVars(); ++i) { |
| if (isByRef[i]) { |
| if (!reductionDecls[i].getAllocRegion().empty()) |
| continue; |
| |
| // TODO: remove after all users of by-ref are updated to use the alloc |
| // region: Allocate reduction variable (which is a pointer to the real |
| // reduciton variable allocated in the inlined region) |
| byRefVars[i] = builder.CreateAlloca( |
| moduleTranslation.convertType(reductionDecls[i].getType())); |
| } |
| } |
| |
| setInsertPointForPossiblyEmptyBlock(builder, initBlock); |
| |
| // store result of the alloc region to the allocated pointer to the real |
| // reduction variable |
| for (auto [data, addr] : deferredStores) |
| builder.CreateStore(data, addr); |
| |
| // Before the loop, store the initial values of reductions into reduction |
| // variables. Although this could be done after allocas, we don't want to mess |
| // up with the alloca insertion point. |
| for (unsigned i = 0; i < op.getNumReductionVars(); ++i) { |
| SmallVector<llvm::Value *, 1> phis; |
| |
| // map block argument to initializer region |
| mapInitializationArgs(op, moduleTranslation, reductionDecls, |
| reductionVariableMap, i); |
| |
| if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(), |
| "omp.reduction.neutral", builder, |
| moduleTranslation, &phis))) |
| return failure(); |
| |
| assert(phis.size() == 1 && "expected one value to be yielded from the " |
| "reduction neutral element declaration region"); |
| |
| setInsertPointForPossiblyEmptyBlock(builder); |
| |
| if (isByRef[i]) { |
| if (!reductionDecls[i].getAllocRegion().empty()) |
| // done in allocReductionVars |
| continue; |
| |
| // TODO: this path can be removed once all users of by-ref are updated to |
| // use an alloc region |
| |
| // Store the result of the inlined region to the allocated reduction var |
| // ptr |
| builder.CreateStore(phis[0], byRefVars[i]); |
| |
| privateReductionVariables[i] = byRefVars[i]; |
| moduleTranslation.mapValue(reductionArgs[i], phis[0]); |
| reductionVariableMap.try_emplace(op.getReductionVars()[i], phis[0]); |
| } else { |
| // for by-ref case the store is inside of the reduction region |
| builder.CreateStore(phis[0], privateReductionVariables[i]); |
| // the rest was handled in allocByValReductionVars |
| } |
| |
| // forget the mapping for the initializer region because we might need a |
| // different mapping if this reduction declaration is re-used for a |
| // different variable |
| moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion()); |
| } |
| |
| return success(); |
| } |
| |
| /// Collect reduction info |
| template <typename T> |
| static void collectReductionInfo( |
| T loop, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation, |
| SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls, |
| SmallVectorImpl<OwningReductionGen> &owningReductionGens, |
| SmallVectorImpl<OwningAtomicReductionGen> &owningAtomicReductionGens, |
| const ArrayRef<llvm::Value *> privateReductionVariables, |
| SmallVectorImpl<llvm::OpenMPIRBuilder::ReductionInfo> &reductionInfos) { |
| unsigned numReductions = loop.getNumReductionVars(); |
| |
| for (unsigned i = 0; i < numReductions; ++i) { |
| owningReductionGens.push_back( |
| makeReductionGen(reductionDecls[i], builder, moduleTranslation)); |
| owningAtomicReductionGens.push_back( |
| makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation)); |
| } |
| |
| // Collect the reduction information. |
| reductionInfos.reserve(numReductions); |
| for (unsigned i = 0; i < numReductions; ++i) { |
| llvm::OpenMPIRBuilder::ReductionGenAtomicCBTy atomicGen = nullptr; |
| if (owningAtomicReductionGens[i]) |
| atomicGen = owningAtomicReductionGens[i]; |
| llvm::Value *variable = |
| moduleTranslation.lookupValue(loop.getReductionVars()[i]); |
| reductionInfos.push_back( |
| {moduleTranslation.convertType(reductionDecls[i].getType()), variable, |
| privateReductionVariables[i], |
| /*EvaluationKind=*/llvm::OpenMPIRBuilder::EvalKind::Scalar, |
| owningReductionGens[i], |
| /*ReductionGenClang=*/nullptr, atomicGen}); |
| } |
| } |
| |
| /// handling of DeclareReductionOp's cleanup region |
| static LogicalResult |
| inlineOmpRegionCleanup(llvm::SmallVectorImpl<Region *> &cleanupRegions, |
| llvm::ArrayRef<llvm::Value *> privateVariables, |
| LLVM::ModuleTranslation &moduleTranslation, |
| llvm::IRBuilderBase &builder, StringRef regionName, |
| bool shouldLoadCleanupRegionArg = true) { |
| for (auto [i, cleanupRegion] : llvm::enumerate(cleanupRegions)) { |
| if (cleanupRegion->empty()) |
| continue; |
| |
| // map the argument to the cleanup region |
| Block &entry = cleanupRegion->front(); |
| |
| llvm::Instruction *potentialTerminator = |
| builder.GetInsertBlock()->empty() ? nullptr |
| : &builder.GetInsertBlock()->back(); |
| if (potentialTerminator && potentialTerminator->isTerminator()) |
| builder.SetInsertPoint(potentialTerminator); |
| llvm::Value *privateVarValue = |
| shouldLoadCleanupRegionArg |
| ? builder.CreateLoad( |
| moduleTranslation.convertType(entry.getArgument(0).getType()), |
| privateVariables[i]) |
| : privateVariables[i]; |
| |
| moduleTranslation.mapValue(entry.getArgument(0), privateVarValue); |
| |
| if (failed(inlineConvertOmpRegions(*cleanupRegion, regionName, builder, |
| moduleTranslation))) |
| return failure(); |
| |
| // clear block argument mapping in case it needs to be re-created with a |
| // different source for another use of the same reduction decl |
| moduleTranslation.forgetMapping(*cleanupRegion); |
| } |
| return success(); |
| } |
| |
| // TODO: not used by ParallelOp |
| template <class OP> |
| static LogicalResult createReductionsAndCleanup( |
| OP op, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation, |
| llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, |
| SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls, |
| ArrayRef<llvm::Value *> privateReductionVariables, ArrayRef<bool> isByRef, |
| bool isNowait = false, bool isTeamsReduction = false) { |
| // Process the reductions if required. |
| if (op.getNumReductionVars() == 0) |
| return success(); |
| |
| SmallVector<OwningReductionGen> owningReductionGens; |
| SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens; |
| SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos; |
| |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| |
| // Create the reduction generators. We need to own them here because |
| // ReductionInfo only accepts references to the generators. |
| collectReductionInfo(op, builder, moduleTranslation, reductionDecls, |
| owningReductionGens, owningAtomicReductionGens, |
| privateReductionVariables, reductionInfos); |
| |
| // The call to createReductions below expects the block to have a |
| // terminator. Create an unreachable instruction to serve as terminator |
| // and remove it later. |
| llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable(); |
| builder.SetInsertPoint(tempTerminator); |
| llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint = |
| ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos, |
| isByRef, isNowait, isTeamsReduction); |
| |
| if (failed(handleError(contInsertPoint, *op))) |
| return failure(); |
| |
| if (!contInsertPoint->getBlock()) |
| return op->emitOpError() << "failed to convert reductions"; |
| |
| llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = |
| ompBuilder->createBarrier(*contInsertPoint, llvm::omp::OMPD_for); |
| |
| if (failed(handleError(afterIP, *op))) |
| return failure(); |
| |
| tempTerminator->eraseFromParent(); |
| builder.restoreIP(*afterIP); |
| |
| // after the construct, deallocate private reduction variables |
| SmallVector<Region *> reductionRegions; |
| llvm::transform(reductionDecls, std::back_inserter(reductionRegions), |
| [](omp::DeclareReductionOp reductionDecl) { |
| return &reductionDecl.getCleanupRegion(); |
| }); |
| return inlineOmpRegionCleanup(reductionRegions, privateReductionVariables, |
| moduleTranslation, builder, |
| "omp.reduction.cleanup"); |
| return success(); |
| } |
| |
| static ArrayRef<bool> getIsByRef(std::optional<ArrayRef<bool>> attr) { |
| if (!attr) |
| return {}; |
| return *attr; |
| } |
| |
| // TODO: not used by omp.parallel |
| template <typename OP> |
| static LogicalResult allocAndInitializeReductionVars( |
| OP op, ArrayRef<BlockArgument> reductionArgs, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation, |
| llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, |
| SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls, |
| SmallVectorImpl<llvm::Value *> &privateReductionVariables, |
| DenseMap<Value, llvm::Value *> &reductionVariableMap, |
| llvm::ArrayRef<bool> isByRef) { |
| if (op.getNumReductionVars() == 0) |
| return success(); |
| |
| SmallVector<DeferredStore> deferredStores; |
| |
| if (failed(allocReductionVars(op, reductionArgs, builder, moduleTranslation, |
| allocaIP, reductionDecls, |
| privateReductionVariables, reductionVariableMap, |
| deferredStores, isByRef))) |
| return failure(); |
| |
| return initReductionVars(op, reductionArgs, builder, moduleTranslation, |
| allocaIP.getBlock(), reductionDecls, |
| privateReductionVariables, reductionVariableMap, |
| isByRef, deferredStores); |
| } |
| |
| /// Return the llvm::Value * corresponding to the `privateVar` that |
| /// is being privatized. It isn't always as simple as looking up |
| /// moduleTranslation with privateVar. For instance, in case of |
| /// an allocatable, the descriptor for the allocatable is privatized. |
| /// This descriptor is mapped using an MapInfoOp. So, this function |
| /// will return a pointer to the llvm::Value corresponding to the |
| /// block argument for the mapped descriptor. |
| static llvm::Value * |
| findAssociatedValue(Value privateVar, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation, |
| llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) { |
| if (mappedPrivateVars == nullptr || !mappedPrivateVars->contains(privateVar)) |
| return moduleTranslation.lookupValue(privateVar); |
| |
| Value blockArg = (*mappedPrivateVars)[privateVar]; |
| Type privVarType = privateVar.getType(); |
| Type blockArgType = blockArg.getType(); |
| assert(isa<LLVM::LLVMPointerType>(blockArgType) && |
| "A block argument corresponding to a mapped var should have " |
| "!llvm.ptr type"); |
| |
| if (privVarType == blockArgType) |
| return moduleTranslation.lookupValue(blockArg); |
| |
| // This typically happens when the privatized type is lowered from |
| // boxchar<KIND> and gets lowered to !llvm.struct<(ptr, i64)>. That is the |
| // struct/pair is passed by value. But, mapped values are passed only as |
| // pointers, so before we privatize, we must load the pointer. |
| if (!isa<LLVM::LLVMPointerType>(privVarType)) |
| return builder.CreateLoad(moduleTranslation.convertType(privVarType), |
| moduleTranslation.lookupValue(blockArg)); |
| |
| return moduleTranslation.lookupValue(privateVar); |
| } |
| |
| /// Initialize a single (first)private variable. You probably want to use |
| /// allocateAndInitPrivateVars instead of this. |
| /// This returns the private variable which has been initialized. This |
| /// variable should be mapped before constructing the body of the Op. |
| static llvm::Expected<llvm::Value *> initPrivateVar( |
| llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, |
| omp::PrivateClauseOp &privDecl, Value mlirPrivVar, BlockArgument &blockArg, |
| llvm::Value *llvmPrivateVar, llvm::BasicBlock *privInitBlock, |
| llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) { |
| Region &initRegion = privDecl.getInitRegion(); |
| if (initRegion.empty()) |
| return llvmPrivateVar; |
| |
| // map initialization region block arguments |
| llvm::Value *nonPrivateVar = findAssociatedValue( |
| mlirPrivVar, builder, moduleTranslation, mappedPrivateVars); |
| assert(nonPrivateVar); |
| moduleTranslation.mapValue(privDecl.getInitMoldArg(), nonPrivateVar); |
| moduleTranslation.mapValue(privDecl.getInitPrivateArg(), llvmPrivateVar); |
| |
| // in-place convert the private initialization region |
| SmallVector<llvm::Value *, 1> phis; |
| if (failed(inlineConvertOmpRegions(initRegion, "omp.private.init", builder, |
| moduleTranslation, &phis))) |
| return llvm::createStringError( |
| "failed to inline `init` region of `omp.private`"); |
| |
| assert(phis.size() == 1 && "expected one allocation to be yielded"); |
| |
| // clear init region block argument mapping in case it needs to be |
| // re-created with a different source for another use of the same |
| // reduction decl |
| moduleTranslation.forgetMapping(initRegion); |
| |
| // Prefer the value yielded from the init region to the allocated private |
| // variable in case the region is operating on arguments by-value (e.g. |
| // Fortran character boxes). |
| return phis[0]; |
| } |
| |
| static llvm::Error |
| initPrivateVars(llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation, |
| PrivateVarsInfo &privateVarsInfo, |
| llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) { |
| if (privateVarsInfo.blockArgs.empty()) |
| return llvm::Error::success(); |
| |
| llvm::BasicBlock *privInitBlock = splitBB(builder, true, "omp.private.init"); |
| setInsertPointForPossiblyEmptyBlock(builder, privInitBlock); |
| |
| for (auto [idx, zip] : llvm::enumerate(llvm::zip_equal( |
| privateVarsInfo.privatizers, privateVarsInfo.mlirVars, |
| privateVarsInfo.blockArgs, privateVarsInfo.llvmVars))) { |
| auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVar] = zip; |
| llvm::Expected<llvm::Value *> privVarOrErr = initPrivateVar( |
| builder, moduleTranslation, privDecl, mlirPrivVar, blockArg, |
| llvmPrivateVar, privInitBlock, mappedPrivateVars); |
| |
| if (!privVarOrErr) |
| return privVarOrErr.takeError(); |
| |
| llvmPrivateVar = privVarOrErr.get(); |
| moduleTranslation.mapValue(blockArg, llvmPrivateVar); |
| |
| setInsertPointForPossiblyEmptyBlock(builder); |
| } |
| |
| return llvm::Error::success(); |
| } |
| |
| /// Allocate and initialize delayed private variables. Returns the basic block |
| /// which comes after all of these allocations. llvm::Value * for each of these |
| /// private variables are populated in llvmPrivateVars. |
| static llvm::Expected<llvm::BasicBlock *> |
| allocatePrivateVars(llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation, |
| PrivateVarsInfo &privateVarsInfo, |
| const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, |
| llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) { |
| // Allocate private vars |
| llvm::Instruction *allocaTerminator = allocaIP.getBlock()->getTerminator(); |
| splitBB(llvm::OpenMPIRBuilder::InsertPointTy(allocaIP.getBlock(), |
| allocaTerminator->getIterator()), |
| true, allocaTerminator->getStableDebugLoc(), |
| "omp.region.after_alloca"); |
| |
| llvm::IRBuilderBase::InsertPointGuard guard(builder); |
| // Update the allocaTerminator since the alloca block was split above. |
| allocaTerminator = allocaIP.getBlock()->getTerminator(); |
| builder.SetInsertPoint(allocaTerminator); |
| // The new terminator is an uncondition branch created by the splitBB above. |
| assert(allocaTerminator->getNumSuccessors() == 1 && |
| "This is an unconditional branch created by splitBB"); |
| |
| llvm::DataLayout dataLayout = builder.GetInsertBlock()->getDataLayout(); |
| llvm::BasicBlock *afterAllocas = allocaTerminator->getSuccessor(0); |
| |
| unsigned int allocaAS = |
| moduleTranslation.getLLVMModule()->getDataLayout().getAllocaAddrSpace(); |
| unsigned int defaultAS = moduleTranslation.getLLVMModule() |
| ->getDataLayout() |
| .getProgramAddressSpace(); |
| |
| for (auto [privDecl, mlirPrivVar, blockArg] : |
| llvm::zip_equal(privateVarsInfo.privatizers, privateVarsInfo.mlirVars, |
| privateVarsInfo.blockArgs)) { |
| llvm::Type *llvmAllocType = |
| moduleTranslation.convertType(privDecl.getType()); |
| builder.SetInsertPoint(allocaIP.getBlock()->getTerminator()); |
| llvm::Value *llvmPrivateVar = builder.CreateAlloca( |
| llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc"); |
| if (allocaAS != defaultAS) |
| llvmPrivateVar = builder.CreateAddrSpaceCast(llvmPrivateVar, |
| builder.getPtrTy(defaultAS)); |
| |
| privateVarsInfo.llvmVars.push_back(llvmPrivateVar); |
| } |
| |
| return afterAllocas; |
| } |
| |
| static LogicalResult copyFirstPrivateVars( |
| llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, |
| SmallVectorImpl<mlir::Value> &mlirPrivateVars, |
| ArrayRef<llvm::Value *> llvmPrivateVars, |
| SmallVectorImpl<omp::PrivateClauseOp> &privateDecls, |
| llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) { |
| // Apply copy region for firstprivate. |
| bool needsFirstprivate = |
| llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) { |
| return privOp.getDataSharingType() == |
| omp::DataSharingClauseType::FirstPrivate; |
| }); |
| |
| if (!needsFirstprivate) |
| return success(); |
| |
| llvm::BasicBlock *copyBlock = |
| splitBB(builder, /*CreateBranch=*/true, "omp.private.copy"); |
| setInsertPointForPossiblyEmptyBlock(builder, copyBlock); |
| |
| for (auto [decl, mlirVar, llvmVar] : |
| llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) { |
| if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate) |
| continue; |
| |
| // copyRegion implements `lhs = rhs` |
| Region ©Region = decl.getCopyRegion(); |
| |
| // map copyRegion rhs arg |
| llvm::Value *nonPrivateVar = findAssociatedValue( |
| mlirVar, builder, moduleTranslation, mappedPrivateVars); |
| assert(nonPrivateVar); |
| moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar); |
| |
| // map copyRegion lhs arg |
| moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar); |
| |
| // in-place convert copy region |
| if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy", builder, |
| moduleTranslation))) |
| return decl.emitError("failed to inline `copy` region of `omp.private`"); |
| |
| setInsertPointForPossiblyEmptyBlock(builder); |
| |
| // ignore unused value yielded from copy region |
| |
| // clear copy region block argument mapping in case it needs to be |
| // re-created with different sources for reuse of the same reduction |
| // decl |
| moduleTranslation.forgetMapping(copyRegion); |
| } |
| |
| return success(); |
| } |
| |
| static LogicalResult |
| cleanupPrivateVars(llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation, Location loc, |
| SmallVectorImpl<llvm::Value *> &llvmPrivateVars, |
| SmallVectorImpl<omp::PrivateClauseOp> &privateDecls) { |
| // private variable deallocation |
| SmallVector<Region *> privateCleanupRegions; |
| llvm::transform(privateDecls, std::back_inserter(privateCleanupRegions), |
| [](omp::PrivateClauseOp privatizer) { |
| return &privatizer.getDeallocRegion(); |
| }); |
| |
| if (failed(inlineOmpRegionCleanup( |
| privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder, |
| "omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false))) |
| return mlir::emitError(loc, "failed to inline `dealloc` region of an " |
| "`omp.private` op in"); |
| |
| return success(); |
| } |
| |
| /// Returns true if the construct contains omp.cancel or omp.cancellation_point |
| static bool constructIsCancellable(Operation *op) { |
| // omp.cancel must be "closely nested" so it will be visible and not inside of |
| // funcion calls. This is enforced by the verifier. |
| return op |
| ->walk([](Operation *child) { |
| if (mlir::isa<omp::CancelOp>(child)) |
| return WalkResult::interrupt(); |
| return WalkResult::advance(); |
| }) |
| .wasInterrupted(); |
| } |
| |
| static LogicalResult |
| convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| using StorableBodyGenCallbackTy = |
| llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; |
| |
| auto sectionsOp = cast<omp::SectionsOp>(opInst); |
| |
| if (failed(checkImplementationStatus(opInst))) |
| return failure(); |
| |
| llvm::ArrayRef<bool> isByRef = getIsByRef(sectionsOp.getReductionByref()); |
| assert(isByRef.size() == sectionsOp.getNumReductionVars()); |
| |
| SmallVector<omp::DeclareReductionOp> reductionDecls; |
| collectReductionDecls(sectionsOp, reductionDecls); |
| llvm::OpenMPIRBuilder::InsertPointTy allocaIP = |
| findAllocaInsertPoint(builder, moduleTranslation); |
| |
| SmallVector<llvm::Value *> privateReductionVariables( |
| sectionsOp.getNumReductionVars()); |
| DenseMap<Value, llvm::Value *> reductionVariableMap; |
| |
| MutableArrayRef<BlockArgument> reductionArgs = |
| cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs(); |
| |
| if (failed(allocAndInitializeReductionVars( |
| sectionsOp, reductionArgs, builder, moduleTranslation, allocaIP, |
| reductionDecls, privateReductionVariables, reductionVariableMap, |
| isByRef))) |
| return failure(); |
| |
| SmallVector<StorableBodyGenCallbackTy> sectionCBs; |
| |
| for (Operation &op : *sectionsOp.getRegion().begin()) { |
| auto sectionOp = dyn_cast<omp::SectionOp>(op); |
| if (!sectionOp) // omp.terminator |
| continue; |
| |
| Region ®ion = sectionOp.getRegion(); |
| auto sectionCB = [§ionsOp, ®ion, &builder, &moduleTranslation]( |
| InsertPointTy allocaIP, InsertPointTy codeGenIP) { |
| builder.restoreIP(codeGenIP); |
| |
| // map the omp.section reduction block argument to the omp.sections block |
| // arguments |
| // TODO: this assumes that the only block arguments are reduction |
| // variables |
| assert(region.getNumArguments() == |
| sectionsOp.getRegion().getNumArguments()); |
| for (auto [sectionsArg, sectionArg] : llvm::zip_equal( |
| sectionsOp.getRegion().getArguments(), region.getArguments())) { |
| llvm::Value *llvmVal = moduleTranslation.lookupValue(sectionsArg); |
| assert(llvmVal); |
| moduleTranslation.mapValue(sectionArg, llvmVal); |
| } |
| |
| return convertOmpOpRegions(region, "omp.section.region", builder, |
| moduleTranslation) |
| .takeError(); |
| }; |
| sectionCBs.push_back(sectionCB); |
| } |
| |
| // No sections within omp.sections operation - skip generation. This situation |
| // is only possible if there is only a terminator operation inside the |
| // sections operation |
| if (sectionCBs.empty()) |
| return success(); |
| |
| assert(isa<omp::SectionOp>(*sectionsOp.getRegion().op_begin())); |
| |
| // TODO: Perform appropriate actions according to the data-sharing |
| // attribute (shared, private, firstprivate, ...) of variables. |
| // Currently defaults to shared. |
| auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &, |
| llvm::Value &vPtr, llvm::Value *&replacementValue) |
| -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy { |
| replacementValue = &vPtr; |
| return codeGenIP; |
| }; |
| |
| // TODO: Perform finalization actions for variables. This has to be |
| // called for variables which have destructors/finalizers. |
| auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); }; |
| |
| allocaIP = findAllocaInsertPoint(builder, moduleTranslation); |
| bool isCancellable = constructIsCancellable(sectionsOp); |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = |
| moduleTranslation.getOpenMPBuilder()->createSections( |
| ompLoc, allocaIP, sectionCBs, privCB, finiCB, isCancellable, |
| sectionsOp.getNowait()); |
| |
| if (failed(handleError(afterIP, opInst))) |
| return failure(); |
| |
| builder.restoreIP(*afterIP); |
| |
| // Process the reductions if required. |
| return createReductionsAndCleanup( |
| sectionsOp, builder, moduleTranslation, allocaIP, reductionDecls, |
| privateReductionVariables, isByRef, sectionsOp.getNowait()); |
| } |
| |
| /// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| |
| if (failed(checkImplementationStatus(*singleOp))) |
| return failure(); |
| |
| auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { |
| builder.restoreIP(codegenIP); |
| return convertOmpOpRegions(singleOp.getRegion(), "omp.single.region", |
| builder, moduleTranslation) |
| .takeError(); |
| }; |
| auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); }; |
| |
| // Handle copyprivate |
| Operation::operand_range cpVars = singleOp.getCopyprivateVars(); |
| std::optional<ArrayAttr> cpFuncs = singleOp.getCopyprivateSyms(); |
| llvm::SmallVector<llvm::Value *> llvmCPVars; |
| llvm::SmallVector<llvm::Function *> llvmCPFuncs; |
| for (size_t i = 0, e = cpVars.size(); i < e; ++i) { |
| llvmCPVars.push_back(moduleTranslation.lookupValue(cpVars[i])); |
| auto llvmFuncOp = SymbolTable::lookupNearestSymbolFrom<LLVM::LLVMFuncOp>( |
| singleOp, cast<SymbolRefAttr>((*cpFuncs)[i])); |
| llvmCPFuncs.push_back( |
| moduleTranslation.lookupFunction(llvmFuncOp.getName())); |
| } |
| |
| llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = |
| moduleTranslation.getOpenMPBuilder()->createSingle( |
| ompLoc, bodyCB, finiCB, singleOp.getNowait(), llvmCPVars, |
| llvmCPFuncs); |
| |
| if (failed(handleError(afterIP, *singleOp))) |
| return failure(); |
| |
| builder.restoreIP(*afterIP); |
| return success(); |
| } |
| |
| static bool teamsReductionContainedInDistribute(omp::TeamsOp teamsOp) { |
| auto iface = |
| llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(teamsOp.getOperation()); |
| // Check that all uses of the reduction block arg has the same distribute op |
| // parent. |
| llvm::SmallVector<mlir::Operation *> debugUses; |
| Operation *distOp = nullptr; |
| for (auto ra : iface.getReductionBlockArgs()) |
| for (auto &use : ra.getUses()) { |
| auto *useOp = use.getOwner(); |
| // Ignore debug uses. |
| if (mlir::isa<LLVM::DbgDeclareOp, LLVM::DbgValueOp>(useOp)) { |
| debugUses.push_back(useOp); |
| continue; |
| } |
| |
| auto currentDistOp = useOp->getParentOfType<omp::DistributeOp>(); |
| // Use is not inside a distribute op - return false |
| if (!currentDistOp) |
| return false; |
| // Multiple distribute operations - return false |
| Operation *currentOp = currentDistOp.getOperation(); |
| if (distOp && (distOp != currentOp)) |
| return false; |
| |
| distOp = currentOp; |
| } |
| |
| // If we are going to use distribute reduction then remove any debug uses of |
| // the reduction parameters in teamsOp. Otherwise they will be left without |
| // any mapped value in moduleTranslation and will eventually error out. |
| for (auto use : debugUses) |
| use->erase(); |
| return true; |
| } |
| |
| // Convert an OpenMP Teams construct to LLVM IR using OpenMPIRBuilder |
| static LogicalResult |
| convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| if (failed(checkImplementationStatus(*op))) |
| return failure(); |
| |
| DenseMap<Value, llvm::Value *> reductionVariableMap; |
| unsigned numReductionVars = op.getNumReductionVars(); |
| SmallVector<omp::DeclareReductionOp> reductionDecls; |
| SmallVector<llvm::Value *> privateReductionVariables(numReductionVars); |
| llvm::ArrayRef<bool> isByRef; |
| llvm::OpenMPIRBuilder::InsertPointTy allocaIP = |
| findAllocaInsertPoint(builder, moduleTranslation); |
| |
| // Only do teams reduction if there is no distribute op that captures the |
| // reduction instead. |
| bool doTeamsReduction = !teamsReductionContainedInDistribute(op); |
| if (doTeamsReduction) { |
| isByRef = getIsByRef(op.getReductionByref()); |
| |
| assert(isByRef.size() == op.getNumReductionVars()); |
| |
| MutableArrayRef<BlockArgument> reductionArgs = |
| llvm::cast<omp::BlockArgOpenMPOpInterface>(*op).getReductionBlockArgs(); |
| |
| collectReductionDecls(op, reductionDecls); |
| |
| if (failed(allocAndInitializeReductionVars( |
| op, reductionArgs, builder, moduleTranslation, allocaIP, |
| reductionDecls, privateReductionVariables, reductionVariableMap, |
| isByRef))) |
| return failure(); |
| } |
| |
| auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { |
| LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame( |
| moduleTranslation, allocaIP); |
| builder.restoreIP(codegenIP); |
| return convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder, |
| moduleTranslation) |
| .takeError(); |
| }; |
| |
| llvm::Value *numTeamsLower = nullptr; |
| if (Value numTeamsLowerVar = op.getNumTeamsLower()) |
| numTeamsLower = moduleTranslation.lookupValue(numTeamsLowerVar); |
| |
| llvm::Value *numTeamsUpper = nullptr; |
| if (Value numTeamsUpperVar = op.getNumTeamsUpper()) |
| numTeamsUpper = moduleTranslation.lookupValue(numTeamsUpperVar); |
| |
| llvm::Value *threadLimit = nullptr; |
| if (Value threadLimitVar = op.getThreadLimit()) |
| threadLimit = moduleTranslation.lookupValue(threadLimitVar); |
| |
| llvm::Value *ifExpr = nullptr; |
| if (Value ifVar = op.getIfExpr()) |
| ifExpr = moduleTranslation.lookupValue(ifVar); |
| |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = |
| moduleTranslation.getOpenMPBuilder()->createTeams( |
| ompLoc, bodyCB, numTeamsLower, numTeamsUpper, threadLimit, ifExpr); |
| |
| if (failed(handleError(afterIP, *op))) |
| return failure(); |
| |
| builder.restoreIP(*afterIP); |
| if (doTeamsReduction) { |
| // Process the reductions if required. |
| return createReductionsAndCleanup( |
| op, builder, moduleTranslation, allocaIP, reductionDecls, |
| privateReductionVariables, isByRef, |
| /*isNoWait*/ false, /*isTeamsReduction*/ true); |
| } |
| return success(); |
| } |
| |
| static void |
| buildDependData(std::optional<ArrayAttr> dependKinds, OperandRange dependVars, |
| LLVM::ModuleTranslation &moduleTranslation, |
| SmallVectorImpl<llvm::OpenMPIRBuilder::DependData> &dds) { |
| if (dependVars.empty()) |
| return; |
| for (auto dep : llvm::zip(dependVars, dependKinds->getValue())) { |
| llvm::omp::RTLDependenceKindTy type; |
| switch ( |
| cast<mlir::omp::ClauseTaskDependAttr>(std::get<1>(dep)).getValue()) { |
| case mlir::omp::ClauseTaskDepend::taskdependin: |
| type = llvm::omp::RTLDependenceKindTy::DepIn; |
| break; |
| // The OpenMP runtime requires that the codegen for 'depend' clause for |
| // 'out' dependency kind must be the same as codegen for 'depend' clause |
| // with 'inout' dependency. |
| case mlir::omp::ClauseTaskDepend::taskdependout: |
| case mlir::omp::ClauseTaskDepend::taskdependinout: |
| type = llvm::omp::RTLDependenceKindTy::DepInOut; |
| break; |
| case mlir::omp::ClauseTaskDepend::taskdependmutexinoutset: |
| type = llvm::omp::RTLDependenceKindTy::DepMutexInOutSet; |
| break; |
| case mlir::omp::ClauseTaskDepend::taskdependinoutset: |
| type = llvm::omp::RTLDependenceKindTy::DepInOutSet; |
| break; |
| }; |
| llvm::Value *depVal = moduleTranslation.lookupValue(std::get<0>(dep)); |
| llvm::OpenMPIRBuilder::DependData dd(type, depVal->getType(), depVal); |
| dds.emplace_back(dd); |
| } |
| } |
| |
| namespace { |
| /// TaskContextStructManager takes care of creating and freeing a structure |
| /// containing information needed by the task body to execute. |
| class TaskContextStructManager { |
| public: |
| TaskContextStructManager(llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation, |
| MutableArrayRef<omp::PrivateClauseOp> privateDecls) |
| : builder{builder}, moduleTranslation{moduleTranslation}, |
| privateDecls{privateDecls} {} |
| |
| /// Creates a heap allocated struct containing space for each private |
| /// variable. Invariant: privateVarTypes, privateDecls, and the elements of |
| /// the structure should all have the same order (although privateDecls which |
| /// do not read from the mold argument are skipped). |
| void generateTaskContextStruct(); |
| |
| /// Create GEPs to access each member of the structure representing a private |
| /// variable, adding them to llvmPrivateVars. Null values are added where |
| /// private decls were skipped so that the ordering continues to match the |
| /// private decls. |
| void createGEPsToPrivateVars(); |
| |
| /// De-allocate the task context structure. |
| void freeStructPtr(); |
| |
| MutableArrayRef<llvm::Value *> getLLVMPrivateVarGEPs() { |
| return llvmPrivateVarGEPs; |
| } |
| |
| llvm::Value *getStructPtr() { return structPtr; } |
| |
| private: |
| llvm::IRBuilderBase &builder; |
| LLVM::ModuleTranslation &moduleTranslation; |
| MutableArrayRef<omp::PrivateClauseOp> privateDecls; |
| |
| /// The type of each member of the structure, in order. |
| SmallVector<llvm::Type *> privateVarTypes; |
| |
| /// LLVM values for each private variable, or null if that private variable is |
| /// not included in the task context structure |
| SmallVector<llvm::Value *> llvmPrivateVarGEPs; |
| |
| /// A pointer to the structure containing context for this task. |
| llvm::Value *structPtr = nullptr; |
| /// The type of the structure |
| llvm::Type *structTy = nullptr; |
| }; |
| } // namespace |
| |
| void TaskContextStructManager::generateTaskContextStruct() { |
| if (privateDecls.empty()) |
| return; |
| privateVarTypes.reserve(privateDecls.size()); |
| |
| for (omp::PrivateClauseOp &privOp : privateDecls) { |
| // Skip private variables which can safely be allocated and initialised |
| // inside of the task |
| if (!privOp.readsFromMold()) |
| continue; |
| Type mlirType = privOp.getType(); |
| privateVarTypes.push_back(moduleTranslation.convertType(mlirType)); |
| } |
| |
| structTy = llvm::StructType::get(moduleTranslation.getLLVMContext(), |
| privateVarTypes); |
| |
| llvm::DataLayout dataLayout = |
| builder.GetInsertBlock()->getModule()->getDataLayout(); |
| llvm::Type *intPtrTy = builder.getIntPtrTy(dataLayout); |
| llvm::Constant *allocSize = llvm::ConstantExpr::getSizeOf(structTy); |
| |
| // Heap allocate the structure |
| structPtr = builder.CreateMalloc(intPtrTy, structTy, allocSize, |
| /*ArraySize=*/nullptr, /*MallocF=*/nullptr, |
| "omp.task.context_ptr"); |
| } |
| |
| void TaskContextStructManager::createGEPsToPrivateVars() { |
| if (!structPtr) { |
| assert(privateVarTypes.empty()); |
| return; |
| } |
| |
| // Create GEPs for each struct member |
| llvmPrivateVarGEPs.clear(); |
| llvmPrivateVarGEPs.reserve(privateDecls.size()); |
| llvm::Value *zero = builder.getInt32(0); |
| unsigned i = 0; |
| for (auto privDecl : privateDecls) { |
| if (!privDecl.readsFromMold()) { |
| // Handle this inside of the task so we don't pass unnessecary vars in |
| llvmPrivateVarGEPs.push_back(nullptr); |
| continue; |
| } |
| llvm::Value *iVal = builder.getInt32(i); |
| llvm::Value *gep = builder.CreateGEP(structTy, structPtr, {zero, iVal}); |
| llvmPrivateVarGEPs.push_back(gep); |
| i += 1; |
| } |
| } |
| |
| void TaskContextStructManager::freeStructPtr() { |
| if (!structPtr) |
| return; |
| |
| llvm::IRBuilderBase::InsertPointGuard guard{builder}; |
| // Ensure we don't put the call to free() after the terminator |
| builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator()); |
| builder.CreateFree(structPtr); |
| } |
| |
| /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| if (failed(checkImplementationStatus(*taskOp))) |
| return failure(); |
| |
| PrivateVarsInfo privateVarsInfo(taskOp); |
| TaskContextStructManager taskStructMgr{builder, moduleTranslation, |
| privateVarsInfo.privatizers}; |
| |
| // Allocate and copy private variables before creating the task. This avoids |
| // accessing invalid memory if (after this scope ends) the private variables |
| // are initialized from host variables or if the variables are copied into |
| // from host variables (firstprivate). The insertion point is just before |
| // where the code for creating and scheduling the task will go. That puts this |
| // code outside of the outlined task region, which is what we want because |
| // this way the initialization and copy regions are executed immediately while |
| // the host variable data are still live. |
| |
| llvm::OpenMPIRBuilder::InsertPointTy allocaIP = |
| findAllocaInsertPoint(builder, moduleTranslation); |
| |
| // Not using splitBB() because that requires the current block to have a |
| // terminator. |
| assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end()); |
| llvm::BasicBlock *taskStartBlock = llvm::BasicBlock::Create( |
| builder.getContext(), "omp.task.start", |
| /*Parent=*/builder.GetInsertBlock()->getParent()); |
| llvm::Instruction *branchToTaskStartBlock = builder.CreateBr(taskStartBlock); |
| builder.SetInsertPoint(branchToTaskStartBlock); |
| |
| // Now do this again to make the initialization and copy blocks |
| llvm::BasicBlock *copyBlock = |
| splitBB(builder, /*CreateBranch=*/true, "omp.private.copy"); |
| llvm::BasicBlock *initBlock = |
| splitBB(builder, /*CreateBranch=*/true, "omp.private.init"); |
| |
| // Now the control flow graph should look like |
| // starter_block: |
| // <---- where we started when convertOmpTaskOp was called |
| // br %omp.private.init |
| // omp.private.init: |
| // br %omp.private.copy |
| // omp.private.copy: |
| // br %omp.task.start |
| // omp.task.start: |
| // <---- where we want the insertion point to be when we call createTask() |
| |
| // Save the alloca insertion point on ModuleTranslation stack for use in |
| // nested regions. |
| LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame( |
| moduleTranslation, allocaIP); |
| |
| // Allocate and initialize private variables |
| builder.SetInsertPoint(initBlock->getTerminator()); |
| |
| // Create task variable structure |
| taskStructMgr.generateTaskContextStruct(); |
| // GEPs so that we can initialize the variables. Don't use these GEPs inside |
| // of the body otherwise it will be the GEP not the struct which is fowarded |
| // to the outlined function. GEPs forwarded in this way are passed in a |
| // stack-allocated (by OpenMPIRBuilder) structure which is not safe for tasks |
| // which may not be executed until after the current stack frame goes out of |
| // scope. |
| taskStructMgr.createGEPsToPrivateVars(); |
| |
| for (auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVarAlloc] : |
| llvm::zip_equal(privateVarsInfo.privatizers, privateVarsInfo.mlirVars, |
| privateVarsInfo.blockArgs, |
| taskStructMgr.getLLVMPrivateVarGEPs())) { |
| // To be handled inside the task. |
| if (!privDecl.readsFromMold()) |
| continue; |
| assert(llvmPrivateVarAlloc && |
| "reads from mold so shouldn't have been skipped"); |
| |
| llvm::Expected<llvm::Value *> privateVarOrErr = |
| initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar, |
| blockArg, llvmPrivateVarAlloc, initBlock); |
| if (!privateVarOrErr) |
| return handleError(privateVarOrErr, *taskOp.getOperation()); |
| |
| llvm::IRBuilderBase::InsertPointGuard guard(builder); |
| builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator()); |
| |
| // TODO: this is a bit of a hack for Fortran character boxes. |
| // Character boxes are passed by value into the init region and then the |
| // initialized character box is yielded by value. Here we need to store the |
| // yielded value into the private allocation, and load the private |
| // allocation to match the type expected by region block arguments. |
| if ((privateVarOrErr.get() != llvmPrivateVarAlloc) && |
| !mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) { |
| builder.CreateStore(privateVarOrErr.get(), llvmPrivateVarAlloc); |
| // Load it so we have the value pointed to by the GEP |
| llvmPrivateVarAlloc = builder.CreateLoad(privateVarOrErr.get()->getType(), |
| llvmPrivateVarAlloc); |
| } |
| assert(llvmPrivateVarAlloc->getType() == |
| moduleTranslation.convertType(blockArg.getType())); |
| |
| // Mapping blockArg -> llvmPrivateVarAlloc is done inside the body callback |
| // so that OpenMPIRBuilder doesn't try to pass each GEP address through a |
| // stack allocated structure. |
| } |
| |
| // firstprivate copy region |
| setInsertPointForPossiblyEmptyBlock(builder, copyBlock); |
| if (failed(copyFirstPrivateVars( |
| builder, moduleTranslation, privateVarsInfo.mlirVars, |
| taskStructMgr.getLLVMPrivateVarGEPs(), privateVarsInfo.privatizers))) |
| return llvm::failure(); |
| |
| // Set up for call to createTask() |
| builder.SetInsertPoint(taskStartBlock); |
| |
| auto bodyCB = [&](InsertPointTy allocaIP, |
| InsertPointTy codegenIP) -> llvm::Error { |
| // Save the alloca insertion point on ModuleTranslation stack for use in |
| // nested regions. |
| LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame( |
| moduleTranslation, allocaIP); |
| |
| // translate the body of the task: |
| builder.restoreIP(codegenIP); |
| |
| llvm::BasicBlock *privInitBlock = nullptr; |
| privateVarsInfo.llvmVars.resize(privateVarsInfo.blockArgs.size()); |
| for (auto [i, zip] : llvm::enumerate(llvm::zip_equal( |
| privateVarsInfo.blockArgs, privateVarsInfo.privatizers, |
| privateVarsInfo.mlirVars))) { |
| auto [blockArg, privDecl, mlirPrivVar] = zip; |
| // This is handled before the task executes |
| if (privDecl.readsFromMold()) |
| continue; |
| |
| llvm::IRBuilderBase::InsertPointGuard guard(builder); |
| llvm::Type *llvmAllocType = |
| moduleTranslation.convertType(privDecl.getType()); |
| builder.SetInsertPoint(allocaIP.getBlock()->getTerminator()); |
| llvm::Value *llvmPrivateVar = builder.CreateAlloca( |
| llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc"); |
| |
| llvm::Expected<llvm::Value *> privateVarOrError = |
| initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar, |
| blockArg, llvmPrivateVar, privInitBlock); |
| if (!privateVarOrError) |
| return privateVarOrError.takeError(); |
| moduleTranslation.mapValue(blockArg, privateVarOrError.get()); |
| privateVarsInfo.llvmVars[i] = privateVarOrError.get(); |
| } |
| |
| taskStructMgr.createGEPsToPrivateVars(); |
| for (auto [i, llvmPrivVar] : |
| llvm::enumerate(taskStructMgr.getLLVMPrivateVarGEPs())) { |
| if (!llvmPrivVar) { |
| assert(privateVarsInfo.llvmVars[i] && |
| "This is added in the loop above"); |
| continue; |
| } |
| privateVarsInfo.llvmVars[i] = llvmPrivVar; |
| } |
| |
| // Find and map the addresses of each variable within the task context |
| // structure |
| for (auto [blockArg, llvmPrivateVar, privateDecl] : |
| llvm::zip_equal(privateVarsInfo.blockArgs, privateVarsInfo.llvmVars, |
| privateVarsInfo.privatizers)) { |
| // This was handled above. |
| if (!privateDecl.readsFromMold()) |
| continue; |
| // Fix broken pass-by-value case for Fortran character boxes |
| if (!mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) { |
| llvmPrivateVar = builder.CreateLoad( |
| moduleTranslation.convertType(blockArg.getType()), llvmPrivateVar); |
| } |
| assert(llvmPrivateVar->getType() == |
| moduleTranslation.convertType(blockArg.getType())); |
| moduleTranslation.mapValue(blockArg, llvmPrivateVar); |
| } |
| |
| auto continuationBlockOrError = convertOmpOpRegions( |
| taskOp.getRegion(), "omp.task.region", builder, moduleTranslation); |
| if (failed(handleError(continuationBlockOrError, *taskOp))) |
| return llvm::make_error<PreviouslyReportedError>(); |
| |
| builder.SetInsertPoint(continuationBlockOrError.get()->getTerminator()); |
| |
| if (failed(cleanupPrivateVars(builder, moduleTranslation, taskOp.getLoc(), |
| privateVarsInfo.llvmVars, |
| privateVarsInfo.privatizers))) |
| return llvm::make_error<PreviouslyReportedError>(); |
| |
| // Free heap allocated task context structure at the end of the task. |
| taskStructMgr.freeStructPtr(); |
| |
| return llvm::Error::success(); |
| }; |
| |
| SmallVector<llvm::OpenMPIRBuilder::DependData> dds; |
| buildDependData(taskOp.getDependKinds(), taskOp.getDependVars(), |
| moduleTranslation, dds); |
| |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = |
| moduleTranslation.getOpenMPBuilder()->createTask( |
| ompLoc, allocaIP, bodyCB, !taskOp.getUntied(), |
| moduleTranslation.lookupValue(taskOp.getFinal()), |
| moduleTranslation.lookupValue(taskOp.getIfExpr()), dds, |
| taskOp.getMergeable(), |
| moduleTranslation.lookupValue(taskOp.getEventHandle()), |
| moduleTranslation.lookupValue(taskOp.getPriority())); |
| |
| if (failed(handleError(afterIP, *taskOp))) |
| return failure(); |
| |
| builder.restoreIP(*afterIP); |
| return success(); |
| } |
| |
| /// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| if (failed(checkImplementationStatus(*tgOp))) |
| return failure(); |
| |
| auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { |
| builder.restoreIP(codegenIP); |
| return convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region", |
| builder, moduleTranslation) |
| .takeError(); |
| }; |
| |
| InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = |
| moduleTranslation.getOpenMPBuilder()->createTaskgroup(ompLoc, allocaIP, |
| bodyCB); |
| |
| if (failed(handleError(afterIP, *tgOp))) |
| return failure(); |
| |
| builder.restoreIP(*afterIP); |
| return success(); |
| } |
| |
| static LogicalResult |
| convertOmpTaskwaitOp(omp::TaskwaitOp twOp, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| if (failed(checkImplementationStatus(*twOp))) |
| return failure(); |
| |
| moduleTranslation.getOpenMPBuilder()->createTaskwait(builder.saveIP()); |
| return success(); |
| } |
| |
| /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| auto wsloopOp = cast<omp::WsloopOp>(opInst); |
| if (failed(checkImplementationStatus(opInst))) |
| return failure(); |
| |
| auto loopOp = cast<omp::LoopNestOp>(wsloopOp.getWrappedLoop()); |
| llvm::ArrayRef<bool> isByRef = getIsByRef(wsloopOp.getReductionByref()); |
| assert(isByRef.size() == wsloopOp.getNumReductionVars()); |
| |
| // Static is the default. |
| auto schedule = |
| wsloopOp.getScheduleKind().value_or(omp::ClauseScheduleKind::Static); |
| |
| // Find the loop configuration. |
| llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[0]); |
| llvm::Type *ivType = step->getType(); |
| llvm::Value *chunk = nullptr; |
| if (wsloopOp.getScheduleChunk()) { |
| llvm::Value *chunkVar = |
| moduleTranslation.lookupValue(wsloopOp.getScheduleChunk()); |
| chunk = builder.CreateSExtOrTrunc(chunkVar, ivType); |
| } |
| |
| PrivateVarsInfo privateVarsInfo(wsloopOp); |
| |
| SmallVector<omp::DeclareReductionOp> reductionDecls; |
| collectReductionDecls(wsloopOp, reductionDecls); |
| llvm::OpenMPIRBuilder::InsertPointTy allocaIP = |
| findAllocaInsertPoint(builder, moduleTranslation); |
| |
| SmallVector<llvm::Value *> privateReductionVariables( |
| wsloopOp.getNumReductionVars()); |
| |
| llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars( |
| builder, moduleTranslation, privateVarsInfo, allocaIP); |
| if (handleError(afterAllocas, opInst).failed()) |
| return failure(); |
| |
| DenseMap<Value, llvm::Value *> reductionVariableMap; |
| |
| MutableArrayRef<BlockArgument> reductionArgs = |
| cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs(); |
| |
| SmallVector<DeferredStore> deferredStores; |
| |
| if (failed(allocReductionVars(wsloopOp, reductionArgs, builder, |
| moduleTranslation, allocaIP, reductionDecls, |
| privateReductionVariables, reductionVariableMap, |
| deferredStores, isByRef))) |
| return failure(); |
| |
| if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo), |
| opInst) |
| .failed()) |
| return failure(); |
| |
| if (failed(copyFirstPrivateVars( |
| builder, moduleTranslation, privateVarsInfo.mlirVars, |
| privateVarsInfo.llvmVars, privateVarsInfo.privatizers))) |
| return failure(); |
| |
| assert(afterAllocas.get()->getSinglePredecessor()); |
| if (failed(initReductionVars(wsloopOp, reductionArgs, builder, |
| moduleTranslation, |
| afterAllocas.get()->getSinglePredecessor(), |
| reductionDecls, privateReductionVariables, |
| reductionVariableMap, isByRef, deferredStores))) |
| return failure(); |
| |
| // TODO: Handle doacross loops when the ordered clause has a parameter. |
| bool isOrdered = wsloopOp.getOrdered().has_value(); |
| std::optional<omp::ScheduleModifier> scheduleMod = wsloopOp.getScheduleMod(); |
| bool isSimd = wsloopOp.getScheduleSimd(); |
| bool loopNeedsBarrier = !wsloopOp.getNowait(); |
| |
| // The only legal way for the direct parent to be omp.distribute is that this |
| // represents 'distribute parallel do'. Otherwise, this is a regular |
| // worksharing loop. |
| llvm::omp::WorksharingLoopType workshareLoopType = |
| llvm::isa_and_present<omp::DistributeOp>(opInst.getParentOp()) |
| ? llvm::omp::WorksharingLoopType::DistributeForStaticLoop |
| : llvm::omp::WorksharingLoopType::ForStaticLoop; |
| |
| SmallVector<llvm::BranchInst *> cancelTerminators; |
| // This callback is invoked only if there is cancellation inside of the wsloop |
| // body. |
| auto finiCB = [&](llvm::OpenMPIRBuilder::InsertPointTy ip) -> llvm::Error { |
| llvm::IRBuilderBase &llvmBuilder = ompBuilder->Builder; |
| llvm::IRBuilderBase::InsertPointGuard guard(llvmBuilder); |
| |
| // ip is currently in the block branched to if cancellation occured. |
| // We need to create a branch to terminate that block. |
| llvmBuilder.restoreIP(ip); |
| |
| // We must still clean up the wsloop after cancelling it, so we need to |
| // branch to the block that finalizes the wsloop. |
| // That block has not been created yet so use this block as a dummy for now |
| // and fix this after creating the wsloop. |
| cancelTerminators.push_back(llvmBuilder.CreateBr(ip.getBlock())); |
| return llvm::Error::success(); |
| }; |
| // We have to add the cleanup to the OpenMPIRBuilder before the body gets |
| // created in case the body contains omp.cancel (which will then expect to be |
| // able to find this cleanup callback). |
| ompBuilder->pushFinalizationCB({finiCB, llvm::omp::Directive::OMPD_for, |
| constructIsCancellable(wsloopOp)}); |
| |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| llvm::Expected<llvm::BasicBlock *> regionBlock = convertOmpOpRegions( |
| wsloopOp.getRegion(), "omp.wsloop.region", builder, moduleTranslation); |
| |
| if (failed(handleError(regionBlock, opInst))) |
| return failure(); |
| |
| builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin()); |
| llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation); |
| |
| llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP = |
| ompBuilder->applyWorkshareLoop( |
| ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier, |
| convertToScheduleKind(schedule), chunk, isSimd, |
| scheduleMod == omp::ScheduleModifier::monotonic, |
| scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered, |
| workshareLoopType); |
| |
| if (failed(handleError(wsloopIP, opInst))) |
| return failure(); |
| |
| ompBuilder->popFinalizationCB(); |
| if (!cancelTerminators.empty()) { |
| // If we cancelled the loop, we should branch to the finalization block of |
| // the wsloop (which is always immediately before the loop continuation |
| // block). Now the finalization has been created, we can fix the branch. |
| llvm::BasicBlock *wsloopFini = wsloopIP->getBlock()->getSinglePredecessor(); |
| for (llvm::BranchInst *cancelBranch : cancelTerminators) { |
| assert(cancelBranch->getNumSuccessors() == 1 && |
| "cancel branch should have one target"); |
| cancelBranch->setSuccessor(0, wsloopFini); |
| } |
| } |
| |
| // Process the reductions if required. |
| if (failed(createReductionsAndCleanup( |
| wsloopOp, builder, moduleTranslation, allocaIP, reductionDecls, |
| privateReductionVariables, isByRef, wsloopOp.getNowait(), |
| /*isTeamsReduction=*/false))) |
| return failure(); |
| |
| return cleanupPrivateVars(builder, moduleTranslation, wsloopOp.getLoc(), |
| privateVarsInfo.llvmVars, |
| privateVarsInfo.privatizers); |
| } |
| |
| /// Converts the OpenMP parallel operation to LLVM IR. |
| static LogicalResult |
| convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| ArrayRef<bool> isByRef = getIsByRef(opInst.getReductionByref()); |
| assert(isByRef.size() == opInst.getNumReductionVars()); |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| |
| if (failed(checkImplementationStatus(*opInst))) |
| return failure(); |
| |
| PrivateVarsInfo privateVarsInfo(opInst); |
| |
| // Collect reduction declarations |
| SmallVector<omp::DeclareReductionOp> reductionDecls; |
| collectReductionDecls(opInst, reductionDecls); |
| SmallVector<llvm::Value *> privateReductionVariables( |
| opInst.getNumReductionVars()); |
| SmallVector<DeferredStore> deferredStores; |
| |
| auto bodyGenCB = [&](InsertPointTy allocaIP, |
| InsertPointTy codeGenIP) -> llvm::Error { |
| llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars( |
| builder, moduleTranslation, privateVarsInfo, allocaIP); |
| if (handleError(afterAllocas, *opInst).failed()) |
| return llvm::make_error<PreviouslyReportedError>(); |
| |
| // Allocate reduction vars |
| DenseMap<Value, llvm::Value *> reductionVariableMap; |
| |
| MutableArrayRef<BlockArgument> reductionArgs = |
| cast<omp::BlockArgOpenMPOpInterface>(*opInst).getReductionBlockArgs(); |
| |
| allocaIP = |
| InsertPointTy(allocaIP.getBlock(), |
| allocaIP.getBlock()->getTerminator()->getIterator()); |
| |
| if (failed(allocReductionVars( |
| opInst, reductionArgs, builder, moduleTranslation, allocaIP, |
| reductionDecls, privateReductionVariables, reductionVariableMap, |
| deferredStores, isByRef))) |
| return llvm::make_error<PreviouslyReportedError>(); |
| |
| assert(afterAllocas.get()->getSinglePredecessor()); |
| builder.restoreIP(codeGenIP); |
| |
| if (handleError( |
| initPrivateVars(builder, moduleTranslation, privateVarsInfo), |
| *opInst) |
| .failed()) |
| return llvm::make_error<PreviouslyReportedError>(); |
| |
| if (failed(copyFirstPrivateVars( |
| builder, moduleTranslation, privateVarsInfo.mlirVars, |
| privateVarsInfo.llvmVars, privateVarsInfo.privatizers))) |
| return llvm::make_error<PreviouslyReportedError>(); |
| |
| if (failed( |
| initReductionVars(opInst, reductionArgs, builder, moduleTranslation, |
| afterAllocas.get()->getSinglePredecessor(), |
| reductionDecls, privateReductionVariables, |
| reductionVariableMap, isByRef, deferredStores))) |
| return llvm::make_error<PreviouslyReportedError>(); |
| |
| // Save the alloca insertion point on ModuleTranslation stack for use in |
| // nested regions. |
| LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame( |
| moduleTranslation, allocaIP); |
| |
| // ParallelOp has only one region associated with it. |
| llvm::Expected<llvm::BasicBlock *> regionBlock = convertOmpOpRegions( |
| opInst.getRegion(), "omp.par.region", builder, moduleTranslation); |
| if (!regionBlock) |
| return regionBlock.takeError(); |
| |
| // Process the reductions if required. |
| if (opInst.getNumReductionVars() > 0) { |
| // Collect reduction info |
| SmallVector<OwningReductionGen> owningReductionGens; |
| SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens; |
| SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos; |
| collectReductionInfo(opInst, builder, moduleTranslation, reductionDecls, |
| owningReductionGens, owningAtomicReductionGens, |
| privateReductionVariables, reductionInfos); |
| |
| // Move to region cont block |
| builder.SetInsertPoint((*regionBlock)->getTerminator()); |
| |
| // Generate reductions from info |
| llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable(); |
| builder.SetInsertPoint(tempTerminator); |
| |
| llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint = |
| ompBuilder->createReductions( |
| builder.saveIP(), allocaIP, reductionInfos, isByRef, |
| /*IsNoWait=*/false, /*IsTeamsReduction=*/false); |
| if (!contInsertPoint) |
| return contInsertPoint.takeError(); |
| |
| if (!contInsertPoint->getBlock()) |
| return llvm::make_error<PreviouslyReportedError>(); |
| |
| tempTerminator->eraseFromParent(); |
| builder.restoreIP(*contInsertPoint); |
| } |
| |
| return llvm::Error::success(); |
| }; |
| |
| auto privCB = [](InsertPointTy allocaIP, InsertPointTy codeGenIP, |
| llvm::Value &, llvm::Value &val, llvm::Value *&replVal) { |
| // tell OpenMPIRBuilder not to do anything. We handled Privatisation in |
| // bodyGenCB. |
| replVal = &val; |
| return codeGenIP; |
| }; |
| |
| // TODO: Perform finalization actions for variables. This has to be |
| // called for variables which have destructors/finalizers. |
| auto finiCB = [&](InsertPointTy codeGenIP) -> llvm::Error { |
| InsertPointTy oldIP = builder.saveIP(); |
| builder.restoreIP(codeGenIP); |
| |
| // if the reduction has a cleanup region, inline it here to finalize the |
| // reduction variables |
| SmallVector<Region *> reductionCleanupRegions; |
| llvm::transform(reductionDecls, std::back_inserter(reductionCleanupRegions), |
| [](omp::DeclareReductionOp reductionDecl) { |
| return &reductionDecl.getCleanupRegion(); |
| }); |
| if (failed(inlineOmpRegionCleanup( |
| reductionCleanupRegions, privateReductionVariables, |
| moduleTranslation, builder, "omp.reduction.cleanup"))) |
| return llvm::createStringError( |
| "failed to inline `cleanup` region of `omp.declare_reduction`"); |
| |
| if (failed(cleanupPrivateVars(builder, moduleTranslation, opInst.getLoc(), |
| privateVarsInfo.llvmVars, |
| privateVarsInfo.privatizers))) |
| return llvm::make_error<PreviouslyReportedError>(); |
| |
| builder.restoreIP(oldIP); |
| return llvm::Error::success(); |
| }; |
| |
| llvm::Value *ifCond = nullptr; |
| if (auto ifVar = opInst.getIfExpr()) |
| ifCond = moduleTranslation.lookupValue(ifVar); |
| llvm::Value *numThreads = nullptr; |
| if (auto numThreadsVar = opInst.getNumThreads()) |
| numThreads = moduleTranslation.lookupValue(numThreadsVar); |
| auto pbKind = llvm::omp::OMP_PROC_BIND_default; |
| if (auto bind = opInst.getProcBindKind()) |
| pbKind = getProcBindKind(*bind); |
| bool isCancellable = constructIsCancellable(opInst); |
| |
| llvm::OpenMPIRBuilder::InsertPointTy allocaIP = |
| findAllocaInsertPoint(builder, moduleTranslation); |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| |
| llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = |
| ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB, |
| ifCond, numThreads, pbKind, isCancellable); |
| |
| if (failed(handleError(afterIP, *opInst))) |
| return failure(); |
| |
| builder.restoreIP(*afterIP); |
| return success(); |
| } |
| |
| /// Convert Order attribute to llvm::omp::OrderKind. |
| static llvm::omp::OrderKind |
| convertOrderKind(std::optional<omp::ClauseOrderKind> o) { |
| if (!o) |
| return llvm::omp::OrderKind::OMP_ORDER_unknown; |
| switch (*o) { |
| case omp::ClauseOrderKind::Concurrent: |
| return llvm::omp::OrderKind::OMP_ORDER_concurrent; |
| } |
| llvm_unreachable("Unknown ClauseOrderKind kind"); |
| } |
| |
| /// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| auto simdOp = cast<omp::SimdOp>(opInst); |
| |
| // TODO: Replace this with proper composite translation support. |
| // Currently, simd information on composite constructs is ignored, so e.g. |
| // 'do/for simd' will be treated the same as a standalone 'do/for'. This is |
| // allowed by the spec, since it's equivalent to using a SIMD length of 1. |
| if (simdOp.isComposite()) { |
| if (failed(convertIgnoredWrapper(simdOp, moduleTranslation))) |
| return failure(); |
| |
| return inlineConvertOmpRegions(simdOp.getRegion(), "omp.simd.region", |
| builder, moduleTranslation); |
| } |
| |
| if (failed(checkImplementationStatus(opInst))) |
| return failure(); |
| |
| PrivateVarsInfo privateVarsInfo(simdOp); |
| |
| llvm::OpenMPIRBuilder::InsertPointTy allocaIP = |
| findAllocaInsertPoint(builder, moduleTranslation); |
| |
| llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars( |
| builder, moduleTranslation, privateVarsInfo, allocaIP); |
| if (handleError(afterAllocas, opInst).failed()) |
| return failure(); |
| |
| if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo), |
| opInst) |
| .failed()) |
| return failure(); |
| |
| llvm::ConstantInt *simdlen = nullptr; |
| if (std::optional<uint64_t> simdlenVar = simdOp.getSimdlen()) |
| simdlen = builder.getInt64(simdlenVar.value()); |
| |
| llvm::ConstantInt *safelen = nullptr; |
| if (std::optional<uint64_t> safelenVar = simdOp.getSafelen()) |
| safelen = builder.getInt64(safelenVar.value()); |
| |
| llvm::MapVector<llvm::Value *, llvm::Value *> alignedVars; |
| llvm::omp::OrderKind order = convertOrderKind(simdOp.getOrder()); |
| |
| llvm::BasicBlock *sourceBlock = builder.GetInsertBlock(); |
| std::optional<ArrayAttr> alignmentValues = simdOp.getAlignments(); |
| mlir::OperandRange operands = simdOp.getAlignedVars(); |
| for (size_t i = 0; i < operands.size(); ++i) { |
| llvm::Value *alignment = nullptr; |
| llvm::Value *llvmVal = moduleTranslation.lookupValue(operands[i]); |
| llvm::Type *ty = llvmVal->getType(); |
| |
| auto intAttr = cast<IntegerAttr>((*alignmentValues)[i]); |
| alignment = builder.getInt64(intAttr.getInt()); |
| assert(ty->isPointerTy() && "Invalid type for aligned variable"); |
| assert(alignment && "Invalid alignment value"); |
| auto curInsert = builder.saveIP(); |
| builder.SetInsertPoint(sourceBlock); |
| llvmVal = builder.CreateLoad(ty, llvmVal); |
| builder.restoreIP(curInsert); |
| alignedVars[llvmVal] = alignment; |
| } |
| |
| llvm::Expected<llvm::BasicBlock *> regionBlock = convertOmpOpRegions( |
| simdOp.getRegion(), "omp.simd.region", builder, moduleTranslation); |
| |
| if (failed(handleError(regionBlock, opInst))) |
| return failure(); |
| |
| builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin()); |
| llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation); |
| ompBuilder->applySimd(loopInfo, alignedVars, |
| simdOp.getIfExpr() |
| ? moduleTranslation.lookupValue(simdOp.getIfExpr()) |
| : nullptr, |
| order, simdlen, safelen); |
| |
| return cleanupPrivateVars(builder, moduleTranslation, simdOp.getLoc(), |
| privateVarsInfo.llvmVars, |
| privateVarsInfo.privatizers); |
| } |
| |
| /// Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| auto loopOp = cast<omp::LoopNestOp>(opInst); |
| |
| // Set up the source location value for OpenMP runtime. |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| |
| // Generator of the canonical loop body. |
| SmallVector<llvm::CanonicalLoopInfo *> loopInfos; |
| SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints; |
| auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, |
| llvm::Value *iv) -> llvm::Error { |
| // Make sure further conversions know about the induction variable. |
| moduleTranslation.mapValue( |
| loopOp.getRegion().front().getArgument(loopInfos.size()), iv); |
| |
| // Capture the body insertion point for use in nested loops. BodyIP of the |
| // CanonicalLoopInfo always points to the beginning of the entry block of |
| // the body. |
| bodyInsertPoints.push_back(ip); |
| |
| if (loopInfos.size() != loopOp.getNumLoops() - 1) |
| return llvm::Error::success(); |
| |
| // Convert the body of the loop. |
| builder.restoreIP(ip); |
| llvm::Expected<llvm::BasicBlock *> regionBlock = convertOmpOpRegions( |
| loopOp.getRegion(), "omp.loop_nest.region", builder, moduleTranslation); |
| if (!regionBlock) |
| return regionBlock.takeError(); |
| |
| builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin()); |
| return llvm::Error::success(); |
| }; |
| |
| // Delegate actual loop construction to the OpenMP IRBuilder. |
| // TODO: this currently assumes omp.loop_nest is semantically similar to SCF |
| // loop, i.e. it has a positive step, uses signed integer semantics. |
| // Reconsider this code when the nested loop operation clearly supports more |
| // cases. |
| for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) { |
| llvm::Value *lowerBound = |
| moduleTranslation.lookupValue(loopOp.getLoopLowerBounds()[i]); |
| llvm::Value *upperBound = |
| moduleTranslation.lookupValue(loopOp.getLoopUpperBounds()[i]); |
| llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[i]); |
| |
| // Make sure loop trip count are emitted in the preheader of the outermost |
| // loop at the latest so that they are all available for the new collapsed |
| // loop will be created below. |
| llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc; |
| llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP; |
| if (i != 0) { |
| loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(), |
| ompLoc.DL); |
| computeIP = loopInfos.front()->getPreheaderIP(); |
| } |
| |
| llvm::Expected<llvm::CanonicalLoopInfo *> loopResult = |
| ompBuilder->createCanonicalLoop( |
| loc, bodyGen, lowerBound, upperBound, step, |
| /*IsSigned=*/true, loopOp.getLoopInclusive(), computeIP); |
| |
| if (failed(handleError(loopResult, *loopOp))) |
| return failure(); |
| |
| loopInfos.push_back(*loopResult); |
| } |
| |
| // Collapse loops. Store the insertion point because LoopInfos may get |
| // invalidated. |
| llvm::OpenMPIRBuilder::InsertPointTy afterIP = |
| loopInfos.front()->getAfterIP(); |
| |
| // Update the stack frame created for this loop to point to the resulting loop |
| // after applying transformations. |
| moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>( |
| [&](OpenMPLoopInfoStackFrame &frame) { |
| frame.loopInfo = ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); |
| return WalkResult::interrupt(); |
| }); |
| |
| // Continue building IR after the loop. Note that the LoopInfo returned by |
| // `collapseLoops` points inside the outermost loop and is intended for |
| // potential further loop transformations. Use the insertion point stored |
| // before collapsing loops instead. |
| builder.restoreIP(afterIP); |
| return success(); |
| } |
| |
| /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering. |
| static llvm::AtomicOrdering |
| convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) { |
| if (!ao) |
| return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering |
| |
| switch (*ao) { |
| case omp::ClauseMemoryOrderKind::Seq_cst: |
| return llvm::AtomicOrdering::SequentiallyConsistent; |
| case omp::ClauseMemoryOrderKind::Acq_rel: |
| return llvm::AtomicOrdering::AcquireRelease; |
| case omp::ClauseMemoryOrderKind::Acquire: |
| return llvm::AtomicOrdering::Acquire; |
| case omp::ClauseMemoryOrderKind::Release: |
| return llvm::AtomicOrdering::Release; |
| case omp::ClauseMemoryOrderKind::Relaxed: |
| return llvm::AtomicOrdering::Monotonic; |
| } |
| llvm_unreachable("Unknown ClauseMemoryOrderKind kind"); |
| } |
| |
| /// Convert omp.atomic.read operation to LLVM IR. |
| static LogicalResult |
| convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| auto readOp = cast<omp::AtomicReadOp>(opInst); |
| if (failed(checkImplementationStatus(opInst))) |
| return failure(); |
| |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| llvm::OpenMPIRBuilder::InsertPointTy allocaIP = |
| findAllocaInsertPoint(builder, moduleTranslation); |
| |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| |
| llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.getMemoryOrder()); |
| llvm::Value *x = moduleTranslation.lookupValue(readOp.getX()); |
| llvm::Value *v = moduleTranslation.lookupValue(readOp.getV()); |
| |
| llvm::Type *elementType = |
| moduleTranslation.convertType(readOp.getElementType()); |
| |
| llvm::OpenMPIRBuilder::AtomicOpValue V = {v, elementType, false, false}; |
| llvm::OpenMPIRBuilder::AtomicOpValue X = {x, elementType, false, false}; |
| builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO, allocaIP)); |
| return success(); |
| } |
| |
| /// Converts an omp.atomic.write operation to LLVM IR. |
| static LogicalResult |
| convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| auto writeOp = cast<omp::AtomicWriteOp>(opInst); |
| if (failed(checkImplementationStatus(opInst))) |
| return failure(); |
| |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| llvm::OpenMPIRBuilder::InsertPointTy allocaIP = |
| findAllocaInsertPoint(builder, moduleTranslation); |
| |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.getMemoryOrder()); |
| llvm::Value *expr = moduleTranslation.lookupValue(writeOp.getExpr()); |
| llvm::Value *dest = moduleTranslation.lookupValue(writeOp.getX()); |
| llvm::Type *ty = moduleTranslation.convertType(writeOp.getExpr().getType()); |
| llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false, |
| /*isVolatile=*/false}; |
| builder.restoreIP( |
| ompBuilder->createAtomicWrite(ompLoc, x, expr, ao, allocaIP)); |
| return success(); |
| } |
| |
| /// Converts an LLVM dialect binary operation to the corresponding enum value |
| /// for `atomicrmw` supported binary operation. |
| llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) { |
| return llvm::TypeSwitch<Operation *, llvm::AtomicRMWInst::BinOp>(&op) |
| .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; }) |
| .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; }) |
| .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; }) |
| .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; }) |
| .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; }) |
| .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; }) |
| .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; }) |
| .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; }) |
| .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; }) |
| .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP); |
| } |
| |
| /// Converts an OpenMP atomic update operation using OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, |
| llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| if (failed(checkImplementationStatus(*opInst))) |
| return failure(); |
| |
| // Convert values and types. |
| auto &innerOpList = opInst.getRegion().front().getOperations(); |
| bool isXBinopExpr{false}; |
| llvm::AtomicRMWInst::BinOp binop; |
| mlir::Value mlirExpr; |
| llvm::Value *llvmExpr = nullptr; |
| llvm::Value *llvmX = nullptr; |
| llvm::Type *llvmXElementType = nullptr; |
| if (innerOpList.size() == 2) { |
| // The two operations here are the update and the terminator. |
| // Since we can identify the update operation, there is a possibility |
| // that we can generate the atomicrmw instruction. |
| mlir::Operation &innerOp = *opInst.getRegion().front().begin(); |
| if (!llvm::is_contained(innerOp.getOperands(), |
| opInst.getRegion().getArgument(0))) { |
| return opInst.emitError("no atomic update operation with region argument" |
| " as operand found inside atomic.update region"); |
| } |
| binop = convertBinOpToAtomic(innerOp); |
| isXBinopExpr = innerOp.getOperand(0) == opInst.getRegion().getArgument(0); |
| mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0)); |
| llvmExpr = moduleTranslation.lookupValue(mlirExpr); |
| } else { |
| // Since the update region includes more than one operation |
| // we will resort to generating a cmpxchg loop. |
| binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP; |
| } |
| llvmX = moduleTranslation.lookupValue(opInst.getX()); |
| llvmXElementType = moduleTranslation.convertType( |
| opInst.getRegion().getArgument(0).getType()); |
| llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType, |
| /*isSigned=*/false, |
| /*isVolatile=*/false}; |
| |
| llvm::AtomicOrdering atomicOrdering = |
| convertAtomicOrdering(opInst.getMemoryOrder()); |
| |
| // Generate update code. |
| auto updateFn = |
| [&opInst, &moduleTranslation]( |
| llvm::Value *atomicx, |
| llvm::IRBuilder<> &builder) -> llvm::Expected<llvm::Value *> { |
| Block &bb = *opInst.getRegion().begin(); |
| moduleTranslation.mapValue(*opInst.getRegion().args_begin(), atomicx); |
| moduleTranslation.mapBlock(&bb, builder.GetInsertBlock()); |
| if (failed(moduleTranslation.convertBlock(bb, true, builder))) |
| return llvm::make_error<PreviouslyReportedError>(); |
| |
| omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator()); |
| assert(yieldop && yieldop.getResults().size() == 1 && |
| "terminator must be omp.yield op and it must have exactly one " |
| "argument"); |
| return moduleTranslation.lookupValue(yieldop.getResults()[0]); |
| }; |
| |
| // Handle ambiguous alloca, if any. |
| auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation); |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = |
| ompBuilder->createAtomicUpdate(ompLoc, allocaIP, llvmAtomicX, llvmExpr, |
| atomicOrdering, binop, updateFn, |
| isXBinopExpr); |
| |
| if (failed(handleError(afterIP, *opInst))) |
| return failure(); |
| |
| builder.restoreIP(*afterIP); |
| return success(); |
| } |
| |
| static LogicalResult |
| convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, |
| llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| if (failed(checkImplementationStatus(*atomicCaptureOp))) |
| return failure(); |
| |
| mlir::Value mlirExpr; |
| bool isXBinopExpr = false, isPostfixUpdate = false; |
| llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP; |
| |
| omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp(); |
| omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp(); |
| |
| assert((atomicUpdateOp || atomicWriteOp) && |
| "internal op must be an atomic.update or atomic.write op"); |
| |
| if (atomicWriteOp) { |
| isPostfixUpdate = true; |
| mlirExpr = atomicWriteOp.getExpr(); |
| } else { |
| isPostfixUpdate = atomicCaptureOp.getSecondOp() == |
| atomicCaptureOp.getAtomicUpdateOp().getOperation(); |
| auto &innerOpList = atomicUpdateOp.getRegion().front().getOperations(); |
| // Find the binary update operation that uses the region argument |
| // and get the expression to update |
| if (innerOpList.size() == 2) { |
| mlir::Operation &innerOp = *atomicUpdateOp.getRegion().front().begin(); |
| if (!llvm::is_contained(innerOp.getOperands(), |
| atomicUpdateOp.getRegion().getArgument(0))) { |
| return atomicUpdateOp.emitError( |
| "no atomic update operation with region argument" |
| " as operand found inside atomic.update region"); |
| } |
| binop = convertBinOpToAtomic(innerOp); |
| isXBinopExpr = |
| innerOp.getOperand(0) == atomicUpdateOp.getRegion().getArgument(0); |
| mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0)); |
| } else { |
| binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP; |
| } |
| } |
| |
| llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr); |
| llvm::Value *llvmX = |
| moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getX()); |
| llvm::Value *llvmV = |
| moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getV()); |
| llvm::Type *llvmXElementType = moduleTranslation.convertType( |
| atomicCaptureOp.getAtomicReadOp().getElementType()); |
| llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType, |
| /*isSigned=*/false, |
| /*isVolatile=*/false}; |
| llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType, |
| /*isSigned=*/false, |
| /*isVolatile=*/false}; |
| |
| llvm::AtomicOrdering atomicOrdering = |
| convertAtomicOrdering(atomicCaptureOp.getMemoryOrder()); |
| |
| auto updateFn = |
| [&](llvm::Value *atomicx, |
| llvm::IRBuilder<> &builder) -> llvm::Expected<llvm::Value *> { |
| if (atomicWriteOp) |
| return moduleTranslation.lookupValue(atomicWriteOp.getExpr()); |
| Block &bb = *atomicUpdateOp.getRegion().begin(); |
| moduleTranslation.mapValue(*atomicUpdateOp.getRegion().args_begin(), |
| atomicx); |
| moduleTranslation.mapBlock(&bb, builder.GetInsertBlock()); |
| if (failed(moduleTranslation.convertBlock(bb, true, builder))) |
| return llvm::make_error<PreviouslyReportedError>(); |
| |
| omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator()); |
| assert(yieldop && yieldop.getResults().size() == 1 && |
| "terminator must be omp.yield op and it must have exactly one " |
| "argument"); |
| return moduleTranslation.lookupValue(yieldop.getResults()[0]); |
| }; |
| |
| // Handle ambiguous alloca, if any. |
| auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation); |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = |
| ompBuilder->createAtomicCapture( |
| ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering, |
| binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr); |
| |
| if (failed(handleError(afterIP, *atomicCaptureOp))) |
| return failure(); |
| |
| builder.restoreIP(*afterIP); |
| return success(); |
| } |
| |
| static llvm::omp::Directive convertCancellationConstructType( |
| omp::ClauseCancellationConstructType directive) { |
| switch (directive) { |
| case omp::ClauseCancellationConstructType::Loop: |
| return llvm::omp::Directive::OMPD_for; |
| case omp::ClauseCancellationConstructType::Parallel: |
| return llvm::omp::Directive::OMPD_parallel; |
| case omp::ClauseCancellationConstructType::Sections: |
| return llvm::omp::Directive::OMPD_sections; |
| case omp::ClauseCancellationConstructType::Taskgroup: |
| return llvm::omp::Directive::OMPD_taskgroup; |
| } |
| } |
| |
| static LogicalResult |
| convertOmpCancel(omp::CancelOp op, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| |
| if (failed(checkImplementationStatus(*op.getOperation()))) |
| return failure(); |
| |
| llvm::Value *ifCond = nullptr; |
| if (Value ifVar = op.getIfExpr()) |
| ifCond = moduleTranslation.lookupValue(ifVar); |
| |
| llvm::omp::Directive cancelledDirective = |
| convertCancellationConstructType(op.getCancelDirective()); |
| |
| llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = |
| ompBuilder->createCancel(ompLoc, ifCond, cancelledDirective); |
| |
| if (failed(handleError(afterIP, *op.getOperation()))) |
| return failure(); |
| |
| builder.restoreIP(afterIP.get()); |
| |
| return success(); |
| } |
| |
| /// Converts an OpenMP Threadprivate operation into LLVM IR using |
| /// OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| auto threadprivateOp = cast<omp::ThreadprivateOp>(opInst); |
| |
| if (failed(checkImplementationStatus(opInst))) |
| return failure(); |
| |
| Value symAddr = threadprivateOp.getSymAddr(); |
| auto *symOp = symAddr.getDefiningOp(); |
| |
| if (auto asCast = dyn_cast<LLVM::AddrSpaceCastOp>(symOp)) |
| symOp = asCast.getOperand().getDefiningOp(); |
| |
| if (!isa<LLVM::AddressOfOp>(symOp)) |
| return opInst.emitError("Addressing symbol not found"); |
| LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp); |
| |
| LLVM::GlobalOp global = |
| addressOfOp.getGlobal(moduleTranslation.symbolTable()); |
| llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global); |
| |
| if (!ompBuilder->Config.isTargetDevice()) { |
| llvm::Type *type = globalValue->getValueType(); |
| llvm::TypeSize typeSize = |
| builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize( |
| type); |
| llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedValue()); |
| llvm::Value *callInst = ompBuilder->createCachedThreadPrivate( |
| ompLoc, globalValue, size, global.getSymName() + ".cache"); |
| moduleTranslation.mapValue(opInst.getResult(0), callInst); |
| } else { |
| moduleTranslation.mapValue(opInst.getResult(0), globalValue); |
| } |
| |
| return success(); |
| } |
| |
| static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind |
| convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause) { |
| switch (deviceClause) { |
| case mlir::omp::DeclareTargetDeviceType::host: |
| return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost; |
| break; |
| case mlir::omp::DeclareTargetDeviceType::nohost: |
| return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost; |
| break; |
| case mlir::omp::DeclareTargetDeviceType::any: |
| return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny; |
| break; |
| } |
| llvm_unreachable("unhandled device clause"); |
| } |
| |
| static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind |
| convertToCaptureClauseKind( |
| mlir::omp::DeclareTargetCaptureClause captureClause) { |
| switch (captureClause) { |
| case mlir::omp::DeclareTargetCaptureClause::to: |
| return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo; |
| case mlir::omp::DeclareTargetCaptureClause::link: |
| return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink; |
| case mlir::omp::DeclareTargetCaptureClause::enter: |
| return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter; |
| } |
| llvm_unreachable("unhandled capture clause"); |
| } |
| |
| static llvm::SmallString<64> |
| getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp, |
| llvm::OpenMPIRBuilder &ompBuilder) { |
| llvm::SmallString<64> suffix; |
| llvm::raw_svector_ostream os(suffix); |
| if (globalOp.getVisibility() == mlir::SymbolTable::Visibility::Private) { |
| auto loc = globalOp->getLoc()->findInstanceOf<FileLineColLoc>(); |
| auto fileInfoCallBack = [&loc]() { |
| return std::pair<std::string, uint64_t>( |
| llvm::StringRef(loc.getFilename()), loc.getLine()); |
| }; |
| |
| os << llvm::format( |
| "_%x", ompBuilder.getTargetEntryUniqueInfo(fileInfoCallBack).FileID); |
| } |
| os << "_decl_tgt_ref_ptr"; |
| |
| return suffix; |
| } |
| |
| static bool isDeclareTargetLink(mlir::Value value) { |
| if (auto addressOfOp = |
| llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) { |
| auto modOp = addressOfOp->getParentOfType<mlir::ModuleOp>(); |
| Operation *gOp = modOp.lookupSymbol(addressOfOp.getGlobalName()); |
| if (auto declareTargetGlobal = |
| llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(gOp)) |
| if (declareTargetGlobal.getDeclareTargetCaptureClause() == |
| mlir::omp::DeclareTargetCaptureClause::link) |
| return true; |
| } |
| return false; |
| } |
| |
| // Returns the reference pointer generated by the lowering of the declare target |
| // operation in cases where the link clause is used or the to clause is used in |
| // USM mode. |
| static llvm::Value * |
| getRefPtrIfDeclareTarget(mlir::Value value, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| |
| // An easier way to do this may just be to keep track of any pointer |
| // references and their mapping to their respective operation |
| if (auto addressOfOp = |
| llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) { |
| if (auto gOp = llvm::dyn_cast_or_null<LLVM::GlobalOp>( |
| addressOfOp->getParentOfType<mlir::ModuleOp>().lookupSymbol( |
| addressOfOp.getGlobalName()))) { |
| |
| if (auto declareTargetGlobal = |
| llvm::dyn_cast<mlir::omp::DeclareTargetInterface>( |
| gOp.getOperation())) { |
| |
| // In this case, we must utilise the reference pointer generated by the |
| // declare target operation, similar to Clang |
| if ((declareTargetGlobal.getDeclareTargetCaptureClause() == |
| mlir::omp::DeclareTargetCaptureClause::link) || |
| (declareTargetGlobal.getDeclareTargetCaptureClause() == |
| mlir::omp::DeclareTargetCaptureClause::to && |
| ompBuilder->Config.hasRequiresUnifiedSharedMemory())) { |
| llvm::SmallString<64> suffix = |
| getDeclareTargetRefPtrSuffix(gOp, *ompBuilder); |
| |
| if (gOp.getSymName().contains(suffix)) |
| return moduleTranslation.getLLVMModule()->getNamedValue( |
| gOp.getSymName()); |
| |
| return moduleTranslation.getLLVMModule()->getNamedValue( |
| (gOp.getSymName().str() + suffix.str()).str()); |
| } |
| } |
| } |
| } |
| |
| return nullptr; |
| } |
| |
| namespace { |
| // Append customMappers information to existing MapInfosTy |
| struct MapInfosTy : llvm::OpenMPIRBuilder::MapInfosTy { |
| SmallVector<Operation *, 4> Mappers; |
| |
| /// Append arrays in \a CurInfo. |
| void append(MapInfosTy &curInfo) { |
| Mappers.append(curInfo.Mappers.begin(), curInfo.Mappers.end()); |
| llvm::OpenMPIRBuilder::MapInfosTy::append(curInfo); |
| } |
| }; |
| // A small helper structure to contain data gathered |
| // for map lowering and coalese it into one area and |
| // avoiding extra computations such as searches in the |
| // llvm module for lowered mapped variables or checking |
| // if something is declare target (and retrieving the |
| // value) more than neccessary. |
| struct MapInfoData : MapInfosTy { |
| llvm::SmallVector<bool, 4> IsDeclareTarget; |
| llvm::SmallVector<bool, 4> IsAMember; |
| // Identify if mapping was added by mapClause or use_device clauses. |
| llvm::SmallVector<bool, 4> IsAMapping; |
| llvm::SmallVector<mlir::Operation *, 4> MapClause; |
| llvm::SmallVector<llvm::Value *, 4> OriginalValue; |
| // Stripped off array/pointer to get the underlying |
| // element type |
| llvm::SmallVector<llvm::Type *, 4> BaseType; |
| |
| /// Append arrays in \a CurInfo. |
| void append(MapInfoData &CurInfo) { |
| IsDeclareTarget.append(CurInfo.IsDeclareTarget.begin(), |
| CurInfo.IsDeclareTarget.end()); |
| MapClause.append(CurInfo.MapClause.begin(), CurInfo.MapClause.end()); |
| OriginalValue.append(CurInfo.OriginalValue.begin(), |
| CurInfo.OriginalValue.end()); |
| BaseType.append(CurInfo.BaseType.begin(), CurInfo.BaseType.end()); |
| MapInfosTy::append(CurInfo); |
| } |
| }; |
| } // namespace |
| |
| uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl) { |
| if (auto nestedArrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>( |
| arrTy.getElementType())) |
| return getArrayElementSizeInBits(nestedArrTy, dl); |
| return dl.getTypeSizeInBits(arrTy.getElementType()); |
| } |
| |
| // This function calculates the size to be offloaded for a specified type, given |
| // its associated map clause (which can contain bounds information which affects |
| // the total size), this size is calculated based on the underlying element type |
| // e.g. given a 1-D array of ints, we will calculate the size from the integer |
| // type * number of elements in the array. This size can be used in other |
| // calculations but is ultimately used as an argument to the OpenMP runtimes |
| // kernel argument structure which is generated through the combinedInfo data |
| // structures. |
| // This function is somewhat equivalent to Clang's getExprTypeSize inside of |
| // CGOpenMPRuntime.cpp. |
| llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type, |
| Operation *clauseOp, llvm::Value *basePointer, |
| llvm::Type *baseType, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| if (auto memberClause = |
| mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(clauseOp)) { |
| // This calculates the size to transfer based on bounds and the underlying |
| // element type, provided bounds have been specified (Fortran |
| // pointers/allocatables/target and arrays that have sections specified fall |
| // into this as well). |
| if (!memberClause.getBounds().empty()) { |
| llvm::Value *elementCount = builder.getInt64(1); |
| for (auto bounds : memberClause.getBounds()) { |
| if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>( |
| bounds.getDefiningOp())) { |
| // The below calculation for the size to be mapped calculated from the |
| // map.info's bounds is: (elemCount * [UB - LB] + 1), later we |
| // multiply by the underlying element types byte size to get the full |
| // size to be offloaded based on the bounds |
| elementCount = builder.CreateMul( |
| elementCount, |
| builder.CreateAdd( |
| builder.CreateSub( |
| moduleTranslation.lookupValue(boundOp.getUpperBound()), |
| moduleTranslation.lookupValue(boundOp.getLowerBound())), |
| builder.getInt64(1))); |
| } |
| } |
| |
| // utilising getTypeSizeInBits instead of getTypeSize as getTypeSize gives |
| // the size in inconsistent byte or bit format. |
| uint64_t underlyingTypeSzInBits = dl.getTypeSizeInBits(type); |
| if (auto arrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(type)) |
| underlyingTypeSzInBits = getArrayElementSizeInBits(arrTy, dl); |
| |
| // The size in bytes x number of elements, the sizeInBytes stored is |
| // the underyling types size, e.g. if ptr<i32>, it'll be the i32's |
| // size, so we do some on the fly runtime math to get the size in |
| // bytes from the extent (ub - lb) * sizeInBytes. NOTE: This may need |
| // some adjustment for members with more complex types. |
| return builder.CreateMul(elementCount, |
| builder.getInt64(underlyingTypeSzInBits / 8)); |
| } |
| } |
| |
| return builder.getInt64(dl.getTypeSizeInBits(type) / 8); |
| } |
| |
| static void collectMapDataFromMapOperands( |
| MapInfoData &mapData, SmallVectorImpl<Value> &mapVars, |
| LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, |
| llvm::IRBuilderBase &builder, ArrayRef<Value> useDevPtrOperands = {}, |
| ArrayRef<Value> useDevAddrOperands = {}, |
| ArrayRef<Value> hasDevAddrOperands = {}) { |
| auto checkIsAMember = [](const auto &mapVars, auto mapOp) { |
| // Check if this is a member mapping and correctly assign that it is, if |
| // it is a member of a larger object. |
| // TODO: Need better handling of members, and distinguishing of members |
| // that are implicitly allocated on device vs explicitly passed in as |
| // arguments. |
| // TODO: May require some further additions to support nested record |
| // types, i.e. member maps that can have member maps. |
| for (Value mapValue : mapVars) { |
| auto map = cast<omp::MapInfoOp>(mapValue.getDefiningOp()); |
| for (auto member : map.getMembers()) |
| if (member == mapOp) |
| return true; |
| } |
| return false; |
| }; |
| |
| // Process MapOperands |
| for (Value mapValue : mapVars) { |
| auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp()); |
| Value offloadPtr = |
| mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr(); |
| mapData.OriginalValue.push_back(moduleTranslation.lookupValue(offloadPtr)); |
| mapData.Pointers.push_back(mapData.OriginalValue.back()); |
| |
| if (llvm::Value *refPtr = |
| getRefPtrIfDeclareTarget(offloadPtr, |
| moduleTranslation)) { // declare target |
| mapData.IsDeclareTarget.push_back(true); |
| mapData.BasePointers.push_back(refPtr); |
| } else { // regular mapped variable |
| mapData.IsDeclareTarget.push_back(false); |
| mapData.BasePointers.push_back(mapData.OriginalValue.back()); |
| } |
| |
| mapData.BaseType.push_back( |
| moduleTranslation.convertType(mapOp.getVarType())); |
| mapData.Sizes.push_back( |
| getSizeInBytes(dl, mapOp.getVarType(), mapOp, mapData.Pointers.back(), |
| mapData.BaseType.back(), builder, moduleTranslation)); |
| mapData.MapClause.push_back(mapOp.getOperation()); |
| mapData.Types.push_back( |
| llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType())); |
| mapData.Names.push_back(LLVM::createMappingInformation( |
| mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder())); |
| mapData.DevicePointers.push_back(llvm::OpenMPIRBuilder::DeviceInfoTy::None); |
| if (mapOp.getMapperId()) |
| mapData.Mappers.push_back( |
| SymbolTable::lookupNearestSymbolFrom<omp::DeclareMapperOp>( |
| mapOp, mapOp.getMapperIdAttr())); |
| else |
| mapData.Mappers.push_back(nullptr); |
| mapData.IsAMapping.push_back(true); |
| mapData.IsAMember.push_back(checkIsAMember(mapVars, mapOp)); |
| } |
| |
| auto findMapInfo = [&mapData](llvm::Value *val, |
| llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) { |
| unsigned index = 0; |
| bool found = false; |
| for (llvm::Value *basePtr : mapData.OriginalValue) { |
| if (basePtr == val && mapData.IsAMapping[index]) { |
| found = true; |
| mapData.Types[index] |= |
| llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; |
| mapData.DevicePointers[index] = devInfoTy; |
| } |
| index++; |
| } |
| return found; |
| }; |
| |
| // Process useDevPtr(Addr)Operands |
| auto addDevInfos = [&](const llvm::ArrayRef<Value> &useDevOperands, |
| llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) { |
| for (Value mapValue : useDevOperands) { |
| auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp()); |
| Value offloadPtr = |
| mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr(); |
| llvm::Value *origValue = moduleTranslation.lookupValue(offloadPtr); |
| |
| // Check if map info is already present for this entry. |
| if (!findMapInfo(origValue, devInfoTy)) { |
| mapData.OriginalValue.push_back(origValue); |
| mapData.Pointers.push_back(mapData.OriginalValue.back()); |
| mapData.IsDeclareTarget.push_back(false); |
| mapData.BasePointers.push_back(mapData.OriginalValue.back()); |
| mapData.BaseType.push_back( |
| moduleTranslation.convertType(mapOp.getVarType())); |
| mapData.Sizes.push_back(builder.getInt64(0)); |
| mapData.MapClause.push_back(mapOp.getOperation()); |
| mapData.Types.push_back( |
| llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM); |
| mapData.Names.push_back(LLVM::createMappingInformation( |
| mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder())); |
| mapData.DevicePointers.push_back(devInfoTy); |
| mapData.Mappers.push_back(nullptr); |
| mapData.IsAMapping.push_back(false); |
| mapData.IsAMember.push_back(checkIsAMember(useDevOperands, mapOp)); |
| } |
| } |
| }; |
| |
| addDevInfos(useDevAddrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Address); |
| addDevInfos(useDevPtrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer); |
| |
| for (Value mapValue : hasDevAddrOperands) { |
| auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp()); |
| Value offloadPtr = |
| mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr(); |
| llvm::Value *origValue = moduleTranslation.lookupValue(offloadPtr); |
| auto mapType = |
| static_cast<llvm::omp::OpenMPOffloadMappingFlags>(mapOp.getMapType()); |
| auto mapTypeAlways = llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS; |
| |
| mapData.OriginalValue.push_back(origValue); |
| mapData.BasePointers.push_back(origValue); |
| mapData.Pointers.push_back(origValue); |
| mapData.IsDeclareTarget.push_back(false); |
| mapData.BaseType.push_back( |
| moduleTranslation.convertType(mapOp.getVarType())); |
| mapData.Sizes.push_back( |
| builder.getInt64(dl.getTypeSize(mapOp.getVarType()))); |
| mapData.MapClause.push_back(mapOp.getOperation()); |
| if (llvm::to_underlying(mapType & mapTypeAlways)) { |
| // Descriptors are mapped with the ALWAYS flag, since they can get |
| // rematerialized, so the address of the decriptor for a given object |
| // may change from one place to another. |
| mapData.Types.push_back(mapType); |
| // Technically it's possible for a non-descriptor mapping to have |
| // both has-device-addr and ALWAYS, so lookup the mapper in case it |
| // exists. |
| if (mapOp.getMapperId()) { |
| mapData.Mappers.push_back( |
| SymbolTable::lookupNearestSymbolFrom<omp::DeclareMapperOp>( |
| mapOp, mapOp.getMapperIdAttr())); |
| } else { |
| mapData.Mappers.push_back(nullptr); |
| } |
| } else { |
| mapData.Types.push_back( |
| llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL); |
| mapData.Mappers.push_back(nullptr); |
| } |
| mapData.Names.push_back(LLVM::createMappingInformation( |
| mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder())); |
| mapData.DevicePointers.push_back( |
| llvm::OpenMPIRBuilder::DeviceInfoTy::Address); |
| mapData.IsAMapping.push_back(false); |
| mapData.IsAMember.push_back(checkIsAMember(hasDevAddrOperands, mapOp)); |
| } |
| } |
| |
| static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp) { |
| auto *res = llvm::find(mapData.MapClause, memberOp); |
| assert(res != mapData.MapClause.end() && |
| "MapInfoOp for member not found in MapData, cannot return index"); |
| return std::distance(mapData.MapClause.begin(), res); |
| } |
| |
| static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo, |
| bool first) { |
| ArrayAttr indexAttr = mapInfo.getMembersIndexAttr(); |
| // Only 1 member has been mapped, we can return it. |
| if (indexAttr.size() == 1) |
| return cast<omp::MapInfoOp>(mapInfo.getMembers()[0].getDefiningOp()); |
| |
| llvm::SmallVector<size_t> indices(indexAttr.size()); |
| std::iota(indices.begin(), indices.end(), 0); |
| |
| llvm::sort(indices.begin(), indices.end(), |
| [&](const size_t a, const size_t b) { |
| auto memberIndicesA = cast<ArrayAttr>(indexAttr[a]); |
| auto memberIndicesB = cast<ArrayAttr>(indexAttr[b]); |
| for (const auto it : llvm::zip(memberIndicesA, memberIndicesB)) { |
| int64_t aIndex = cast<IntegerAttr>(std::get<0>(it)).getInt(); |
| int64_t bIndex = cast<IntegerAttr>(std::get<1>(it)).getInt(); |
| |
| if (aIndex == bIndex) |
| continue; |
| |
| if (aIndex < bIndex) |
| return first; |
| |
| if (aIndex > bIndex) |
| return !first; |
| } |
| |
| // Iterated the up until the end of the smallest member and |
| // they were found to be equal up to that point, so select |
| // the member with the lowest index count, so the "parent" |
| return memberIndicesA.size() < memberIndicesB.size(); |
| }); |
| |
| return llvm::cast<omp::MapInfoOp>( |
| mapInfo.getMembers()[indices.front()].getDefiningOp()); |
| } |
| |
| /// This function calculates the array/pointer offset for map data provided |
| /// with bounds operations, e.g. when provided something like the following: |
| /// |
| /// Fortran |
| /// map(tofrom: array(2:5, 3:2)) |
| /// or |
| /// C++ |
| /// map(tofrom: array[1:4][2:3]) |
| /// We must calculate the initial pointer offset to pass across, this function |
| /// performs this using bounds. |
| /// |
| /// NOTE: which while specified in row-major order it currently needs to be |
| /// flipped for Fortran's column order array allocation and access (as |
| /// opposed to C++'s row-major, hence the backwards processing where order is |
| /// important). This is likely important to keep in mind for the future when |
| /// we incorporate a C++ frontend, both frontends will need to agree on the |
| /// ordering of generated bounds operations (one may have to flip them) to |
| /// make the below lowering frontend agnostic. The offload size |
| /// calcualtion may also have to be adjusted for C++. |
| std::vector<llvm::Value *> |
| calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation, |
| llvm::IRBuilderBase &builder, bool isArrayTy, |
| OperandRange bounds) { |
| std::vector<llvm::Value *> idx; |
| // There's no bounds to calculate an offset from, we can safely |
| // ignore and return no indices. |
| if (bounds.empty()) |
| return idx; |
| |
| // If we have an array type, then we have its type so can treat it as a |
| // normal GEP instruction where the bounds operations are simply indexes |
| // into the array. We currently do reverse order of the bounds, which |
| // I believe leans more towards Fortran's column-major in memory. |
| if (isArrayTy) { |
| idx.push_back(builder.getInt64(0)); |
| for (int i = bounds.size() - 1; i >= 0; --i) { |
| if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>( |
| bounds[i].getDefiningOp())) { |
| idx.push_back(moduleTranslation.lookupValue(boundOp.getLowerBound())); |
| } |
| } |
| } else { |
| // If we do not have an array type, but we have bounds, then we're dealing |
| // with a pointer that's being treated like an array and we have the |
| // underlying type e.g. an i32, or f64 etc, e.g. a fortran descriptor base |
| // address (pointer pointing to the actual data) so we must caclulate the |
| // offset using a single index which the following two loops attempts to |
| // compute. |
| |
| // Calculates the size offset we need to make per row e.g. first row or |
| // column only needs to be offset by one, but the next would have to be |
| // the previous row/column offset multiplied by the extent of current row. |
| // |
| // For example ([1][10][100]): |
| // |
| // - First row/column we move by 1 for each index increment |
| // - Second row/column we move by 1 (first row/column) * 10 (extent/size of |
| // current) for 10 for each index increment |
| // - Third row/column we would move by 10 (second row/column) * |
| // (extent/size of current) 100 for 1000 for each index increment |
| std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64(1)}; |
| for (size_t i = 1; i < bounds.size(); ++i) { |
| if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>( |
| bounds[i].getDefiningOp())) { |
| dimensionIndexSizeOffset.push_back(builder.CreateMul( |
| moduleTranslation.lookupValue(boundOp.getExtent()), |
| dimensionIndexSizeOffset[i - 1])); |
| } |
| } |
| |
| // Now that we have calculated how much we move by per index, we must |
| // multiply each lower bound offset in indexes by the size offset we |
| // have calculated in the previous and accumulate the results to get |
| // our final resulting offset. |
| for (int i = bounds.size() - 1; i >= 0; --i) { |
| if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>( |
| bounds[i].getDefiningOp())) { |
| if (idx.empty()) |
| idx.emplace_back(builder.CreateMul( |
| moduleTranslation.lookupValue(boundOp.getLowerBound()), |
| dimensionIndexSizeOffset[i])); |
| else |
| idx.back() = builder.CreateAdd( |
| idx.back(), builder.CreateMul(moduleTranslation.lookupValue( |
| boundOp.getLowerBound()), |
| dimensionIndexSizeOffset[i])); |
| } |
| } |
| } |
| |
| return idx; |
| } |
| |
| // This creates two insertions into the MapInfosTy data structure for the |
| // "parent" of a set of members, (usually a container e.g. |
| // class/structure/derived type) when subsequent members have also been |
| // explicitly mapped on the same map clause. Certain types, such as Fortran |
| // descriptors are mapped like this as well, however, the members are |
| // implicit as far as a user is concerned, but we must explicitly map them |
| // internally. |
| // |
| // This function also returns the memberOfFlag for this particular parent, |
| // which is utilised in subsequent member mappings (by modifying there map type |
| // with it) to indicate that a member is part of this parent and should be |
| // treated by the runtime as such. Important to achieve the correct mapping. |
| // |
| // This function borrows a lot from Clang's emitCombinedEntry function |
| // inside of CGOpenMPRuntime.cpp |
| static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers( |
| LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, |
| llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, |
| MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams) { |
| // Map the first segment of our structure |
| combinedInfo.Types.emplace_back( |
| isTargetParams |
| ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
| : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE); |
| combinedInfo.DevicePointers.emplace_back( |
| mapData.DevicePointers[mapDataIndex]); |
| combinedInfo.Mappers.emplace_back(mapData.Mappers[mapDataIndex]); |
| combinedInfo.Names.emplace_back(LLVM::createMappingInformation( |
| mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder)); |
| combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]); |
| |
| // Calculate size of the parent object being mapped based on the |
| // addresses at runtime, highAddr - lowAddr = size. This of course |
| // doesn't factor in allocated data like pointers, hence the further |
| // processing of members specified by users, or in the case of |
| // Fortran pointers and allocatables, the mapping of the pointed to |
| // data by the descriptor (which itself, is a structure containing |
| // runtime information on the dynamically allocated data). |
| auto parentClause = |
| llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]); |
| |
| llvm::Value *lowAddr, *highAddr; |
| if (!parentClause.getPartialMap()) { |
| lowAddr = builder.CreatePointerCast(mapData.Pointers[mapDataIndex], |
| builder.getPtrTy()); |
| highAddr = builder.CreatePointerCast( |
| builder.CreateConstGEP1_32(mapData.BaseType[mapDataIndex], |
| mapData.Pointers[mapDataIndex], 1), |
| builder.getPtrTy()); |
| combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]); |
| } else { |
| auto mapOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]); |
| int firstMemberIdx = getMapDataMemberIdx( |
| mapData, getFirstOrLastMappedMemberPtr(mapOp, true)); |
| lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx], |
| builder.getPtrTy()); |
| int lastMemberIdx = getMapDataMemberIdx( |
| mapData, getFirstOrLastMappedMemberPtr(mapOp, false)); |
| highAddr = builder.CreatePointerCast( |
| builder.CreateGEP(mapData.BaseType[lastMemberIdx], |
| mapData.Pointers[lastMemberIdx], builder.getInt64(1)), |
| builder.getPtrTy()); |
| combinedInfo.Pointers.emplace_back(mapData.Pointers[firstMemberIdx]); |
| } |
| |
| llvm::Value *size = builder.CreateIntCast( |
| builder.CreatePtrDiff(builder.getInt8Ty(), highAddr, lowAddr), |
| builder.getInt64Ty(), |
| /*isSigned=*/false); |
| combinedInfo.Sizes.push_back(size); |
| |
| llvm::omp::OpenMPOffloadMappingFlags memberOfFlag = |
| ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1); |
| |
| // This creates the initial MEMBER_OF mapping that consists of |
| // the parent/top level container (same as above effectively, except |
| // with a fixed initial compile time size and separate maptype which |
| // indicates the true mape type (tofrom etc.). This parent mapping is |
| // only relevant if the structure in its totality is being mapped, |
| // otherwise the above suffices. |
| if (!parentClause.getPartialMap()) { |
| // TODO: This will need to be expanded to include the whole host of logic |
| // for the map flags that Clang currently supports (e.g. it should do some |
| // further case specific flag modifications). For the moment, it handles |
| // what we support as expected. |
| llvm::omp::OpenMPOffloadMappingFlags mapFlag = mapData.Types[mapDataIndex]; |
| ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag); |
| combinedInfo.Types.emplace_back(mapFlag); |
| combinedInfo.DevicePointers.emplace_back( |
| llvm::OpenMPIRBuilder::DeviceInfoTy::None); |
| combinedInfo.Mappers.emplace_back(nullptr); |
| combinedInfo.Names.emplace_back(LLVM::createMappingInformation( |
| mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder)); |
| combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]); |
| combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]); |
| combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIndex]); |
| } |
| return memberOfFlag; |
| } |
| |
| // The intent is to verify if the mapped data being passed is a |
| // pointer -> pointee that requires special handling in certain cases, |
| // e.g. applying the OMP_MAP_PTR_AND_OBJ map type. |
| // |
| // There may be a better way to verify this, but unfortunately with |
| // opaque pointers we lose the ability to easily check if something is |
| // a pointer whilst maintaining access to the underlying type. |
| static bool checkIfPointerMap(omp::MapInfoOp mapOp) { |
| // If we have a varPtrPtr field assigned then the underlying type is a pointer |
| if (mapOp.getVarPtrPtr()) |
| return true; |
| |
| // If the map data is declare target with a link clause, then it's represented |
| // as a pointer when we lower it to LLVM-IR even if at the MLIR level it has |
| // no relation to pointers. |
| if (isDeclareTargetLink(mapOp.getVarPtr())) |
| return true; |
| |
| return false; |
| } |
| |
| // This function is intended to add explicit mappings of members |
| static void processMapMembersWithParent( |
| LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, |
| llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, |
| MapInfoData &mapData, uint64_t mapDataIndex, |
| llvm::omp::OpenMPOffloadMappingFlags memberOfFlag) { |
| |
| auto parentClause = |
| llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]); |
| |
| for (auto mappedMembers : parentClause.getMembers()) { |
| auto memberClause = |
| llvm::cast<omp::MapInfoOp>(mappedMembers.getDefiningOp()); |
| int memberDataIdx = getMapDataMemberIdx(mapData, memberClause); |
| |
| assert(memberDataIdx >= 0 && "could not find mapped member of structure"); |
| |
| // If we're currently mapping a pointer to a block of data, we must |
| // initially map the pointer, and then attatch/bind the data with a |
| // subsequent map to the pointer. This segment of code generates the |
| // pointer mapping, which can in certain cases be optimised out as Clang |
| // currently does in its lowering. However, for the moment we do not do so, |
| // in part as we currently have substantially less information on the data |
| // being mapped at this stage. |
| if (checkIfPointerMap(memberClause)) { |
| auto mapFlag = |
| llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType()); |
| mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; |
| mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF; |
| ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag); |
| combinedInfo.Types.emplace_back(mapFlag); |
| combinedInfo.DevicePointers.emplace_back( |
| llvm::OpenMPIRBuilder::DeviceInfoTy::None); |
| combinedInfo.Mappers.emplace_back(nullptr); |
| combinedInfo.Names.emplace_back( |
| LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder)); |
| combinedInfo.BasePointers.emplace_back( |
| mapData.BasePointers[mapDataIndex]); |
| combinedInfo.Pointers.emplace_back(mapData.BasePointers[memberDataIdx]); |
| combinedInfo.Sizes.emplace_back(builder.getInt64( |
| moduleTranslation.getLLVMModule()->getDataLayout().getPointerSize())); |
| } |
| |
| // Same MemberOfFlag to indicate its link with parent and other members |
| // of. |
| auto mapFlag = |
| llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType()); |
| mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; |
| mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF; |
| ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag); |
| if (checkIfPointerMap(memberClause)) |
| mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ; |
| |
| combinedInfo.Types.emplace_back(mapFlag); |
| combinedInfo.DevicePointers.emplace_back( |
| mapData.DevicePointers[memberDataIdx]); |
| combinedInfo.Mappers.emplace_back(mapData.Mappers[memberDataIdx]); |
| combinedInfo.Names.emplace_back( |
| LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder)); |
| uint64_t basePointerIndex = |
| checkIfPointerMap(memberClause) ? memberDataIdx : mapDataIndex; |
| combinedInfo.BasePointers.emplace_back( |
| mapData.BasePointers[basePointerIndex]); |
| combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]); |
| |
| llvm::Value *size = mapData.Sizes[memberDataIdx]; |
| if (checkIfPointerMap(memberClause)) { |
| size = builder.CreateSelect( |
| builder.CreateIsNull(mapData.Pointers[memberDataIdx]), |
| builder.getInt64(0), size); |
| } |
| |
| combinedInfo.Sizes.emplace_back(size); |
| } |
| } |
| |
| static void processIndividualMap(MapInfoData &mapData, size_t mapDataIdx, |
| MapInfosTy &combinedInfo, bool isTargetParams, |
| int mapDataParentIdx = -1) { |
| // Declare Target Mappings are excluded from being marked as |
| // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're |
| // marked with OMP_MAP_PTR_AND_OBJ instead. |
| auto mapFlag = mapData.Types[mapDataIdx]; |
| auto mapInfoOp = llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIdx]); |
| |
| bool isPtrTy = checkIfPointerMap(mapInfoOp); |
| if (isPtrTy) |
| mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ; |
| |
| if (isTargetParams && !mapData.IsDeclareTarget[mapDataIdx]) |
| mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; |
| |
| if (mapInfoOp.getMapCaptureType() == omp::VariableCaptureKind::ByCopy && |
| !isPtrTy) |
| mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL; |
| |
| // if we're provided a mapDataParentIdx, then the data being mapped is |
| // part of a larger object (in a parent <-> member mapping) and in this |
| // case our BasePointer should be the parent. |
| if (mapDataParentIdx >= 0) |
| combinedInfo.BasePointers.emplace_back( |
| mapData.BasePointers[mapDataParentIdx]); |
| else |
| combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIdx]); |
| |
| combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIdx]); |
| combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[mapDataIdx]); |
| combinedInfo.Mappers.emplace_back(mapData.Mappers[mapDataIdx]); |
| combinedInfo.Names.emplace_back(mapData.Names[mapDataIdx]); |
| combinedInfo.Types.emplace_back(mapFlag); |
| combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIdx]); |
| } |
| |
| static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation, |
| llvm::IRBuilderBase &builder, |
| llvm::OpenMPIRBuilder &ompBuilder, |
| DataLayout &dl, MapInfosTy &combinedInfo, |
| MapInfoData &mapData, uint64_t mapDataIndex, |
| bool isTargetParams) { |
| auto parentClause = |
| llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]); |
| |
| // If we have a partial map (no parent referenced in the map clauses of the |
| // directive, only members) and only a single member, we do not need to bind |
| // the map of the member to the parent, we can pass the member separately. |
| if (parentClause.getMembers().size() == 1 && parentClause.getPartialMap()) { |
| auto memberClause = llvm::cast<omp::MapInfoOp>( |
| parentClause.getMembers()[0].getDefiningOp()); |
| int memberDataIdx = getMapDataMemberIdx(mapData, memberClause); |
| // Note: Clang treats arrays with explicit bounds that fall into this |
| // category as a parent with map case, however, it seems this isn't a |
| // requirement, and processing them as an individual map is fine. So, |
| // we will handle them as individual maps for the moment, as it's |
| // difficult for us to check this as we always require bounds to be |
| // specified currently and it's also marginally more optimal (single |
| // map rather than two). The difference may come from the fact that |
| // Clang maps array without bounds as pointers (which we do not |
| // currently do), whereas we treat them as arrays in all cases |
| // currently. |
| processIndividualMap(mapData, memberDataIdx, combinedInfo, isTargetParams, |
| mapDataIndex); |
| return; |
| } |
| |
| llvm::omp::OpenMPOffloadMappingFlags memberOfParentFlag = |
| mapParentWithMembers(moduleTranslation, builder, ompBuilder, dl, |
| combinedInfo, mapData, mapDataIndex, isTargetParams); |
| processMapMembersWithParent(moduleTranslation, builder, ompBuilder, dl, |
| combinedInfo, mapData, mapDataIndex, |
| memberOfParentFlag); |
| } |
| |
| // This is a variation on Clang's GenerateOpenMPCapturedVars, which |
| // generates different operation (e.g. load/store) combinations for |
| // arguments to the kernel, based on map capture kinds which are then |
| // utilised in the combinedInfo in place of the original Map value. |
| static void |
| createAlteredByCaptureMap(MapInfoData &mapData, |
| LLVM::ModuleTranslation &moduleTranslation, |
| llvm::IRBuilderBase &builder) { |
| for (size_t i = 0; i < mapData.MapClause.size(); ++i) { |
| // if it's declare target, skip it, it's handled separately. |
| if (!mapData.IsDeclareTarget[i]) { |
| auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]); |
| omp::VariableCaptureKind captureKind = mapOp.getMapCaptureType(); |
| bool isPtrTy = checkIfPointerMap(mapOp); |
| |
| // Currently handles array sectioning lowerbound case, but more |
| // logic may be required in the future. Clang invokes EmitLValue, |
| // which has specialised logic for special Clang types such as user |
| // defines, so it is possible we will have to extend this for |
| // structures or other complex types. As the general idea is that this |
| // function mimics some of the logic from Clang that we require for |
| // kernel argument passing from host -> device. |
| switch (captureKind) { |
| case omp::VariableCaptureKind::ByRef: { |
| llvm::Value *newV = mapData.Pointers[i]; |
| std::vector<llvm::Value *> offsetIdx = calculateBoundsOffset( |
| moduleTranslation, builder, mapData.BaseType[i]->isArrayTy(), |
| mapOp.getBounds()); |
| if (isPtrTy) |
| newV = builder.CreateLoad(builder.getPtrTy(), newV); |
| |
| if (!offsetIdx.empty()) |
| newV = builder.CreateInBoundsGEP(mapData.BaseType[i], newV, offsetIdx, |
| "array_offset"); |
| mapData.Pointers[i] = newV; |
| } break; |
| case omp::VariableCaptureKind::ByCopy: { |
| llvm::Type *type = mapData.BaseType[i]; |
| llvm::Value *newV; |
| if (mapData.Pointers[i]->getType()->isPointerTy()) |
| newV = builder.CreateLoad(type, mapData.Pointers[i]); |
| else |
| newV = mapData.Pointers[i]; |
| |
| if (!isPtrTy) { |
| auto curInsert = builder.saveIP(); |
| builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation)); |
| auto *memTempAlloc = |
| builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted"); |
| builder.restoreIP(curInsert); |
| |
| builder.CreateStore(newV, memTempAlloc); |
| newV = builder.CreateLoad(builder.getPtrTy(), memTempAlloc); |
| } |
| |
| mapData.Pointers[i] = newV; |
| mapData.BasePointers[i] = newV; |
| } break; |
| case omp::VariableCaptureKind::This: |
| case omp::VariableCaptureKind::VLAType: |
| mapData.MapClause[i]->emitOpError("Unhandled capture kind"); |
| break; |
| } |
| } |
| } |
| } |
| |
| // Generate all map related information and fill the combinedInfo. |
| static void genMapInfos(llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation, |
| DataLayout &dl, MapInfosTy &combinedInfo, |
| MapInfoData &mapData, bool isTargetParams = false) { |
| // We wish to modify some of the methods in which arguments are |
| // passed based on their capture type by the target region, this can |
| // involve generating new loads and stores, which changes the |
| // MLIR value to LLVM value mapping, however, we only wish to do this |
| // locally for the current function/target and also avoid altering |
| // ModuleTranslation, so we remap the base pointer or pointer stored |
| // in the map infos corresponding MapInfoData, which is later accessed |
| // by genMapInfos and createTarget to help generate the kernel and |
| // kernel arg structure. It primarily becomes relevant in cases like |
| // bycopy, or byref range'd arrays. In the default case, we simply |
| // pass thee pointer byref as both basePointer and pointer. |
| if (!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice()) |
| createAlteredByCaptureMap(mapData, moduleTranslation, builder); |
| |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| |
| // We operate under the assumption that all vectors that are |
| // required in MapInfoData are of equal lengths (either filled with |
| // default constructed data or appropiate information) so we can |
| // utilise the size from any component of MapInfoData, if we can't |
| // something is missing from the initial MapInfoData construction. |
| for (size_t i = 0; i < mapData.MapClause.size(); ++i) { |
| // NOTE/TODO: We currently do not support arbitrary depth record |
| // type mapping. |
| if (mapData.IsAMember[i]) |
| continue; |
| |
| auto mapInfoOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[i]); |
| if (!mapInfoOp.getMembers().empty()) { |
| processMapWithMembersOf(moduleTranslation, builder, *ompBuilder, dl, |
| combinedInfo, mapData, i, isTargetParams); |
| continue; |
| } |
| |
| processIndividualMap(mapData, i, combinedInfo, isTargetParams); |
| } |
| } |
| |
| static llvm::Expected<llvm::Function *> |
| emitUserDefinedMapper(Operation *declMapperOp, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation, |
| llvm::StringRef mapperFuncName); |
| |
| static llvm::Expected<llvm::Function *> |
| getOrCreateUserDefinedMapperFunc(Operation *op, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| auto declMapperOp = cast<omp::DeclareMapperOp>(op); |
| std::string mapperFuncName = |
| moduleTranslation.getOpenMPBuilder()->createPlatformSpecificName( |
| {"omp_mapper", declMapperOp.getSymName()}); |
| |
| if (auto *lookupFunc = moduleTranslation.lookupFunction(mapperFuncName)) |
| return lookupFunc; |
| |
| return emitUserDefinedMapper(declMapperOp, builder, moduleTranslation, |
| mapperFuncName); |
| } |
| |
| static llvm::Expected<llvm::Function *> |
| emitUserDefinedMapper(Operation *op, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation, |
| llvm::StringRef mapperFuncName) { |
| auto declMapperOp = cast<omp::DeclareMapperOp>(op); |
| auto declMapperInfoOp = declMapperOp.getDeclareMapperInfo(); |
| DataLayout dl = DataLayout(declMapperOp->getParentOfType<ModuleOp>()); |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| llvm::Type *varType = moduleTranslation.convertType(declMapperOp.getType()); |
| SmallVector<Value> mapVars = declMapperInfoOp.getMapVars(); |
| |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| |
| // Fill up the arrays with all the mapped variables. |
| MapInfosTy combinedInfo; |
| auto genMapInfoCB = |
| [&](InsertPointTy codeGenIP, llvm::Value *ptrPHI, |
| llvm::Value *unused2) -> llvm::OpenMPIRBuilder::MapInfosOrErrorTy { |
| builder.restoreIP(codeGenIP); |
| moduleTranslation.mapValue(declMapperOp.getSymVal(), ptrPHI); |
| moduleTranslation.mapBlock(&declMapperOp.getRegion().front(), |
| builder.GetInsertBlock()); |
| if (failed(moduleTranslation.convertBlock(declMapperOp.getRegion().front(), |
| /*ignoreArguments=*/true, |
| builder))) |
| return llvm::make_error<PreviouslyReportedError>(); |
| MapInfoData mapData; |
| collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, dl, |
| builder); |
| genMapInfos(builder, moduleTranslation, dl, combinedInfo, mapData); |
| |
| // Drop the mapping that is no longer necessary so that the same region can |
| // be processed multiple times. |
| moduleTranslation.forgetMapping(declMapperOp.getRegion()); |
| return combinedInfo; |
| }; |
| |
| auto customMapperCB = [&](unsigned i) -> llvm::Expected<llvm::Function *> { |
| if (!combinedInfo.Mappers[i]) |
| return nullptr; |
| return getOrCreateUserDefinedMapperFunc(combinedInfo.Mappers[i], builder, |
| moduleTranslation); |
| }; |
| |
| llvm::Expected<llvm::Function *> newFn = ompBuilder->emitUserDefinedMapper( |
| genMapInfoCB, varType, mapperFuncName, customMapperCB); |
| if (!newFn) |
| return newFn.takeError(); |
| moduleTranslation.mapFunction(mapperFuncName, *newFn); |
| return *newFn; |
| } |
| |
| static LogicalResult |
| convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| llvm::Value *ifCond = nullptr; |
| int64_t deviceID = llvm::omp::OMP_DEVICEID_UNDEF; |
| SmallVector<Value> mapVars; |
| SmallVector<Value> useDevicePtrVars; |
| SmallVector<Value> useDeviceAddrVars; |
| llvm::omp::RuntimeFunction RTLFn; |
| DataLayout DL = DataLayout(op->getParentOfType<ModuleOp>()); |
| |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| llvm::OpenMPIRBuilder::TargetDataInfo info(/*RequiresDevicePointerInfo=*/true, |
| /*SeparateBeginEndCalls=*/true); |
| |
| LogicalResult result = |
| llvm::TypeSwitch<Operation *, LogicalResult>(op) |
| .Case([&](omp::TargetDataOp dataOp) { |
| if (failed(checkImplementationStatus(*dataOp))) |
| return failure(); |
| |
| if (auto ifVar = dataOp.getIfExpr()) |
| ifCond = moduleTranslation.lookupValue(ifVar); |
| |
| if (auto devId = dataOp.getDevice()) |
| if (auto constOp = |
| dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp())) |
| if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue())) |
| deviceID = intAttr.getInt(); |
| |
| mapVars = dataOp.getMapVars(); |
| useDevicePtrVars = dataOp.getUseDevicePtrVars(); |
| useDeviceAddrVars = dataOp.getUseDeviceAddrVars(); |
| return success(); |
| }) |
| .Case([&](omp::TargetEnterDataOp enterDataOp) -> LogicalResult { |
| if (failed(checkImplementationStatus(*enterDataOp))) |
| return failure(); |
| |
| if (auto ifVar = enterDataOp.getIfExpr()) |
| ifCond = moduleTranslation.lookupValue(ifVar); |
| |
| if (auto devId = enterDataOp.getDevice()) |
| if (auto constOp = |
| dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp())) |
| if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue())) |
| deviceID = intAttr.getInt(); |
| RTLFn = |
| enterDataOp.getNowait() |
| ? llvm::omp::OMPRTL___tgt_target_data_begin_nowait_mapper |
| : llvm::omp::OMPRTL___tgt_target_data_begin_mapper; |
| mapVars = enterDataOp.getMapVars(); |
| info.HasNoWait = enterDataOp.getNowait(); |
| return success(); |
| }) |
| .Case([&](omp::TargetExitDataOp exitDataOp) -> LogicalResult { |
| if (failed(checkImplementationStatus(*exitDataOp))) |
| return failure(); |
| |
| if (auto ifVar = exitDataOp.getIfExpr()) |
| ifCond = moduleTranslation.lookupValue(ifVar); |
| |
| if (auto devId = exitDataOp.getDevice()) |
| if (auto constOp = |
| dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp())) |
| if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue())) |
| deviceID = intAttr.getInt(); |
| |
| RTLFn = exitDataOp.getNowait() |
| ? llvm::omp::OMPRTL___tgt_target_data_end_nowait_mapper |
| : llvm::omp::OMPRTL___tgt_target_data_end_mapper; |
| mapVars = exitDataOp.getMapVars(); |
| info.HasNoWait = exitDataOp.getNowait(); |
| return success(); |
| }) |
| .Case([&](omp::TargetUpdateOp updateDataOp) -> LogicalResult { |
| if (failed(checkImplementationStatus(*updateDataOp))) |
| return failure(); |
| |
| if (auto ifVar = updateDataOp.getIfExpr()) |
| ifCond = moduleTranslation.lookupValue(ifVar); |
| |
| if (auto devId = updateDataOp.getDevice()) |
| if (auto constOp = |
| dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp())) |
| if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue())) |
| deviceID = intAttr.getInt(); |
| |
| RTLFn = |
| updateDataOp.getNowait() |
| ? llvm::omp::OMPRTL___tgt_target_data_update_nowait_mapper |
| : llvm::omp::OMPRTL___tgt_target_data_update_mapper; |
| mapVars = updateDataOp.getMapVars(); |
| info.HasNoWait = updateDataOp.getNowait(); |
| return success(); |
| }) |
| .Default([&](Operation *op) { |
| llvm_unreachable("unexpected operation"); |
| return failure(); |
| }); |
| |
| if (failed(result)) |
| return failure(); |
| |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| MapInfoData mapData; |
| collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, DL, |
| builder, useDevicePtrVars, useDeviceAddrVars); |
| |
| // Fill up the arrays with all the mapped variables. |
| MapInfosTy combinedInfo; |
| auto genMapInfoCB = [&](InsertPointTy codeGenIP) -> MapInfosTy & { |
| builder.restoreIP(codeGenIP); |
| genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData); |
| return combinedInfo; |
| }; |
| |
| // Define a lambda to apply mappings between use_device_addr and |
| // use_device_ptr base pointers, and their associated block arguments. |
| auto mapUseDevice = |
| [&moduleTranslation]( |
| llvm::OpenMPIRBuilder::DeviceInfoTy type, |
| llvm::ArrayRef<BlockArgument> blockArgs, |
| llvm::SmallVectorImpl<Value> &useDeviceVars, MapInfoData &mapInfoData, |
| llvm::function_ref<llvm::Value *(llvm::Value *)> mapper = nullptr) { |
| for (auto [arg, useDevVar] : |
| llvm::zip_equal(blockArgs, useDeviceVars)) { |
| |
| auto getMapBasePtr = [](omp::MapInfoOp mapInfoOp) { |
| return mapInfoOp.getVarPtrPtr() ? mapInfoOp.getVarPtrPtr() |
| : mapInfoOp.getVarPtr(); |
| }; |
| |
| auto useDevMap = cast<omp::MapInfoOp>(useDevVar.getDefiningOp()); |
| for (auto [mapClause, devicePointer, basePointer] : llvm::zip_equal( |
| mapInfoData.MapClause, mapInfoData.DevicePointers, |
| mapInfoData.BasePointers)) { |
| auto mapOp = cast<omp::MapInfoOp>(mapClause); |
| if (getMapBasePtr(mapOp) != getMapBasePtr(useDevMap) || |
| devicePointer != type) |
| continue; |
| |
| if (llvm::Value *devPtrInfoMap = |
| mapper ? mapper(basePointer) : basePointer) { |
| moduleTranslation.mapValue(arg, devPtrInfoMap); |
| break; |
| } |
| } |
| } |
| }; |
| |
| using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy; |
| auto bodyGenCB = [&](InsertPointTy codeGenIP, BodyGenTy bodyGenType) |
| -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy { |
| builder.restoreIP(codeGenIP); |
| assert(isa<omp::TargetDataOp>(op) && |
| "BodyGen requested for non TargetDataOp"); |
| auto blockArgIface = cast<omp::BlockArgOpenMPOpInterface>(op); |
| Region ®ion = cast<omp::TargetDataOp>(op).getRegion(); |
| switch (bodyGenType) { |
| case BodyGenTy::Priv: |
| // Check if any device ptr/addr info is available |
| if (!info.DevicePtrInfoMap.empty()) { |
| mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address, |
| blockArgIface.getUseDeviceAddrBlockArgs(), |
| useDeviceAddrVars, mapData, |
| [&](llvm::Value *basePointer) -> llvm::Value * { |
| if (!info.DevicePtrInfoMap[basePointer].second) |
| return nullptr; |
| return builder.CreateLoad( |
| builder.getPtrTy(), |
| info.DevicePtrInfoMap[basePointer].second); |
| }); |
| mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer, |
| blockArgIface.getUseDevicePtrBlockArgs(), useDevicePtrVars, |
| mapData, [&](llvm::Value *basePointer) { |
| return info.DevicePtrInfoMap[basePointer].second; |
| }); |
| |
| if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder, |
| moduleTranslation))) |
| return llvm::make_error<PreviouslyReportedError>(); |
| } |
| break; |
| case BodyGenTy::DupNoPriv: |
| // We must always restoreIP regardless of doing anything the caller |
| // does not restore it, leading to incorrect (no) branch generation. |
| builder.restoreIP(codeGenIP); |
| break; |
| case BodyGenTy::NoPriv: |
| // If device info is available then region has already been generated |
| if (info.DevicePtrInfoMap.empty()) { |
| // For device pass, if use_device_ptr(addr) mappings were present, |
| // we need to link them here before codegen. |
| if (ompBuilder->Config.IsTargetDevice.value_or(false)) { |
| mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address, |
| blockArgIface.getUseDeviceAddrBlockArgs(), |
| useDeviceAddrVars, mapData); |
| mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer, |
| blockArgIface.getUseDevicePtrBlockArgs(), |
| useDevicePtrVars, mapData); |
| } |
| |
| if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder, |
| moduleTranslation))) |
| return llvm::make_error<PreviouslyReportedError>(); |
| } |
| break; |
| } |
| return builder.saveIP(); |
| }; |
| |
| auto customMapperCB = |
| [&](unsigned int i) -> llvm::Expected<llvm::Function *> { |
| if (!combinedInfo.Mappers[i]) |
| return nullptr; |
| info.HasMapper = true; |
| return getOrCreateUserDefinedMapperFunc(combinedInfo.Mappers[i], builder, |
| moduleTranslation); |
| }; |
| |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| llvm::OpenMPIRBuilder::InsertPointTy allocaIP = |
| findAllocaInsertPoint(builder, moduleTranslation); |
| llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = [&]() { |
| if (isa<omp::TargetDataOp>(op)) |
| return ompBuilder->createTargetData(ompLoc, allocaIP, builder.saveIP(), |
| builder.getInt64(deviceID), ifCond, |
| info, genMapInfoCB, customMapperCB, |
| /*MapperFunc=*/nullptr, bodyGenCB, |
| /*DeviceAddrCB=*/nullptr); |
| return ompBuilder->createTargetData( |
| ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond, |
| info, genMapInfoCB, customMapperCB, &RTLFn); |
| }(); |
| |
| if (failed(handleError(afterIP, *op))) |
| return failure(); |
| |
| builder.restoreIP(*afterIP); |
| return success(); |
| } |
| |
| static LogicalResult |
| convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| auto distributeOp = cast<omp::DistributeOp>(opInst); |
| if (failed(checkImplementationStatus(opInst))) |
| return failure(); |
| |
| /// Process teams op reduction in distribute if the reduction is contained in |
| /// the distribute op. |
| omp::TeamsOp teamsOp = opInst.getParentOfType<omp::TeamsOp>(); |
| bool doDistributeReduction = |
| teamsOp ? teamsReductionContainedInDistribute(teamsOp) : false; |
| |
| DenseMap<Value, llvm::Value *> reductionVariableMap; |
| unsigned numReductionVars = teamsOp ? teamsOp.getNumReductionVars() : 0; |
| SmallVector<omp::DeclareReductionOp> reductionDecls; |
| SmallVector<llvm::Value *> privateReductionVariables(numReductionVars); |
| llvm::ArrayRef<bool> isByRef; |
| |
| if (doDistributeReduction) { |
| isByRef = getIsByRef(teamsOp.getReductionByref()); |
| assert(isByRef.size() == teamsOp.getNumReductionVars()); |
| |
| collectReductionDecls(teamsOp, reductionDecls); |
| llvm::OpenMPIRBuilder::InsertPointTy allocaIP = |
| findAllocaInsertPoint(builder, moduleTranslation); |
| |
| MutableArrayRef<BlockArgument> reductionArgs = |
| llvm::cast<omp::BlockArgOpenMPOpInterface>(*teamsOp) |
| .getReductionBlockArgs(); |
| |
| if (failed(allocAndInitializeReductionVars( |
| teamsOp, reductionArgs, builder, moduleTranslation, allocaIP, |
| reductionDecls, privateReductionVariables, reductionVariableMap, |
| isByRef))) |
| return failure(); |
| } |
| |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| auto bodyGenCB = [&](InsertPointTy allocaIP, |
| InsertPointTy codeGenIP) -> llvm::Error { |
| // Save the alloca insertion point on ModuleTranslation stack for use in |
| // nested regions. |
| LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame( |
| moduleTranslation, allocaIP); |
| |
| // DistributeOp has only one region associated with it. |
| builder.restoreIP(codeGenIP); |
| PrivateVarsInfo privVarsInfo(distributeOp); |
| |
| llvm::Expected<llvm::BasicBlock *> afterAllocas = |
| allocatePrivateVars(builder, moduleTranslation, privVarsInfo, allocaIP); |
| if (handleError(afterAllocas, opInst).failed()) |
| return llvm::make_error<PreviouslyReportedError>(); |
| |
| if (handleError(initPrivateVars(builder, moduleTranslation, privVarsInfo), |
| opInst) |
| .failed()) |
| return llvm::make_error<PreviouslyReportedError>(); |
| |
| if (failed(copyFirstPrivateVars( |
| builder, moduleTranslation, privVarsInfo.mlirVars, |
| privVarsInfo.llvmVars, privVarsInfo.privatizers))) |
| return llvm::make_error<PreviouslyReportedError>(); |
| |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| llvm::Expected<llvm::BasicBlock *> regionBlock = |
| convertOmpOpRegions(distributeOp.getRegion(), "omp.distribute.region", |
| builder, moduleTranslation); |
| if (!regionBlock) |
| return regionBlock.takeError(); |
| builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin()); |
| |
| // Skip applying a workshare loop below when translating 'distribute |
| // parallel do' (it's been already handled by this point while translating |
| // the nested omp.wsloop). |
| if (!isa_and_present<omp::WsloopOp>(distributeOp.getNestedWrapper())) { |
| // TODO: Add support for clauses which are valid for DISTRIBUTE |
| // constructs. Static schedule is the default. |
| auto schedule = omp::ClauseScheduleKind::Static; |
| bool isOrdered = false; |
| std::optional<omp::ScheduleModifier> scheduleMod; |
| bool isSimd = false; |
| llvm::omp::WorksharingLoopType workshareLoopType = |
| llvm::omp::WorksharingLoopType::DistributeStaticLoop; |
| bool loopNeedsBarrier = false; |
| llvm::Value *chunk = nullptr; |
| |
| llvm::CanonicalLoopInfo *loopInfo = |
| findCurrentLoopInfo(moduleTranslation); |
| llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP = |
| ompBuilder->applyWorkshareLoop( |
| ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier, |
| convertToScheduleKind(schedule), chunk, isSimd, |
| scheduleMod == omp::ScheduleModifier::monotonic, |
| scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered, |
| workshareLoopType); |
| |
| if (!wsloopIP) |
| return wsloopIP.takeError(); |
| } |
| |
| if (failed(cleanupPrivateVars(builder, moduleTranslation, |
| distributeOp.getLoc(), privVarsInfo.llvmVars, |
| privVarsInfo.privatizers))) |
| return llvm::make_error<PreviouslyReportedError>(); |
| |
| return llvm::Error::success(); |
| }; |
| |
| llvm::OpenMPIRBuilder::InsertPointTy allocaIP = |
| findAllocaInsertPoint(builder, moduleTranslation); |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = |
| ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB); |
| |
| if (failed(handleError(afterIP, opInst))) |
| return failure(); |
| |
| builder.restoreIP(*afterIP); |
| |
| if (doDistributeReduction) { |
| // Process the reductions if required. |
| return createReductionsAndCleanup( |
| teamsOp, builder, moduleTranslation, allocaIP, reductionDecls, |
| privateReductionVariables, isByRef, |
| /*isNoWait*/ false, /*isTeamsReduction*/ true); |
| } |
| return success(); |
| } |
| |
| /// Lowers the FlagsAttr which is applied to the module on the device |
| /// pass when offloading, this attribute contains OpenMP RTL globals that can |
| /// be passed as flags to the frontend, otherwise they are set to default |
| LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| if (!cast<mlir::ModuleOp>(op)) |
| return failure(); |
| |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| |
| ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device", |
| attribute.getOpenmpDeviceVersion()); |
| |
| if (attribute.getNoGpuLib()) |
| return success(); |
| |
| ompBuilder->createGlobalFlag( |
| attribute.getDebugKind() /*LangOpts().OpenMPTargetDebug*/, |
| "__omp_rtl_debug_kind"); |
| ompBuilder->createGlobalFlag( |
| attribute |
| .getAssumeTeamsOversubscription() /*LangOpts().OpenMPTeamSubscription*/ |
| , |
| "__omp_rtl_assume_teams_oversubscription"); |
| ompBuilder->createGlobalFlag( |
| attribute |
| .getAssumeThreadsOversubscription() /*LangOpts().OpenMPThreadSubscription*/ |
| , |
| "__omp_rtl_assume_threads_oversubscription"); |
| ompBuilder->createGlobalFlag( |
| attribute.getAssumeNoThreadState() /*LangOpts().OpenMPNoThreadState*/, |
| "__omp_rtl_assume_no_thread_state"); |
| ompBuilder->createGlobalFlag( |
| attribute |
| .getAssumeNoNestedParallelism() /*LangOpts().OpenMPNoNestedParallelism*/ |
| , |
| "__omp_rtl_assume_no_nested_parallelism"); |
| return success(); |
| } |
| |
| static void getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo, |
| omp::TargetOp targetOp, |
| llvm::StringRef parentName = "") { |
| auto fileLoc = targetOp.getLoc()->findInstanceOf<FileLineColLoc>(); |
| |
| assert(fileLoc && "No file found from location"); |
| StringRef fileName = fileLoc.getFilename().getValue(); |
| |
| llvm::sys::fs::UniqueID id; |
| uint64_t line = fileLoc.getLine(); |
| if (auto ec = llvm::sys::fs::getUniqueID(fileName, id)) { |
| size_t fileHash = llvm::hash_value(fileName.str()); |
| size_t deviceId = 0xdeadf17e; |
| targetInfo = |
| llvm::TargetRegionEntryInfo(parentName, deviceId, fileHash, line); |
| } else { |
| targetInfo = llvm::TargetRegionEntryInfo(parentName, id.getDevice(), |
| id.getFile(), line); |
| } |
| } |
| |
| static void |
| handleDeclareTargetMapVar(MapInfoData &mapData, |
| LLVM::ModuleTranslation &moduleTranslation, |
| llvm::IRBuilderBase &builder, llvm::Function *func) { |
| for (size_t i = 0; i < mapData.MapClause.size(); ++i) { |
| // In the case of declare target mapped variables, the basePointer is |
| // the reference pointer generated by the convertDeclareTargetAttr |
| // method. Whereas the kernelValue is the original variable, so for |
| // the device we must replace all uses of this original global variable |
| // (stored in kernelValue) with the reference pointer (stored in |
| // basePointer for declare target mapped variables), as for device the |
| // data is mapped into this reference pointer and should be loaded |
| // from it, the original variable is discarded. On host both exist and |
| // metadata is generated (elsewhere in the convertDeclareTargetAttr) |
| // function to link the two variables in the runtime and then both the |
| // reference pointer and the pointer are assigned in the kernel argument |
| // structure for the host. |
| if (mapData.IsDeclareTarget[i]) { |
| // If the original map value is a constant, then we have to make sure all |
| // of it's uses within the current kernel/function that we are going to |
| // rewrite are converted to instructions, as we will be altering the old |
| // use (OriginalValue) from a constant to an instruction, which will be |
| // illegal and ICE the compiler if the user is a constant expression of |
| // some kind e.g. a constant GEP. |
| if (auto *constant = dyn_cast<llvm::Constant>(mapData.OriginalValue[i])) |
| convertUsersOfConstantsToInstructions(constant, func, false); |
| |
| // The users iterator will get invalidated if we modify an element, |
| // so we populate this vector of uses to alter each user on an |
| // individual basis to emit its own load (rather than one load for |
| // all). |
| llvm::SmallVector<llvm::User *> userVec; |
| for (llvm::User *user : mapData.OriginalValue[i]->users()) |
| userVec.push_back(user); |
| |
| for (llvm::User *user : userVec) { |
| if (auto *insn = dyn_cast<llvm::Instruction>(user)) { |
| if (insn->getFunction() == func) { |
| auto *load = builder.CreateLoad(mapData.BasePointers[i]->getType(), |
| mapData.BasePointers[i]); |
| load->moveBefore(insn->getIterator()); |
| user->replaceUsesOfWith(mapData.OriginalValue[i], load); |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| // The createDeviceArgumentAccessor function generates |
| // instructions for retrieving (acessing) kernel |
| // arguments inside of the device kernel for use by |
| // the kernel. This enables different semantics such as |
| // the creation of temporary copies of data allowing |
| // semantics like read-only/no host write back kernel |
| // arguments. |
| // |
| // This currently implements a very light version of Clang's |
| // EmitParmDecl's handling of direct argument handling as well |
| // as a portion of the argument access generation based on |
| // capture types found at the end of emitOutlinedFunctionPrologue |
| // in Clang. The indirect path handling of EmitParmDecl's may be |
| // required for future work, but a direct 1-to-1 copy doesn't seem |
| // possible as the logic is rather scattered throughout Clang's |
| // lowering and perhaps we wish to deviate slightly. |
| // |
| // \param mapData - A container containing vectors of information |
| // corresponding to the input argument, which should have a |
| // corresponding entry in the MapInfoData containers |
| // OrigialValue's. |
| // \param arg - This is the generated kernel function argument that |
| // corresponds to the passed in input argument. We generated different |
| // accesses of this Argument, based on capture type and other Input |
| // related information. |
| // \param input - This is the host side value that will be passed to |
| // the kernel i.e. the kernel input, we rewrite all uses of this within |
| // the kernel (as we generate the kernel body based on the target's region |
| // which maintians references to the original input) to the retVal argument |
| // apon exit of this function inside of the OMPIRBuilder. This interlinks |
| // the kernel argument to future uses of it in the function providing |
| // appropriate "glue" instructions inbetween. |
| // \param retVal - This is the value that all uses of input inside of the |
| // kernel will be re-written to, the goal of this function is to generate |
| // an appropriate location for the kernel argument to be accessed from, |
| // e.g. ByRef will result in a temporary allocation location and then |
| // a store of the kernel argument into this allocated memory which |
| // will then be loaded from, ByCopy will use the allocated memory |
| // directly. |
| static llvm::IRBuilderBase::InsertPoint |
| createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg, |
| llvm::Value *input, llvm::Value *&retVal, |
| llvm::IRBuilderBase &builder, |
| llvm::OpenMPIRBuilder &ompBuilder, |
| LLVM::ModuleTranslation &moduleTranslation, |
| llvm::IRBuilderBase::InsertPoint allocaIP, |
| llvm::IRBuilderBase::InsertPoint codeGenIP) { |
| builder.restoreIP(allocaIP); |
| |
| omp::VariableCaptureKind capture = omp::VariableCaptureKind::ByRef; |
| LLVM::TypeToLLVMIRTranslator typeToLLVMIRTranslator( |
| ompBuilder.M.getContext()); |
| unsigned alignmentValue = 0; |
| // Find the associated MapInfoData entry for the current input |
| for (size_t i = 0; i < mapData.MapClause.size(); ++i) |
| if (mapData.OriginalValue[i] == input) { |
| auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]); |
| capture = mapOp.getMapCaptureType(); |
| // Get information of alignment of mapped object |
| alignmentValue = typeToLLVMIRTranslator.getPreferredAlignment( |
| mapOp.getVarType(), ompBuilder.M.getDataLayout()); |
| break; |
| } |
| |
| unsigned int allocaAS = ompBuilder.M.getDataLayout().getAllocaAddrSpace(); |
| unsigned int defaultAS = |
| ompBuilder.M.getDataLayout().getProgramAddressSpace(); |
| |
| // Create the alloca for the argument the current point. |
| llvm::Value *v = builder.CreateAlloca(arg.getType(), allocaAS); |
| |
| if (allocaAS != defaultAS && arg.getType()->isPointerTy()) |
| v = builder.CreateAddrSpaceCast(v, builder.getPtrTy(defaultAS)); |
| |
| builder.CreateStore(&arg, v); |
| |
| builder.restoreIP(codeGenIP); |
| |
| switch (capture) { |
| case omp::VariableCaptureKind::ByCopy: { |
| retVal = v; |
| break; |
| } |
| case omp::VariableCaptureKind::ByRef: { |
| llvm::LoadInst *loadInst = builder.CreateAlignedLoad( |
| v->getType(), v, |
| ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType())); |
| // CreateAlignedLoad function creates similar LLVM IR: |
| // %res = load ptr, ptr %input, align 8 |
| // This LLVM IR does not contain information about alignment |
| // of the loaded value. We need to add !align metadata to unblock |
| // optimizer. The existence of the !align metadata on the instruction |
| // tells the optimizer that the value loaded is known to be aligned to |
| // a boundary specified by the integer value in the metadata node. |
| // Example: |
| // %res = load ptr, ptr %input, align 8, !align !align_md_node |
| // ^ ^ |
| // | | |
| // alignment of %input address | |
| // | |
| // alignment of %res object |
| if (v->getType()->isPointerTy() && alignmentValue) { |
| llvm::MDBuilder MDB(builder.getContext()); |
| loadInst->setMetadata( |
| llvm::LLVMContext::MD_align, |
| llvm::MDNode::get(builder.getContext(), |
| MDB.createConstant(llvm::ConstantInt::get( |
| llvm::Type::getInt64Ty(builder.getContext()), |
| alignmentValue)))); |
| } |
| retVal = loadInst; |
| |
| break; |
| } |
| case omp::VariableCaptureKind::This: |
| case omp::VariableCaptureKind::VLAType: |
| // TODO: Consider returning error to use standard reporting for |
| // unimplemented features. |
| assert(false && "Currently unsupported capture kind"); |
| break; |
| } |
| |
| return builder.saveIP(); |
| } |
| |
| /// Follow uses of `host_eval`-defined block arguments of the given `omp.target` |
| /// operation and populate output variables with their corresponding host value |
| /// (i.e. operand evaluated outside of the target region), based on their uses |
| /// inside of the target region. |
| /// |
| /// Loop bounds and steps are only optionally populated, if output vectors are |
| /// provided. |
| static void |
| extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads, |
| Value &numTeamsLower, Value &numTeamsUpper, |
| Value &threadLimit, |
| llvm::SmallVectorImpl<Value> *lowerBounds = nullptr, |
| llvm::SmallVectorImpl<Value> *upperBounds = nullptr, |
| llvm::SmallVectorImpl<Value> *steps = nullptr) { |
| auto blockArgIface = llvm::cast<omp::BlockArgOpenMPOpInterface>(*targetOp); |
| for (auto item : llvm::zip_equal(targetOp.getHostEvalVars(), |
| blockArgIface.getHostEvalBlockArgs())) { |
| Value hostEvalVar = std::get<0>(item), blockArg = std::get<1>(item); |
| |
| for (Operation *user : blockArg.getUsers()) { |
| llvm::TypeSwitch<Operation *>(user) |
| .Case([&](omp::TeamsOp teamsOp) { |
| if (teamsOp.getNumTeamsLower() == blockArg) |
| numTeamsLower = hostEvalVar; |
| else if (teamsOp.getNumTeamsUpper() == blockArg) |
| numTeamsUpper = hostEvalVar; |
| else if (teamsOp.getThreadLimit() == blockArg) |
| threadLimit = hostEvalVar; |
| else |
| llvm_unreachable("unsupported host_eval use"); |
| }) |
| .Case([&](omp::ParallelOp parallelOp) { |
| if (parallelOp.getNumThreads() == blockArg) |
| numThreads = hostEvalVar; |
| else |
| llvm_unreachable("unsupported host_eval use"); |
| }) |
| .Case([&](omp::LoopNestOp loopOp) { |
| auto processBounds = |
| [&](OperandRange opBounds, |
| llvm::SmallVectorImpl<Value> *outBounds) -> bool { |
| bool found = false; |
| for (auto [i, lb] : llvm::enumerate(opBounds)) { |
| if (lb == blockArg) { |
| found = true; |
| if (outBounds) |
| (*outBounds)[i] = hostEvalVar; |
| } |
| } |
| return found; |
| }; |
| bool found = |
| processBounds(loopOp.getLoopLowerBounds(), lowerBounds); |
| found = processBounds(loopOp.getLoopUpperBounds(), upperBounds) || |
| found; |
| found = processBounds(loopOp.getLoopSteps(), steps) || found; |
| (void)found; |
| assert(found && "unsupported host_eval use"); |
| }) |
| .Default([](Operation *) { |
| llvm_unreachable("unsupported host_eval use"); |
| }); |
| } |
| } |
| } |
| |
| /// If \p op is of the given type parameter, return it casted to that type. |
| /// Otherwise, if its immediate parent operation (or some other higher-level |
| /// parent, if \p immediateParent is false) is of that type, return that parent |
| /// casted to the given type. |
| /// |
| /// If \p op is \c null or neither it or its parent(s) are of the specified |
| /// type, return a \c null operation. |
| template <typename OpTy> |
| static OpTy castOrGetParentOfType(Operation *op, bool immediateParent = false) { |
| if (!op) |
| return OpTy(); |
| |
| if (OpTy casted = dyn_cast<OpTy>(op)) |
| return casted; |
| |
| if (immediateParent) |
| return dyn_cast_if_present<OpTy>(op->getParentOp()); |
| |
| return op->getParentOfType<OpTy>(); |
| } |
| |
| /// If the given \p value is defined by an \c llvm.mlir.constant operation and |
| /// it is of an integer type, return its value. |
| static std::optional<int64_t> extractConstInteger(Value value) { |
| if (!value) |
| return std::nullopt; |
| |
| if (auto constOp = |
| dyn_cast_if_present<LLVM::ConstantOp>(value.getDefiningOp())) |
| if (auto constAttr = dyn_cast<IntegerAttr>(constOp.getValue())) |
| return constAttr.getInt(); |
| |
| return std::nullopt; |
| } |
| |
| static uint64_t getTypeByteSize(mlir::Type type, const DataLayout &dl) { |
| uint64_t sizeInBits = dl.getTypeSizeInBits(type); |
| uint64_t sizeInBytes = sizeInBits / 8; |
| return sizeInBytes; |
| } |
| |
| template <typename OpTy> |
| static uint64_t getReductionDataSize(OpTy &op) { |
| if (op.getNumReductionVars() > 0) { |
| SmallVector<omp::DeclareReductionOp> reductions; |
| collectReductionDecls(op, reductions); |
| |
| llvm::SmallVector<mlir::Type> members; |
| members.reserve(reductions.size()); |
| for (omp::DeclareReductionOp &red : reductions) |
| members.push_back(red.getType()); |
| Operation *opp = op.getOperation(); |
| auto structType = mlir::LLVM::LLVMStructType::getLiteral( |
| opp->getContext(), members, /*isPacked=*/false); |
| DataLayout dl = DataLayout(opp->getParentOfType<ModuleOp>()); |
| return getTypeByteSize(structType, dl); |
| } |
| return 0; |
| } |
| |
| /// Populate default `MinTeams`, `MaxTeams` and `MaxThreads` to their default |
| /// values as stated by the corresponding clauses, if constant. |
| /// |
| /// These default values must be set before the creation of the outlined LLVM |
| /// function for the target region, so that they can be used to initialize the |
| /// corresponding global `ConfigurationEnvironmentTy` structure. |
| static void |
| initTargetDefaultAttrs(omp::TargetOp targetOp, Operation *capturedOp, |
| llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &attrs, |
| bool isTargetDevice, bool isGPU) { |
| // TODO: Handle constant 'if' clauses. |
| |
| Value numThreads, numTeamsLower, numTeamsUpper, threadLimit; |
| if (!isTargetDevice) { |
| extractHostEvalClauses(targetOp, numThreads, numTeamsLower, numTeamsUpper, |
| threadLimit); |
| } else { |
| // In the target device, values for these clauses are not passed as |
| // host_eval, but instead evaluated prior to entry to the region. This |
| // ensures values are mapped and available inside of the target region. |
| if (auto teamsOp = castOrGetParentOfType<omp::TeamsOp>(capturedOp)) { |
| numTeamsLower = teamsOp.getNumTeamsLower(); |
| numTeamsUpper = teamsOp.getNumTeamsUpper(); |
| threadLimit = teamsOp.getThreadLimit(); |
| } |
| |
| if (auto parallelOp = castOrGetParentOfType<omp::ParallelOp>(capturedOp)) |
| numThreads = parallelOp.getNumThreads(); |
| } |
| |
| // Handle clauses impacting the number of teams. |
| |
| int32_t minTeamsVal = 1, maxTeamsVal = -1; |
| if (castOrGetParentOfType<omp::TeamsOp>(capturedOp)) { |
| // TODO: Use `hostNumTeamsLower` to initialize `minTeamsVal`. For now, match |
| // clang and set min and max to the same value. |
| if (numTeamsUpper) { |
| if (auto val = extractConstInteger(numTeamsUpper)) |
| minTeamsVal = maxTeamsVal = *val; |
| } else { |
| minTeamsVal = maxTeamsVal = 0; |
| } |
| } else if (castOrGetParentOfType<omp::ParallelOp>(capturedOp, |
| /*immediateParent=*/true) || |
| castOrGetParentOfType<omp::SimdOp>(capturedOp, |
| /*immediateParent=*/true)) { |
| minTeamsVal = maxTeamsVal = 1; |
| } else { |
| minTeamsVal = maxTeamsVal = -1; |
| } |
| |
| // Handle clauses impacting the number of threads. |
| |
| auto setMaxValueFromClause = [](Value clauseValue, int32_t &result) { |
| if (!clauseValue) |
| return; |
| |
| if (auto val = extractConstInteger(clauseValue)) |
| result = *val; |
| |
| // Found an applicable clause, so it's not undefined. Mark as unknown |
| // because it's not constant. |
| if (result < 0) |
| result = 0; |
| }; |
| |
| // Extract 'thread_limit' clause from 'target' and 'teams' directives. |
| int32_t targetThreadLimitVal = -1, teamsThreadLimitVal = -1; |
| setMaxValueFromClause(targetOp.getThreadLimit(), targetThreadLimitVal); |
| setMaxValueFromClause(threadLimit, teamsThreadLimitVal); |
| |
| // Extract 'max_threads' clause from 'parallel' or set to 1 if it's SIMD. |
| int32_t maxThreadsVal = -1; |
| if (castOrGetParentOfType<omp::ParallelOp>(capturedOp)) |
| setMaxValueFromClause(numThreads, maxThreadsVal); |
| else if (castOrGetParentOfType<omp::SimdOp>(capturedOp, |
| /*immediateParent=*/true)) |
| maxThreadsVal = 1; |
| |
| // For max values, < 0 means unset, == 0 means set but unknown. Select the |
| // minimum value between 'max_threads' and 'thread_limit' clauses that were |
| // set. |
| int32_t combinedMaxThreadsVal = targetThreadLimitVal; |
| if (combinedMaxThreadsVal < 0 || |
| (teamsThreadLimitVal >= 0 && teamsThreadLimitVal < combinedMaxThreadsVal)) |
| combinedMaxThreadsVal = teamsThreadLimitVal; |
| |
| if (combinedMaxThreadsVal < 0 || |
| (maxThreadsVal >= 0 && maxThreadsVal < combinedMaxThreadsVal)) |
| combinedMaxThreadsVal = maxThreadsVal; |
| |
| int32_t reductionDataSize = 0; |
| if (isGPU && capturedOp) { |
| if (auto teamsOp = castOrGetParentOfType<omp::TeamsOp>(capturedOp)) |
| reductionDataSize = getReductionDataSize(teamsOp); |
| } |
| |
| // Update kernel bounds structure for the `OpenMPIRBuilder` to use. |
| omp::TargetRegionFlags kernelFlags = targetOp.getKernelExecFlags(capturedOp); |
| assert( |
| omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::generic | |
| omp::TargetRegionFlags::spmd) && |
| "invalid kernel flags"); |
| attrs.ExecFlags = |
| omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::generic) |
| ? omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::spmd) |
| ? llvm::omp::OMP_TGT_EXEC_MODE_GENERIC_SPMD |
| : llvm::omp::OMP_TGT_EXEC_MODE_GENERIC |
| : llvm::omp::OMP_TGT_EXEC_MODE_SPMD; |
| attrs.MinTeams = minTeamsVal; |
| attrs.MaxTeams.front() = maxTeamsVal; |
| attrs.MinThreads = 1; |
| attrs.MaxThreads.front() = combinedMaxThreadsVal; |
| attrs.ReductionDataSize = reductionDataSize; |
| // TODO: Allow modified buffer length similar to |
| // fopenmp-cuda-teams-reduction-recs-num flag in clang. |
| if (attrs.ReductionDataSize != 0) |
| attrs.ReductionBufferLength = 1024; |
| } |
| |
| /// Gather LLVM runtime values for all clauses evaluated in the host that are |
| /// passed to the kernel invocation. |
| /// |
| /// This function must be called only when compiling for the host. Also, it will |
| /// only provide correct results if it's called after the body of \c targetOp |
| /// has been fully generated. |
| static void |
| initTargetRuntimeAttrs(llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation, |
| omp::TargetOp targetOp, Operation *capturedOp, |
| llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs &attrs) { |
| omp::LoopNestOp loopOp = castOrGetParentOfType<omp::LoopNestOp>(capturedOp); |
| unsigned numLoops = loopOp ? loopOp.getNumLoops() : 0; |
| |
| Value numThreads, numTeamsLower, numTeamsUpper, teamsThreadLimit; |
| llvm::SmallVector<Value> lowerBounds(numLoops), upperBounds(numLoops), |
| steps(numLoops); |
| extractHostEvalClauses(targetOp, numThreads, numTeamsLower, numTeamsUpper, |
| teamsThreadLimit, &lowerBounds, &upperBounds, &steps); |
| |
| // TODO: Handle constant 'if' clauses. |
| if (Value targetThreadLimit = targetOp.getThreadLimit()) |
| attrs.TargetThreadLimit.front() = |
| moduleTranslation.lookupValue(targetThreadLimit); |
| |
| if (numTeamsLower) |
| attrs.MinTeams = moduleTranslation.lookupValue(numTeamsLower); |
| |
| if (numTeamsUpper) |
| attrs.MaxTeams.front() = moduleTranslation.lookupValue(numTeamsUpper); |
| |
| if (teamsThreadLimit) |
| attrs.TeamsThreadLimit.front() = |
| moduleTranslation.lookupValue(teamsThreadLimit); |
| |
| if (numThreads) |
| attrs.MaxThreads = moduleTranslation.lookupValue(numThreads); |
| |
| if (omp::bitEnumContainsAny(targetOp.getKernelExecFlags(capturedOp), |
| omp::TargetRegionFlags::trip_count)) { |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| attrs.LoopTripCount = nullptr; |
| |
| // To calculate the trip count, we multiply together the trip counts of |
| // every collapsed canonical loop. We don't need to create the loop nests |
| // here, since we're only interested in the trip count. |
| for (auto [loopLower, loopUpper, loopStep] : |
| llvm::zip_equal(lowerBounds, upperBounds, steps)) { |
| llvm::Value *lowerBound = moduleTranslation.lookupValue(loopLower); |
| llvm::Value *upperBound = moduleTranslation.lookupValue(loopUpper); |
| llvm::Value *step = moduleTranslation.lookupValue(loopStep); |
| |
| llvm::OpenMPIRBuilder::LocationDescription loc(builder); |
| llvm::Value *tripCount = ompBuilder->calculateCanonicalLoopTripCount( |
| loc, lowerBound, upperBound, step, /*IsSigned=*/true, |
| loopOp.getLoopInclusive()); |
| |
| if (!attrs.LoopTripCount) { |
| attrs.LoopTripCount = tripCount; |
| continue; |
| } |
| |
| // TODO: Enable UndefinedSanitizer to diagnose an overflow here. |
| attrs.LoopTripCount = builder.CreateMul(attrs.LoopTripCount, tripCount, |
| {}, /*HasNUW=*/true); |
| } |
| } |
| } |
| |
| static LogicalResult |
| convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| auto targetOp = cast<omp::TargetOp>(opInst); |
| if (failed(checkImplementationStatus(opInst))) |
| return failure(); |
| |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| bool isTargetDevice = ompBuilder->Config.isTargetDevice(); |
| bool isGPU = ompBuilder->Config.isGPU(); |
| |
| auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>(); |
| auto argIface = cast<omp::BlockArgOpenMPOpInterface>(opInst); |
| auto &targetRegion = targetOp.getRegion(); |
| // Holds the private vars that have been mapped along with the block argument |
| // that corresponds to the MapInfoOp corresponding to the private var in |
| // question. So, for instance: |
| // |
| // %10 = omp.map.info var_ptr(%6#0 : !fir.ref<!fir.box<!fir.heap<i32>>>, ..) |
| // omp.target map_entries(%10 -> %arg0) private(@box.privatizer %6#0-> %arg1) |
| // |
| // Then, %10 has been created so that the descriptor can be used by the |
| // privatizer @box.privatizer on the device side. Here we'd record {%6#0, |
| // %arg0} in the mappedPrivateVars map. |
| llvm::DenseMap<Value, Value> mappedPrivateVars; |
| DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>()); |
| SmallVector<Value> mapVars = targetOp.getMapVars(); |
| SmallVector<Value> hdaVars = targetOp.getHasDeviceAddrVars(); |
| ArrayRef<BlockArgument> mapBlockArgs = argIface.getMapBlockArgs(); |
| ArrayRef<BlockArgument> hdaBlockArgs = argIface.getHasDeviceAddrBlockArgs(); |
| llvm::Function *llvmOutlinedFn = nullptr; |
| |
| // TODO: It can also be false if a compile-time constant `false` IF clause is |
| // specified. |
| bool isOffloadEntry = |
| isTargetDevice || !ompBuilder->Config.TargetTriples.empty(); |
| |
| // For some private variables, the MapsForPrivatizedVariablesPass |
| // creates MapInfoOp instances. Go through the private variables and |
| // the mapped variables so that during codegeneration we are able |
| // to quickly look up the corresponding map variable, if any for each |
| // private variable. |
| if (!targetOp.getPrivateVars().empty() && !targetOp.getMapVars().empty()) { |
| OperandRange privateVars = targetOp.getPrivateVars(); |
| std::optional<ArrayAttr> privateSyms = targetOp.getPrivateSyms(); |
| std::optional<DenseI64ArrayAttr> privateMapIndices = |
| targetOp.getPrivateMapsAttr(); |
| |
| for (auto [privVarIdx, privVarSymPair] : |
| llvm::enumerate(llvm::zip_equal(privateVars, *privateSyms))) { |
| auto privVar = std::get<0>(privVarSymPair); |
| auto privSym = std::get<1>(privVarSymPair); |
| |
| SymbolRefAttr privatizerName = llvm::cast<SymbolRefAttr>(privSym); |
| omp::PrivateClauseOp privatizer = |
| findPrivatizer(targetOp, privatizerName); |
| |
| if (!privatizer.needsMap()) |
| continue; |
| |
| mlir::Value mappedValue = |
| targetOp.getMappedValueForPrivateVar(privVarIdx); |
| assert(mappedValue && "Expected to find mapped value for a privatized " |
| "variable that needs mapping"); |
| |
| // The MapInfoOp defining the map var isn't really needed later. |
| // So, we don't store it in any datastructure. Instead, we just |
| // do some sanity checks on it right now. |
| auto mapInfoOp = mappedValue.getDefiningOp<omp::MapInfoOp>(); |
| [[maybe_unused]] Type varType = mapInfoOp.getVarType(); |
| |
| // Check #1: Check that the type of the private variable matches |
| // the type of the variable being mapped. |
| if (!isa<LLVM::LLVMPointerType>(privVar.getType())) |
| assert( |
| varType == privVar.getType() && |
| "Type of private var doesn't match the type of the mapped value"); |
| |
| // Ok, only 1 sanity check for now. |
| // Record the block argument corresponding to this mapvar. |
| mappedPrivateVars.insert( |
| {privVar, |
| targetRegion.getArgument(argIface.getMapBlockArgsStart() + |
| (*privateMapIndices)[privVarIdx])}); |
| } |
| } |
| |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) |
| -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy { |
| llvm::IRBuilderBase::InsertPointGuard guard(builder); |
| builder.SetCurrentDebugLocation(llvm::DebugLoc()); |
| // Forward target-cpu and target-features function attributes from the |
| // original function to the new outlined function. |
| llvm::Function *llvmParentFn = |
| moduleTranslation.lookupFunction(parentFn.getName()); |
| llvmOutlinedFn = codeGenIP.getBlock()->getParent(); |
| assert(llvmParentFn && llvmOutlinedFn && |
| "Both parent and outlined functions must exist at this point"); |
| |
| if (auto attr = llvmParentFn->getFnAttribute("target-cpu"); |
| attr.isStringAttribute()) |
| llvmOutlinedFn->addFnAttr(attr); |
| |
| if (auto attr = llvmParentFn->getFnAttribute("target-features"); |
| attr.isStringAttribute()) |
| llvmOutlinedFn->addFnAttr(attr); |
| |
| for (auto [arg, mapOp] : llvm::zip_equal(mapBlockArgs, mapVars)) { |
| auto mapInfoOp = cast<omp::MapInfoOp>(mapOp.getDefiningOp()); |
| llvm::Value *mapOpValue = |
| moduleTranslation.lookupValue(mapInfoOp.getVarPtr()); |
| moduleTranslation.mapValue(arg, mapOpValue); |
| } |
| for (auto [arg, mapOp] : llvm::zip_equal(hdaBlockArgs, hdaVars)) { |
| auto mapInfoOp = cast<omp::MapInfoOp>(mapOp.getDefiningOp()); |
| llvm::Value *mapOpValue = |
| moduleTranslation.lookupValue(mapInfoOp.getVarPtr()); |
| moduleTranslation.mapValue(arg, mapOpValue); |
| } |
| |
| // Do privatization after moduleTranslation has already recorded |
| // mapped values. |
| PrivateVarsInfo privateVarsInfo(targetOp); |
| |
| llvm::Expected<llvm::BasicBlock *> afterAllocas = |
| allocatePrivateVars(builder, moduleTranslation, privateVarsInfo, |
| allocaIP, &mappedPrivateVars); |
| |
| if (failed(handleError(afterAllocas, *targetOp))) |
| return llvm::make_error<PreviouslyReportedError>(); |
| |
| builder.restoreIP(codeGenIP); |
| if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo, |
| &mappedPrivateVars), |
| *targetOp) |
| .failed()) |
| return llvm::make_error<PreviouslyReportedError>(); |
| |
| if (failed(copyFirstPrivateVars( |
| builder, moduleTranslation, privateVarsInfo.mlirVars, |
| privateVarsInfo.llvmVars, privateVarsInfo.privatizers, |
| &mappedPrivateVars))) |
| return llvm::make_error<PreviouslyReportedError>(); |
| |
| SmallVector<Region *> privateCleanupRegions; |
| llvm::transform(privateVarsInfo.privatizers, |
| std::back_inserter(privateCleanupRegions), |
| [](omp::PrivateClauseOp privatizer) { |
| return &privatizer.getDeallocRegion(); |
| }); |
| |
| llvm::Expected<llvm::BasicBlock *> exitBlock = convertOmpOpRegions( |
| targetRegion, "omp.target", builder, moduleTranslation); |
| |
| if (!exitBlock) |
| return exitBlock.takeError(); |
| |
| builder.SetInsertPoint(*exitBlock); |
| if (!privateCleanupRegions.empty()) { |
| if (failed(inlineOmpRegionCleanup( |
| privateCleanupRegions, privateVarsInfo.llvmVars, |
| moduleTranslation, builder, "omp.targetop.private.cleanup", |
| /*shouldLoadCleanupRegionArg=*/false))) { |
| return llvm::createStringError( |
| "failed to inline `dealloc` region of `omp.private` " |
| "op in the target region"); |
| } |
| return builder.saveIP(); |
| } |
| |
| return InsertPointTy(exitBlock.get(), exitBlock.get()->end()); |
| }; |
| |
| StringRef parentName = parentFn.getName(); |
| |
| llvm::TargetRegionEntryInfo entryInfo; |
| |
| getTargetEntryUniqueInfo(entryInfo, targetOp, parentName); |
| |
| MapInfoData mapData; |
| collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, dl, |
| builder, /*useDevPtrOperands=*/{}, |
| /*useDevAddrOperands=*/{}, hdaVars); |
| |
| MapInfosTy combinedInfos; |
| auto genMapInfoCB = |
| [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP) -> MapInfosTy & { |
| builder.restoreIP(codeGenIP); |
| genMapInfos(builder, moduleTranslation, dl, combinedInfos, mapData, true); |
| return combinedInfos; |
| }; |
| |
| auto argAccessorCB = [&](llvm::Argument &arg, llvm::Value *input, |
| llvm::Value *&retVal, InsertPointTy allocaIP, |
| InsertPointTy codeGenIP) |
| -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy { |
| llvm::IRBuilderBase::InsertPointGuard guard(builder); |
| builder.SetCurrentDebugLocation(llvm::DebugLoc()); |
| // We just return the unaltered argument for the host function |
| // for now, some alterations may be required in the future to |
| // keep host fallback functions working identically to the device |
| // version (e.g. pass ByCopy values should be treated as such on |
| // host and device, currently not always the case) |
| if (!isTargetDevice) { |
| retVal = cast<llvm::Value>(&arg); |
| return codeGenIP; |
| } |
| |
| return createDeviceArgumentAccessor(mapData, arg, input, retVal, builder, |
| *ompBuilder, moduleTranslation, |
| allocaIP, codeGenIP); |
| }; |
| |
| llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs runtimeAttrs; |
| llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs defaultAttrs; |
| Operation *targetCapturedOp = targetOp.getInnermostCapturedOmpOp(); |
| initTargetDefaultAttrs(targetOp, targetCapturedOp, defaultAttrs, |
| isTargetDevice, isGPU); |
| |
| // Collect host-evaluated values needed to properly launch the kernel from the |
| // host. |
| if (!isTargetDevice) |
| initTargetRuntimeAttrs(builder, moduleTranslation, targetOp, |
| targetCapturedOp, runtimeAttrs); |
| |
| // Pass host-evaluated values as parameters to the kernel / host fallback, |
| // except if they are constants. In any case, map the MLIR block argument to |
| // the corresponding LLVM values. |
| llvm::SmallVector<llvm::Value *, 4> kernelInput; |
| SmallVector<Value> hostEvalVars = targetOp.getHostEvalVars(); |
| ArrayRef<BlockArgument> hostEvalBlockArgs = argIface.getHostEvalBlockArgs(); |
| for (auto [arg, var] : llvm::zip_equal(hostEvalBlockArgs, hostEvalVars)) { |
| llvm::Value *value = moduleTranslation.lookupValue(var); |
| moduleTranslation.mapValue(arg, value); |
| |
| if (!llvm::isa<llvm::Constant>(value)) |
| kernelInput.push_back(value); |
| } |
| |
| for (size_t i = 0, e = mapData.OriginalValue.size(); i != e; ++i) { |
| // declare target arguments are not passed to kernels as arguments |
| // TODO: We currently do not handle cases where a member is explicitly |
| // passed in as an argument, this will likley need to be handled in |
| // the near future, rather than using IsAMember, it may be better to |
| // test if the relevant BlockArg is used within the target region and |
| // then use that as a basis for exclusion in the kernel inputs. |
| if (!mapData.IsDeclareTarget[i] && !mapData.IsAMember[i]) |
| kernelInput.push_back(mapData.OriginalValue[i]); |
| } |
| |
| SmallVector<llvm::OpenMPIRBuilder::DependData> dds; |
| buildDependData(targetOp.getDependKinds(), targetOp.getDependVars(), |
| moduleTranslation, dds); |
| |
| llvm::OpenMPIRBuilder::InsertPointTy allocaIP = |
| findAllocaInsertPoint(builder, moduleTranslation); |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| |
| llvm::OpenMPIRBuilder::TargetDataInfo info( |
| /*RequiresDevicePointerInfo=*/false, |
| /*SeparateBeginEndCalls=*/true); |
| |
| auto customMapperCB = |
| [&](unsigned int i) -> llvm::Expected<llvm::Function *> { |
| if (!combinedInfos.Mappers[i]) |
| return nullptr; |
| info.HasMapper = true; |
| return getOrCreateUserDefinedMapperFunc(combinedInfos.Mappers[i], builder, |
| moduleTranslation); |
| }; |
| |
| llvm::Value *ifCond = nullptr; |
| if (Value targetIfCond = targetOp.getIfExpr()) |
| ifCond = moduleTranslation.lookupValue(targetIfCond); |
| |
| llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = |
| moduleTranslation.getOpenMPBuilder()->createTarget( |
| ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), info, entryInfo, |
| defaultAttrs, runtimeAttrs, ifCond, kernelInput, genMapInfoCB, bodyCB, |
| argAccessorCB, customMapperCB, dds, targetOp.getNowait()); |
| |
| if (failed(handleError(afterIP, opInst))) |
| return failure(); |
| |
| builder.restoreIP(*afterIP); |
| |
| // Remap access operations to declare target reference pointers for the |
| // device, essentially generating extra loadop's as necessary |
| if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice()) |
| handleDeclareTargetMapVar(mapData, moduleTranslation, builder, |
| llvmOutlinedFn); |
| |
| return success(); |
| } |
| |
| static LogicalResult |
| convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| // Amend omp.declare_target by deleting the IR of the outlined functions |
| // created for target regions. They cannot be filtered out from MLIR earlier |
| // because the omp.target operation inside must be translated to LLVM, but |
| // the wrapper functions themselves must not remain at the end of the |
| // process. We know that functions where omp.declare_target does not match |
| // omp.is_target_device at this stage can only be wrapper functions because |
| // those that aren't are removed earlier as an MLIR transformation pass. |
| if (FunctionOpInterface funcOp = dyn_cast<FunctionOpInterface>(op)) { |
| if (auto offloadMod = dyn_cast<omp::OffloadModuleInterface>( |
| op->getParentOfType<ModuleOp>().getOperation())) { |
| if (!offloadMod.getIsTargetDevice()) |
| return success(); |
| |
| omp::DeclareTargetDeviceType declareType = |
| attribute.getDeviceType().getValue(); |
| |
| if (declareType == omp::DeclareTargetDeviceType::host) { |
| llvm::Function *llvmFunc = |
| moduleTranslation.lookupFunction(funcOp.getName()); |
| llvmFunc->dropAllReferences(); |
| llvmFunc->eraseFromParent(); |
| } |
| } |
| return success(); |
| } |
| |
| if (LLVM::GlobalOp gOp = dyn_cast<LLVM::GlobalOp>(op)) { |
| llvm::Module *llvmModule = moduleTranslation.getLLVMModule(); |
| if (auto *gVal = llvmModule->getNamedValue(gOp.getSymName())) { |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| bool isDeclaration = gOp.isDeclaration(); |
| bool isExternallyVisible = |
| gOp.getVisibility() != mlir::SymbolTable::Visibility::Private; |
| auto loc = op->getLoc()->findInstanceOf<FileLineColLoc>(); |
| llvm::StringRef mangledName = gOp.getSymName(); |
| auto captureClause = |
| convertToCaptureClauseKind(attribute.getCaptureClause().getValue()); |
| auto deviceClause = |
| convertToDeviceClauseKind(attribute.getDeviceType().getValue()); |
| // unused for MLIR at the moment, required in Clang for book |
| // keeping |
| std::vector<llvm::GlobalVariable *> generatedRefs; |
| |
| std::vector<llvm::Triple> targetTriple; |
| auto targetTripleAttr = dyn_cast_or_null<mlir::StringAttr>( |
| op->getParentOfType<mlir::ModuleOp>()->getAttr( |
| LLVM::LLVMDialect::getTargetTripleAttrName())); |
| if (targetTripleAttr) |
| targetTriple.emplace_back(targetTripleAttr.data()); |
| |
| auto fileInfoCallBack = [&loc]() { |
| std::string filename = ""; |
| std::uint64_t lineNo = 0; |
| |
| if (loc) { |
| filename = loc.getFilename().str(); |
| lineNo = loc.getLine(); |
| } |
| |
| return std::pair<std::string, std::uint64_t>(llvm::StringRef(filename), |
| lineNo); |
| }; |
| |
| ompBuilder->registerTargetGlobalVariable( |
| captureClause, deviceClause, isDeclaration, isExternallyVisible, |
| ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName, |
| generatedRefs, /*OpenMPSimd*/ false, targetTriple, |
| /*GlobalInitializer*/ nullptr, /*VariableLinkage*/ nullptr, |
| gVal->getType(), gVal); |
| |
| if (ompBuilder->Config.isTargetDevice() && |
| (attribute.getCaptureClause().getValue() != |
| mlir::omp::DeclareTargetCaptureClause::to || |
| ompBuilder->Config.hasRequiresUnifiedSharedMemory())) { |
| ompBuilder->getAddrOfDeclareTargetVar( |
| captureClause, deviceClause, isDeclaration, isExternallyVisible, |
| ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName, |
| generatedRefs, /*OpenMPSimd*/ false, targetTriple, gVal->getType(), |
| /*GlobalInitializer*/ nullptr, |
| /*VariableLinkage*/ nullptr); |
| } |
| } |
| } |
| |
| return success(); |
| } |
| |
| // Returns true if the operation is inside a TargetOp or |
| // is part of a declare target function. |
| static bool isTargetDeviceOp(Operation *op) { |
| // Assumes no reverse offloading |
| if (op->getParentOfType<omp::TargetOp>()) |
| return true; |
| |
| // Certain operations return results, and whether utilised in host or |
| // target there is a chance an LLVM Dialect operation depends on it |
| // by taking it in as an operand, so we must always lower these in |
| // some manner or result in an ICE (whether they end up in a no-op |
| // or otherwise). |
| if (mlir::isa<omp::ThreadprivateOp>(op)) |
| return true; |
| |
| if (auto parentFn = op->getParentOfType<LLVM::LLVMFuncOp>()) |
| if (auto declareTargetIface = |
| llvm::dyn_cast<mlir::omp::DeclareTargetInterface>( |
| parentFn.getOperation())) |
| if (declareTargetIface.isDeclareTarget() && |
| declareTargetIface.getDeclareTargetDeviceType() != |
| mlir::omp::DeclareTargetDeviceType::host) |
| return true; |
| |
| return false; |
| } |
| |
| /// Given an OpenMP MLIR operation, create the corresponding LLVM IR (including |
| /// OpenMP runtime calls). |
| static LogicalResult |
| convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| |
| // For each loop, introduce one stack frame to hold loop information. Ensure |
| // this is only done for the outermost loop wrapper to prevent introducing |
| // multiple stack frames for a single loop. Initially set to null, the loop |
| // information structure is initialized during translation of the nested |
| // omp.loop_nest operation, making it available to translation of all loop |
| // wrappers after their body has been successfully translated. |
| bool isOutermostLoopWrapper = |
| isa_and_present<omp::LoopWrapperInterface>(op) && |
| !dyn_cast_if_present<omp::LoopWrapperInterface>(op->getParentOp()); |
| |
| if (isOutermostLoopWrapper) |
| moduleTranslation.stackPush<OpenMPLoopInfoStackFrame>(); |
| |
| auto result = |
| llvm::TypeSwitch<Operation *, LogicalResult>(op) |
| .Case([&](omp::BarrierOp op) -> LogicalResult { |
| if (failed(checkImplementationStatus(*op))) |
| return failure(); |
| |
| llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = |
| ompBuilder->createBarrier(builder.saveIP(), |
| llvm::omp::OMPD_barrier); |
| return handleError(afterIP, *op); |
| }) |
| .Case([&](omp::TaskyieldOp op) { |
| if (failed(checkImplementationStatus(*op))) |
| return failure(); |
| |
| ompBuilder->createTaskyield(builder.saveIP()); |
| return success(); |
| }) |
| .Case([&](omp::FlushOp op) { |
| if (failed(checkImplementationStatus(*op))) |
| return failure(); |
| |
| // No support in Openmp runtime function (__kmpc_flush) to accept |
| // the argument list. |
| // OpenMP standard states the following: |
| // "An implementation may implement a flush with a list by ignoring |
| // the list, and treating it the same as a flush without a list." |
| // |
| // The argument list is discarded so that, flush with a list is |
| // treated same as a flush without a list. |
| ompBuilder->createFlush(builder.saveIP()); |
| return success(); |
| }) |
| .Case([&](omp::ParallelOp op) { |
| return convertOmpParallel(op, builder, moduleTranslation); |
| }) |
| .Case([&](omp::MaskedOp) { |
| return convertOmpMasked(*op, builder, moduleTranslation); |
| }) |
| .Case([&](omp::MasterOp) { |
| return convertOmpMaster(*op, builder, moduleTranslation); |
| }) |
| .Case([&](omp::CriticalOp) { |
| return convertOmpCritical(*op, builder, moduleTranslation); |
| }) |
| .Case([&](omp::OrderedRegionOp) { |
| return convertOmpOrderedRegion(*op, builder, moduleTranslation); |
| }) |
| .Case([&](omp::OrderedOp) { |
| return convertOmpOrdered(*op, builder, moduleTranslation); |
| }) |
| .Case([&](omp::WsloopOp) { |
| return convertOmpWsloop(*op, builder, moduleTranslation); |
| }) |
| .Case([&](omp::SimdOp) { |
| return convertOmpSimd(*op, builder, moduleTranslation); |
| }) |
| .Case([&](omp::AtomicReadOp) { |
| return convertOmpAtomicRead(*op, builder, moduleTranslation); |
| }) |
| .Case([&](omp::AtomicWriteOp) { |
| return convertOmpAtomicWrite(*op, builder, moduleTranslation); |
| }) |
| .Case([&](omp::AtomicUpdateOp op) { |
| return convertOmpAtomicUpdate(op, builder, moduleTranslation); |
| }) |
| .Case([&](omp::AtomicCaptureOp op) { |
| return convertOmpAtomicCapture(op, builder, moduleTranslation); |
| }) |
| .Case([&](omp::CancelOp op) { |
| return convertOmpCancel(op, builder, moduleTranslation); |
| }) |
| .Case([&](omp::SectionsOp) { |
| return convertOmpSections(*op, builder, moduleTranslation); |
| }) |
| .Case([&](omp::SingleOp op) { |
| return convertOmpSingle(op, builder, moduleTranslation); |
| }) |
| .Case([&](omp::TeamsOp op) { |
| return convertOmpTeams(op, builder, moduleTranslation); |
| }) |
| .Case([&](omp::TaskOp op) { |
| return convertOmpTaskOp(op, builder, moduleTranslation); |
| }) |
| .Case([&](omp::TaskgroupOp op) { |
| return convertOmpTaskgroupOp(op, builder, moduleTranslation); |
| }) |
| .Case([&](omp::TaskwaitOp op) { |
| return convertOmpTaskwaitOp(op, builder, moduleTranslation); |
| }) |
| .Case<omp::YieldOp, omp::TerminatorOp, omp::DeclareMapperOp, |
| omp::DeclareMapperInfoOp, omp::DeclareReductionOp, |
| omp::CriticalDeclareOp>([](auto op) { |
| // `yield` and `terminator` can be just omitted. The block structure |
| // was created in the region that handles their parent operation. |
| // `declare_reduction` will be used by reductions and is not |
| // converted directly, skip it. |
| // `declare_mapper` and `declare_mapper.info` are handled whenever |
| // they are referred to through a `map` clause. |
| // `critical.declare` is only used to declare names of critical |
| // sections which will be used by `critical` ops and hence can be |
| // ignored for lowering. The OpenMP IRBuilder will create unique |
| // name for critical section names. |
| return success(); |
| }) |
| .Case([&](omp::ThreadprivateOp) { |
| return convertOmpThreadprivate(*op, builder, moduleTranslation); |
| }) |
| .Case<omp::TargetDataOp, omp::TargetEnterDataOp, |
| omp::TargetExitDataOp, omp::TargetUpdateOp>([&](auto op) { |
| return convertOmpTargetData(op, builder, moduleTranslation); |
| }) |
| .Case([&](omp::TargetOp) { |
| return convertOmpTarget(*op, builder, moduleTranslation); |
| }) |
| .Case([&](omp::DistributeOp) { |
| return convertOmpDistribute(*op, builder, moduleTranslation); |
| }) |
| .Case([&](omp::LoopNestOp) { |
| return convertOmpLoopNest(*op, builder, moduleTranslation); |
| }) |
| .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp>( |
| [&](auto op) { |
| // No-op, should be handled by relevant owning operations e.g. |
| // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp |
| // etc. and then discarded |
| return success(); |
| }) |
| .Default([&](Operation *inst) { |
| return inst->emitError() |
| << "not yet implemented: " << inst->getName(); |
| }); |
| |
| if (isOutermostLoopWrapper) |
| moduleTranslation.stackPop(); |
| |
| return result; |
| } |
| |
| static LogicalResult |
| convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| return convertHostOrTargetOperation(op, builder, moduleTranslation); |
| } |
| |
| static LogicalResult |
| convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| if (isa<omp::TargetOp>(op)) |
| return convertOmpTarget(*op, builder, moduleTranslation); |
| if (isa<omp::TargetDataOp>(op)) |
| return convertOmpTargetData(op, builder, moduleTranslation); |
| bool interrupted = |
| op->walk<WalkOrder::PreOrder>([&](Operation *oper) { |
| if (isa<omp::TargetOp>(oper)) { |
| if (failed(convertOmpTarget(*oper, builder, moduleTranslation))) |
| return WalkResult::interrupt(); |
| return WalkResult::skip(); |
| } |
| if (isa<omp::TargetDataOp>(oper)) { |
| if (failed(convertOmpTargetData(oper, builder, moduleTranslation))) |
| return WalkResult::interrupt(); |
| return WalkResult::skip(); |
| } |
| |
| // Non-target ops might nest target-related ops, therefore, we |
| // translate them as non-OpenMP scopes. Translating them is needed by |
| // nested target-related ops since they might need LLVM values defined |
| // in their parent non-target ops. |
| if (isa<omp::OpenMPDialect>(oper->getDialect()) && |
| oper->getParentOfType<LLVM::LLVMFuncOp>() && |
| !oper->getRegions().empty()) { |
| if (auto blockArgsIface = |
| dyn_cast<omp::BlockArgOpenMPOpInterface>(oper)) |
| forwardArgs(moduleTranslation, blockArgsIface); |
| else { |
| // Here we map entry block arguments of |
| // non-BlockArgOpenMPOpInterface ops if they can be encountered |
| // inside of a function and they define any of these arguments. |
| if (isa<mlir::omp::AtomicUpdateOp>(oper)) |
| for (auto [operand, arg] : |
| llvm::zip_equal(oper->getOperands(), |
| oper->getRegion(0).getArguments())) { |
| moduleTranslation.mapValue( |
| arg, builder.CreateLoad( |
| moduleTranslation.convertType(arg.getType()), |
| moduleTranslation.lookupValue(operand))); |
| } |
| } |
| |
| if (auto loopNest = dyn_cast<omp::LoopNestOp>(oper)) { |
| assert(builder.GetInsertBlock() && |
| "No insert block is set for the builder"); |
| for (auto iv : loopNest.getIVs()) { |
| // Map iv to an undefined value just to keep the IR validity. |
| moduleTranslation.mapValue( |
| iv, llvm::PoisonValue::get( |
| moduleTranslation.convertType(iv.getType()))); |
| } |
| } |
| |
| for (Region ®ion : oper->getRegions()) { |
| // Regions are fake in the sense that they are not a truthful |
| // translation of the OpenMP construct being converted (e.g. no |
| // OpenMP runtime calls will be generated). We just need this to |
| // prepare the kernel invocation args. |
| SmallVector<llvm::PHINode *> phis; |
| auto result = convertOmpOpRegions( |
| region, oper->getName().getStringRef().str() + ".fake.region", |
| builder, moduleTranslation, &phis); |
| if (failed(handleError(result, *oper))) |
| return WalkResult::interrupt(); |
| |
| builder.SetInsertPoint(result.get(), result.get()->end()); |
| } |
| |
| return WalkResult::skip(); |
| } |
| |
| return WalkResult::advance(); |
| }).wasInterrupted(); |
| return failure(interrupted); |
| } |
| |
| namespace { |
| |
| /// Implementation of the dialect interface that converts operations belonging |
| /// to the OpenMP dialect to LLVM IR. |
| class OpenMPDialectLLVMIRTranslationInterface |
| : public LLVMTranslationDialectInterface { |
| public: |
| using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface; |
| |
| /// Translates the given operation to LLVM IR using the provided IR builder |
| /// and saving the state in `moduleTranslation`. |
| LogicalResult |
| convertOperation(Operation *op, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) const final; |
| |
| /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR, |
| /// runtime calls, or operation amendments |
| LogicalResult |
| amendOperation(Operation *op, ArrayRef<llvm::Instruction *> instructions, |
| NamedAttribute attribute, |
| LLVM::ModuleTranslation &moduleTranslation) const final; |
| }; |
| |
| } // namespace |
| |
| LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation( |
| Operation *op, ArrayRef<llvm::Instruction *> instructions, |
| NamedAttribute attribute, |
| LLVM::ModuleTranslation &moduleTranslation) const { |
| return llvm::StringSwitch<llvm::function_ref<LogicalResult(Attribute)>>( |
| attribute.getName()) |
| .Case("omp.is_target_device", |
| [&](Attribute attr) { |
| if (auto deviceAttr = dyn_cast<BoolAttr>(attr)) { |
| llvm::OpenMPIRBuilderConfig &config = |
| moduleTranslation.getOpenMPBuilder()->Config; |
| config.setIsTargetDevice(deviceAttr.getValue()); |
| return success(); |
| } |
| return failure(); |
| }) |
| .Case("omp.is_gpu", |
| [&](Attribute attr) { |
| if (auto gpuAttr = dyn_cast<BoolAttr>(attr)) { |
| llvm::OpenMPIRBuilderConfig &config = |
| moduleTranslation.getOpenMPBuilder()->Config; |
| config.setIsGPU(gpuAttr.getValue()); |
| return success(); |
| } |
| return failure(); |
| }) |
| .Case("omp.host_ir_filepath", |
| [&](Attribute attr) { |
| if (auto filepathAttr = dyn_cast<StringAttr>(attr)) { |
| llvm::OpenMPIRBuilder *ompBuilder = |
| moduleTranslation.getOpenMPBuilder(); |
| ompBuilder->loadOffloadInfoMetadata(filepathAttr.getValue()); |
| return success(); |
| } |
| return failure(); |
| }) |
| .Case("omp.flags", |
| [&](Attribute attr) { |
| if (auto rtlAttr = dyn_cast<omp::FlagsAttr>(attr)) |
| return convertFlagsAttr(op, rtlAttr, moduleTranslation); |
| return failure(); |
| }) |
| .Case("omp.version", |
| [&](Attribute attr) { |
| if (auto versionAttr = dyn_cast<omp::VersionAttr>(attr)) { |
| llvm::OpenMPIRBuilder *ompBuilder = |
| moduleTranslation.getOpenMPBuilder(); |
| ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp", |
| versionAttr.getVersion()); |
| return success(); |
| } |
| return failure(); |
| }) |
| .Case("omp.declare_target", |
| [&](Attribute attr) { |
| if (auto declareTargetAttr = |
| dyn_cast<omp::DeclareTargetAttr>(attr)) |
| return convertDeclareTargetAttr(op, declareTargetAttr, |
| moduleTranslation); |
| return failure(); |
| }) |
| .Case("omp.requires", |
| [&](Attribute attr) { |
| if (auto requiresAttr = dyn_cast<omp::ClauseRequiresAttr>(attr)) { |
| using Requires = omp::ClauseRequires; |
| Requires flags = requiresAttr.getValue(); |
| llvm::OpenMPIRBuilderConfig &config = |
| moduleTranslation.getOpenMPBuilder()->Config; |
| config.setHasRequiresReverseOffload( |
| bitEnumContainsAll(flags, Requires::reverse_offload)); |
| config.setHasRequiresUnifiedAddress( |
| bitEnumContainsAll(flags, Requires::unified_address)); |
| config.setHasRequiresUnifiedSharedMemory( |
| bitEnumContainsAll(flags, Requires::unified_shared_memory)); |
| config.setHasRequiresDynamicAllocators( |
| bitEnumContainsAll(flags, Requires::dynamic_allocators)); |
| return success(); |
| } |
| return failure(); |
| }) |
| .Case("omp.target_triples", |
| [&](Attribute attr) { |
| if (auto triplesAttr = dyn_cast<ArrayAttr>(attr)) { |
| llvm::OpenMPIRBuilderConfig &config = |
| moduleTranslation.getOpenMPBuilder()->Config; |
| config.TargetTriples.clear(); |
| config.TargetTriples.reserve(triplesAttr.size()); |
| for (Attribute tripleAttr : triplesAttr) { |
| if (auto tripleStrAttr = dyn_cast<StringAttr>(tripleAttr)) |
| config.TargetTriples.emplace_back(tripleStrAttr.getValue()); |
| else |
| return failure(); |
| } |
| return success(); |
| } |
| return failure(); |
| }) |
| .Default([](Attribute) { |
| // Fall through for omp attributes that do not require lowering. |
| return success(); |
| })(attribute.getValue()); |
| |
| return failure(); |
| } |
| |
| /// Given an OpenMP MLIR operation, create the corresponding LLVM IR |
| /// (including OpenMP runtime calls). |
| LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( |
| Operation *op, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) const { |
| |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| if (ompBuilder->Config.isTargetDevice()) { |
| if (isTargetDeviceOp(op)) { |
| return convertTargetDeviceOp(op, builder, moduleTranslation); |
| } else { |
| return convertTargetOpsInNest(op, builder, moduleTranslation); |
| } |
| } |
| return convertHostOrTargetOperation(op, builder, moduleTranslation); |
| } |
| |
| void mlir::registerOpenMPDialectTranslation(DialectRegistry ®istry) { |
| registry.insert<omp::OpenMPDialect>(); |
| registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) { |
| dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>(); |
| }); |
| } |
| |
| void mlir::registerOpenMPDialectTranslation(MLIRContext &context) { |
| DialectRegistry registry; |
| registerOpenMPDialectTranslation(registry); |
| context.appendDialectRegistry(registry); |
| } |