//===-- OpenMP.cpp -- OpenMP directive lowering ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
//
//===----------------------------------------------------------------------===//
#include "flang/Lower/OpenMP.h"
#include "ClauseProcessor.h"
#include "Clauses.h"
#include "DataSharingProcessor.h"
#include "Decomposer.h"
#include "ReductionProcessor.h"
#include "Utils.h"
#include "flang/Common/idioms.h"
#include "flang/Lower/Bridge.h"
#include "flang/Lower/ConvertExpr.h"
#include "flang/Lower/ConvertVariable.h"
#include "flang/Lower/DirectivesCommon.h"
#include "flang/Lower/StatementContext.h"
#include "flang/Lower/SymbolMap.h"
#include "flang/Optimizer/Builder/BoxValue.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Parser/characters.h"
#include "flang/Parser/parse-tree.h"
#include "flang/Semantics/openmp-directive-sets.h"
#include "flang/Semantics/tools.h"
#include "flang/Support/OpenMP-utils.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/Transforms/RegionUtils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
using namespace Fortran::lower::omp;
using namespace Fortran::common::openmp;
//===----------------------------------------------------------------------===//
// Code generation helper functions
//===----------------------------------------------------------------------===//
static void genOMPDispatch(lower::AbstractConverter &converter,
lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue,
ConstructQueue::const_iterator item);
static void processHostEvalClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
lower::StatementContext &stmtCtx,
lower::pft::Evaluation &eval,
mlir::Location loc);
namespace {
/// Structure holding host-evaluated information that must be passed to later
/// lowering stages.
class HostEvalInfo {
public:
// Allow this function access to private members in order to initialize them.
friend void ::processHostEvalClauses(lower::AbstractConverter &,
semantics::SemanticsContext &,
lower::StatementContext &,
lower::pft::Evaluation &,
mlir::Location);
/// Fill \c vars with values stored in \c ops.
///
/// The order in which values are stored matches the one expected by \see
/// bindOperands().
void collectValues(llvm::SmallVectorImpl<mlir::Value> &vars) const {
vars.append(ops.loopLowerBounds);
vars.append(ops.loopUpperBounds);
vars.append(ops.loopSteps);
if (ops.numTeamsLower)
vars.push_back(ops.numTeamsLower);
if (ops.numTeamsUpper)
vars.push_back(ops.numTeamsUpper);
if (ops.numThreads)
vars.push_back(ops.numThreads);
if (ops.threadLimit)
vars.push_back(ops.threadLimit);
}
/// Update \c ops, replacing all values with the corresponding block argument
/// in \c args.
///
/// The order in which values are stored in \c args is the same as the one
/// used by \see collectValues().
void bindOperands(llvm::ArrayRef<mlir::BlockArgument> args) {
assert(args.size() ==
ops.loopLowerBounds.size() + ops.loopUpperBounds.size() +
ops.loopSteps.size() + (ops.numTeamsLower ? 1 : 0) +
(ops.numTeamsUpper ? 1 : 0) + (ops.numThreads ? 1 : 0) +
(ops.threadLimit ? 1 : 0) &&
"invalid block argument list");
int argIndex = 0;
for (size_t i = 0; i < ops.loopLowerBounds.size(); ++i)
ops.loopLowerBounds[i] = args[argIndex++];
for (size_t i = 0; i < ops.loopUpperBounds.size(); ++i)
ops.loopUpperBounds[i] = args[argIndex++];
for (size_t i = 0; i < ops.loopSteps.size(); ++i)
ops.loopSteps[i] = args[argIndex++];
if (ops.numTeamsLower)
ops.numTeamsLower = args[argIndex++];
if (ops.numTeamsUpper)
ops.numTeamsUpper = args[argIndex++];
if (ops.numThreads)
ops.numThreads = args[argIndex++];
if (ops.threadLimit)
ops.threadLimit = args[argIndex++];
}
/// Update \p clauseOps and \p ivOut with the corresponding host-evaluated
/// values and Fortran symbols, respectively, if they have already been
/// initialized but not yet applied.
///
/// \returns whether an update was performed. If not, these clauses were not
/// evaluated in the host device.
bool apply(mlir::omp::LoopNestOperands &clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> &ivOut) {
if (iv.empty() || loopNestApplied) {
loopNestApplied = true;
return false;
}
loopNestApplied = true;
clauseOps.loopLowerBounds = ops.loopLowerBounds;
clauseOps.loopUpperBounds = ops.loopUpperBounds;
clauseOps.loopSteps = ops.loopSteps;
ivOut.append(iv);
return true;
}
/// Update \p clauseOps with the corresponding host-evaluated values if they
/// have already been initialized but not yet applied.
///
/// \returns whether an update was performed. If not, these clauses were not
/// evaluated in the host device.
bool apply(mlir::omp::ParallelOperands &clauseOps) {
if (!ops.numThreads || parallelApplied) {
parallelApplied = true;
return false;
}
parallelApplied = true;
clauseOps.numThreads = ops.numThreads;
return true;
}
/// Update \p clauseOps with the corresponding host-evaluated values if they
/// have already been initialized.
///
/// \returns whether an update was performed. If not, these clauses were not
/// evaluated in the host device.
bool apply(mlir::omp::TeamsOperands &clauseOps) {
if (!ops.numTeamsLower && !ops.numTeamsUpper && !ops.threadLimit)
return false;
clauseOps.numTeamsLower = ops.numTeamsLower;
clauseOps.numTeamsUpper = ops.numTeamsUpper;
clauseOps.threadLimit = ops.threadLimit;
return true;
}
private:
mlir::omp::HostEvaluatedOperands ops;
llvm::SmallVector<const semantics::Symbol *> iv;
bool loopNestApplied = false, parallelApplied = false;
};
} // namespace
/// Stack of \see HostEvalInfo to represent the current nest of \c omp.target
/// operations being created.
///
/// The current implementation prevents nested 'target' regions from breaking
/// the handling of the outer region by keeping a stack of information
/// structures, but it will probably still require some further work to support
/// reverse offloading.
static llvm::SmallVector<HostEvalInfo, 0> hostEvalInfo;
/// Bind symbols to their corresponding entry block arguments.
///
/// The binding will be performed inside the current block, which does not
/// necessarily have to be part of the operation for which the binding is done.
/// However, block arguments must be accessible. This enables controlling the
/// insertion point of any new MLIR operations related to the binding of
/// arguments of a loop wrapper operation.
///
/// \param [in] converter - PFT to MLIR conversion interface.
/// \param [in] op - owner operation of the block arguments to bind.
/// \param [in] args - entry block arguments information for the given
/// operation.
static void bindEntryBlockArgs(lower::AbstractConverter &converter,
mlir::omp::BlockArgOpenMPOpInterface op,
const EntryBlockArgs &args) {
assert(op != nullptr && "invalid block argument-defining operation");
assert(args.isValid() && "invalid args");
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
auto bindSingleMapLike = [&converter,
&firOpBuilder](const semantics::Symbol &sym,
const mlir::BlockArgument &arg) {
// Clones the `bounds`, placing them inside the entry block, and returns
// them.
auto cloneBound = [&](mlir::Value bound) {
if (mlir::isMemoryEffectFree(bound.getDefiningOp())) {
mlir::Operation *clonedOp = firOpBuilder.clone(*bound.getDefiningOp());
return clonedOp->getResult(0);
}
TODO(converter.getCurrentLocation(),
"target map-like clause operand unsupported bound type");
};
auto cloneBounds = [cloneBound](llvm::ArrayRef<mlir::Value> bounds) {
llvm::SmallVector<mlir::Value> clonedBounds;
llvm::transform(bounds, std::back_inserter(clonedBounds),
[&](mlir::Value bound) { return cloneBound(bound); });
return clonedBounds;
};
fir::ExtendedValue extVal = converter.getSymbolExtendedValue(sym);
auto refType = mlir::dyn_cast<fir::ReferenceType>(arg.getType());
if (refType && fir::isa_builtin_cptr_type(refType.getElementType())) {
converter.bindSymbol(sym, arg);
} else {
extVal.match(
[&](const fir::BoxValue &v) {
converter.bindSymbol(sym,
fir::BoxValue(arg, cloneBounds(v.getLBounds()),
v.getExplicitParameters(),
v.getExplicitExtents()));
},
[&](const fir::MutableBoxValue &v) {
converter.bindSymbol(
sym, fir::MutableBoxValue(arg, cloneBounds(v.getLBounds()),
v.getMutableProperties()));
},
[&](const fir::ArrayBoxValue &v) {
converter.bindSymbol(
sym, fir::ArrayBoxValue(arg, cloneBounds(v.getExtents()),
cloneBounds(v.getLBounds()),
v.getSourceBox()));
},
[&](const fir::CharArrayBoxValue &v) {
converter.bindSymbol(
sym, fir::CharArrayBoxValue(arg, cloneBound(v.getLen()),
cloneBounds(v.getExtents()),
cloneBounds(v.getLBounds())));
},
[&](const fir::CharBoxValue &v) {
converter.bindSymbol(
sym, fir::CharBoxValue(arg, cloneBound(v.getLen())));
},
[&](const fir::UnboxedValue &v) { converter.bindSymbol(sym, arg); },
[&](const auto &) {
TODO(converter.getCurrentLocation(),
"target map clause operand unsupported type");
});
}
};
auto bindMapLike =
[&bindSingleMapLike](llvm::ArrayRef<const semantics::Symbol *> syms,
llvm::ArrayRef<mlir::BlockArgument> args) {
// Structure component symbols don't have bindings, and can only be
// explicitly mapped individually. If a member is captured implicitly,
// we map the entirety of the derived type when we find its symbol.
llvm::SmallVector<const semantics::Symbol *> processedSyms;
llvm::copy_if(syms, std::back_inserter(processedSyms),
[](auto *sym) { return !sym->owner().IsDerivedType(); });
for (auto [sym, arg] : llvm::zip_equal(processedSyms, args))
bindSingleMapLike(*sym, arg);
};
auto bindPrivateLike = [&converter, &firOpBuilder](
llvm::ArrayRef<const semantics::Symbol *> syms,
llvm::ArrayRef<mlir::Value> vars,
llvm::ArrayRef<mlir::BlockArgument> args) {
llvm::SmallVector<const semantics::Symbol *> processedSyms;
for (auto *sym : syms) {
if (const auto *commonDet =
sym->detailsIf<semantics::CommonBlockDetails>()) {
llvm::transform(commonDet->objects(), std::back_inserter(processedSyms),
[&](const auto &mem) { return &*mem; });
} else {
processedSyms.push_back(sym);
}
}
for (auto [sym, var, arg] : llvm::zip_equal(processedSyms, vars, args))
converter.bindSymbol(
*sym,
hlfir::translateToExtendedValue(
var.getLoc(), firOpBuilder, hlfir::Entity{arg},
/*contiguousHint=*/
evaluate::IsSimplyContiguous(*sym, converter.getFoldingContext()))
.first);
};
// Process in alphabetical order of clause name to match the block argument
// order. Do not bind host_eval variables because they cannot be used inside
// the corresponding region, except for very specific cases handled
// separately.
bindMapLike(args.hasDeviceAddr.syms, op.getHasDeviceAddrBlockArgs());
bindPrivateLike(args.inReduction.syms, args.inReduction.vars,
op.getInReductionBlockArgs());
bindMapLike(args.map.syms, op.getMapBlockArgs());
bindPrivateLike(args.priv.syms, args.priv.vars, op.getPrivateBlockArgs());
bindPrivateLike(args.reduction.syms, args.reduction.vars,
op.getReductionBlockArgs());
bindPrivateLike(args.taskReduction.syms, args.taskReduction.vars,
op.getTaskReductionBlockArgs());
bindMapLike(args.useDeviceAddr.syms, op.getUseDeviceAddrBlockArgs());
bindMapLike(args.useDevicePtr.syms, op.getUseDevicePtrBlockArgs());
}
/// Get the list of base values that the specified map-like variables point to.
///
/// This function must be kept in sync with changes to the `createMapInfoOp`
/// utility function, since it must take into account the potential introduction
/// of levels of indirection (i.e. intermediate ops).
///
/// \param [in] vars - list of values passed to map-like clauses, returned
/// by an `omp.map.info` operation.
/// \param [out] baseOps - populated with the `var_ptr` values of the
/// corresponding defining operations.
static void
extractMappedBaseValues(llvm::ArrayRef<mlir::Value> vars,
llvm::SmallVectorImpl<mlir::Value> &baseOps) {
llvm::transform(vars, std::back_inserter(baseOps), [](mlir::Value map) {
auto mapInfo = map.getDefiningOp<mlir::omp::MapInfoOp>();
assert(mapInfo && "expected all map vars to be defined by omp.map.info");
mlir::Value varPtr = mapInfo.getVarPtr();
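// If the variable's address was extracted out of a box by a fir.box_addr
// operation, return the original box value rather than the extracted
// address.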
if (auto boxAddr = varPtr.getDefiningOp<fir::BoxAddrOp>())
return boxAddr.getVal();
return varPtr;
});
}
/// Get the directive enumeration value corresponding to the given OpenMP
/// construct PFT node.
llvm::omp::Directive
extractOmpDirective(const parser::OpenMPConstruct &ompConstruct) {
return common::visit(
common::visitors{
[](const parser::OpenMPAllocatorsConstruct &c) {
return llvm::omp::OMPD_allocators;
},
[](const parser::OpenMPAssumeConstruct &c) {
return llvm::omp::OMPD_assume;
},
[](const parser::OpenMPAtomicConstruct &c) {
return llvm::omp::OMPD_atomic;
},
[](const parser::OpenMPBlockConstruct &c) {
return std::get<parser::OmpBlockDirective>(
std::get<parser::OmpBeginBlockDirective>(c.t).t)
.v;
},
[](const parser::OpenMPCriticalConstruct &c) {
return llvm::omp::OMPD_critical;
},
[](const parser::OpenMPDeclarativeAllocate &c) {
return llvm::omp::OMPD_allocate;
},
[](const parser::OpenMPDispatchConstruct &c) {
return llvm::omp::OMPD_dispatch;
},
[](const parser::OpenMPExecutableAllocate &c) {
return llvm::omp::OMPD_allocate;
},
[](const parser::OpenMPLoopConstruct &c) {
return std::get<parser::OmpLoopDirective>(
std::get<parser::OmpBeginLoopDirective>(c.t).t)
.v;
},
[](const parser::OpenMPSectionConstruct &c) {
return llvm::omp::OMPD_section;
},
[](const parser::OpenMPSectionsConstruct &c) {
return std::get<parser::OmpSectionsDirective>(
std::get<parser::OmpBeginSectionsDirective>(c.t).t)
.v;
},
[](const parser::OpenMPStandaloneConstruct &c) {
return common::visit(
common::visitors{
[](const parser::OpenMPSimpleStandaloneConstruct &c) {
return c.v.DirId();
},
[](const parser::OpenMPFlushConstruct &c) {
return llvm::omp::OMPD_flush;
},
[](const parser::OpenMPCancelConstruct &c) {
return llvm::omp::OMPD_cancel;
},
[](const parser::OpenMPCancellationPointConstruct &c) {
return llvm::omp::OMPD_cancellation_point;
},
[](const parser::OmpMetadirectiveDirective &c) {
return llvm::omp::OMPD_metadirective;
},
[](const parser::OpenMPDepobjConstruct &c) {
return llvm::omp::OMPD_depobj;
},
[](const parser::OpenMPInteropConstruct &c) {
return llvm::omp::OMPD_interop;
}},
c.u);
},
[](const parser::OpenMPUtilityConstruct &c) {
return common::visit(
common::visitors{[](const parser::OmpErrorDirective &c) {
return llvm::omp::OMPD_error;
},
[](const parser::OmpNothingDirective &c) {
return llvm::omp::OMPD_nothing;
}},
c.u);
}},
ompConstruct.u);
}
/// Populate the global \see hostEvalInfo after processing clauses for the given
/// \p eval OpenMP target construct, or nested constructs, if these must be
/// evaluated outside of the target region per the spec.
///
/// In particular, this will ensure that in 'target teams' and equivalent nested
/// constructs, the \c thread_limit and \c num_teams clauses will be evaluated
/// in the host. Additionally, loop bounds, steps and the \c num_threads clause
/// will also be evaluated in the host if a target SPMD construct is detected
/// (i.e. 'target teams distribute parallel do [simd]' or equivalent nesting).
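///
/// For example, when lowering the following target SPMD construct, the
/// bounds and step of the 'do' loop and the value of the 'num_threads'
/// clause must be evaluated in the host and passed to the 'omp.target'
/// operation via 'host_eval' operands:
///
///   !$omp target teams distribute parallel do num_threads(16)
///   do i = 1, n
///     ...
///   end do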
///
/// The result, stored as a global, is intended to be used to populate the \c
/// host_eval operands of the associated \c omp.target operation, and also to be
/// checked and used by later lowering steps to populate the corresponding
/// operands of the \c omp.teams, \c omp.parallel or \c omp.loop_nest
/// operations.
static void processHostEvalClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
lower::StatementContext &stmtCtx,
lower::pft::Evaluation &eval,
mlir::Location loc) {
// Obtain the list of clauses of the given OpenMP block or loop construct
// evaluation. Other evaluations passed to this lambda keep `clauses`
// unchanged.
auto extractClauses = [&semaCtx](lower::pft::Evaluation &eval,
List<Clause> &clauses) {
const auto *ompEval = eval.getIf<parser::OpenMPConstruct>();
if (!ompEval)
return;
const parser::OmpClauseList *beginClauseList = nullptr;
const parser::OmpClauseList *endClauseList = nullptr;
common::visit(
common::visitors{
[&](const parser::OpenMPBlockConstruct &ompConstruct) {
const auto &beginDirective =
std::get<parser::OmpBeginBlockDirective>(ompConstruct.t);
beginClauseList =
&std::get<parser::OmpClauseList>(beginDirective.t);
endClauseList = &std::get<parser::OmpClauseList>(
std::get<parser::OmpEndBlockDirective>(ompConstruct.t).t);
},
[&](const parser::OpenMPLoopConstruct &ompConstruct) {
const auto &beginDirective =
std::get<parser::OmpBeginLoopDirective>(ompConstruct.t);
beginClauseList =
&std::get<parser::OmpClauseList>(beginDirective.t);
if (auto &endDirective =
std::get<std::optional<parser::OmpEndLoopDirective>>(
ompConstruct.t))
endClauseList =
&std::get<parser::OmpClauseList>(endDirective->t);
},
[&](const auto &) {}},
ompEval->u);
assert(beginClauseList && "expected begin directive");
clauses.append(makeClauses(*beginClauseList, semaCtx));
if (endClauseList)
clauses.append(makeClauses(*endClauseList, semaCtx));
};
// Return the directive that is immediately nested inside the given
// `parent` evaluation, if it is its only non-end-statement nested evaluation
// and it represents an OpenMP construct.
auto extractOnlyOmpNestedDir = [](lower::pft::Evaluation &parent)
-> std::optional<llvm::omp::Directive> {
if (!parent.hasNestedEvaluations())
return std::nullopt;
llvm::omp::Directive dir;
auto &nested = parent.getFirstNestedEvaluation();
if (const auto *ompEval = nested.getIf<parser::OpenMPConstruct>())
dir = extractOmpDirective(*ompEval);
else
return std::nullopt;
for (auto &sibling : parent.getNestedEvaluations())
if (&sibling != &nested && !sibling.isEndStmt())
return std::nullopt;
return dir;
};
// Process the given evaluation assuming it's part of a 'target' construct or
// captured by one, and store results in the global `hostEvalInfo`.
std::function<void(lower::pft::Evaluation &, const List<Clause> &)>
processEval;
processEval = [&](lower::pft::Evaluation &eval, const List<Clause> &clauses) {
using namespace llvm::omp;
ClauseProcessor cp(converter, semaCtx, clauses);
// Call `processEval` recursively with the immediately nested evaluation and
// its corresponding clauses if there is a single nested evaluation
// representing an OpenMP directive that passes the given test.
auto processSingleNestedIf = [&](llvm::function_ref<bool(Directive)> test) {
std::optional<Directive> nestedDir = extractOnlyOmpNestedDir(eval);
if (!nestedDir || !test(*nestedDir))
return;
lower::pft::Evaluation &nestedEval = eval.getFirstNestedEvaluation();
List<lower::omp::Clause> nestedClauses;
extractClauses(nestedEval, nestedClauses);
processEval(nestedEval, nestedClauses);
};
const auto *ompEval = eval.getIf<parser::OpenMPConstruct>();
if (!ompEval)
return;
HostEvalInfo &hostInfo = hostEvalInfo.back();
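// Each case below processes the clauses that must be evaluated in the host
// for the given leaf or combined construct, then falls through to the
// handling of the constructs it is a superset of. For instance, the
// 'target teams distribute parallel do' case also picks up the processing
// of 'num_threads' and of the collapsed loop bounds.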
switch (extractOmpDirective(*ompEval)) {
case OMPD_teams_distribute_parallel_do:
case OMPD_teams_distribute_parallel_do_simd:
cp.processThreadLimit(stmtCtx, hostInfo.ops);
[[fallthrough]];
case OMPD_target_teams_distribute_parallel_do:
case OMPD_target_teams_distribute_parallel_do_simd:
cp.processNumTeams(stmtCtx, hostInfo.ops);
[[fallthrough]];
case OMPD_distribute_parallel_do:
case OMPD_distribute_parallel_do_simd:
cp.processNumThreads(stmtCtx, hostInfo.ops);
[[fallthrough]];
case OMPD_distribute:
case OMPD_distribute_simd:
cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv);
break;
case OMPD_teams:
cp.processThreadLimit(stmtCtx, hostInfo.ops);
[[fallthrough]];
case OMPD_target_teams:
cp.processNumTeams(stmtCtx, hostInfo.ops);
processSingleNestedIf([](Directive nestedDir) {
return topDistributeSet.test(nestedDir) || topLoopSet.test(nestedDir);
});
break;
case OMPD_teams_distribute:
case OMPD_teams_distribute_simd:
cp.processThreadLimit(stmtCtx, hostInfo.ops);
[[fallthrough]];
case OMPD_target_teams_distribute:
case OMPD_target_teams_distribute_simd:
cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv);
cp.processNumTeams(stmtCtx, hostInfo.ops);
break;
case OMPD_teams_loop:
cp.processThreadLimit(stmtCtx, hostInfo.ops);
[[fallthrough]];
case OMPD_target_teams_loop:
cp.processNumTeams(stmtCtx, hostInfo.ops);
[[fallthrough]];
case OMPD_loop:
cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv);
break;
// Standalone 'target' case.
case OMPD_target: {
processSingleNestedIf(
[](Directive nestedDir) { return topTeamsSet.test(nestedDir); });
break;
}
default:
break;
}
};
assert(!hostEvalInfo.empty() && "expected HOST_EVAL info structure");
const auto *ompEval = eval.getIf<parser::OpenMPConstruct>();
assert(ompEval &&
llvm::omp::allTargetSet.test(extractOmpDirective(*ompEval)) &&
"expected TARGET construct evaluation");
(void)ompEval;
// Use the whole list of clauses passed to the construct here, rather than
// only the ones applied to omp.target.
List<lower::omp::Clause> clauses;
extractClauses(eval, clauses);
processEval(eval, clauses);
}
static lower::pft::Evaluation *
getCollapsedLoopEval(lower::pft::Evaluation &eval, int collapseValue) {
// Return the Evaluation of the innermost collapsed loop, or the current one
// if there was no COLLAPSE.
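// For example, with COLLAPSE(2) this skips past the outermost DoConstruct
// and returns the evaluation of the DO loop nested immediately inside it.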
if (collapseValue == 0)
return &eval;
lower::pft::Evaluation *curEval = &eval.getFirstNestedEvaluation();
for (int i = 1; i < collapseValue; i++) {
// The nested evaluations should be DoConstructs (i.e. they should form
// a loop nest). Each DoConstruct is a tuple <NonLabelDoStmt, Block,
// EndDoStmt>.
assert(curEval->isA<parser::DoConstruct>());
curEval = &*std::next(curEval->getNestedEvaluations().begin());
}
return curEval;
}
static void genNestedEvaluations(lower::AbstractConverter &converter,
lower::pft::Evaluation &eval,
int collapseValue = 0) {
lower::pft::Evaluation *curEval = getCollapsedLoopEval(eval, collapseValue);
for (lower::pft::Evaluation &e : curEval->getNestedEvaluations())
converter.genEval(e);
}
static fir::GlobalOp globalInitialization(lower::AbstractConverter &converter,
fir::FirOpBuilder &firOpBuilder,
const semantics::Symbol &sym,
const lower::pft::Variable &var,
mlir::Location currentLocation) {
std::string globalName = converter.mangleName(sym);
mlir::StringAttr linkage = firOpBuilder.createInternalLinkage();
return Fortran::lower::defineGlobal(converter, var, globalName, linkage);
}
// Get the extended value for \p val by extracting additional variable
// information from \p base.
static fir::ExtendedValue getExtendedValue(fir::ExtendedValue base,
mlir::Value val) {
return base.match(
[&](const fir::MutableBoxValue &box) -> fir::ExtendedValue {
return fir::MutableBoxValue(val, box.nonDeferredLenParams(), {});
},
[&](const auto &) -> fir::ExtendedValue {
return fir::substBase(base, val);
});
}
#ifndef NDEBUG
static bool isThreadPrivate(lower::SymbolRef sym) {
if (const auto *details = sym->detailsIf<semantics::CommonBlockDetails>()) {
for (const auto &obj : details->objects())
if (!obj->test(semantics::Symbol::Flag::OmpThreadprivate))
return false;
return true;
}
return sym->test(semantics::Symbol::Flag::OmpThreadprivate);
}
#endif
static void threadPrivatizeVars(lower::AbstractConverter &converter,
lower::pft::Evaluation &eval) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
mlir::Location currentLocation = converter.getCurrentLocation();
mlir::OpBuilder::InsertionGuard guard(firOpBuilder);
firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
// If the symbol corresponds to the original ThreadprivateOp, use the symbol
// value from that operation to create one ThreadprivateOp copy operation
// inside the parallel region.
// In some cases, however, the symbol will correspond to the original,
// non-threadprivate variable. This can happen, for instance, with a common
// block, declared in a separate module, used by a parent procedure and
// privatized in its child procedure.
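// For example, given "!$omp threadprivate(x)", a reference to 'x' inside a
// parallel region is rebound to the result of a new ThreadprivateOp created
// within that region, so that each thread accesses its own copy of 'x'.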
auto genThreadprivateOp = [&](lower::SymbolRef sym) -> mlir::Value {
assert(isThreadPrivate(sym));
mlir::Value symValue = converter.getSymbolAddress(sym);
mlir::Operation *op = symValue.getDefiningOp();
if (auto declOp = mlir::dyn_cast<hlfir::DeclareOp>(op))
op = declOp.getMemref().getDefiningOp();
if (mlir::isa<mlir::omp::ThreadprivateOp>(op))
symValue = mlir::dyn_cast<mlir::omp::ThreadprivateOp>(op).getSymAddr();
return firOpBuilder.create<mlir::omp::ThreadprivateOp>(
currentLocation, symValue.getType(), symValue);
};
llvm::SetVector<const semantics::Symbol *> threadprivateSyms;
converter.collectSymbolSet(eval, threadprivateSyms,
semantics::Symbol::Flag::OmpThreadprivate,
/*collectSymbols=*/true,
/*collectHostAssociatedSymbols=*/true);
std::set<semantics::SourceName> threadprivateSymNames;
// For a COMMON block, the ThreadprivateOp is generated for the block itself
// rather than for its members, so bind the value of the new copied
// ThreadprivateOp inside the parallel region to the common block symbol only
// once, even if multiple of its members are referenced.
llvm::SetVector<const semantics::Symbol *> commonSyms;
for (std::size_t i = 0; i < threadprivateSyms.size(); i++) {
const semantics::Symbol *sym = threadprivateSyms[i];
mlir::Value symThreadprivateValue;
// The variable may be used more than once, and each reference has its own
// symbol with the same name. Process each variable only once.
if (threadprivateSymNames.find(sym->name()) != threadprivateSymNames.end())
continue;
threadprivateSymNames.insert(sym->name());
if (const semantics::Symbol *common =
semantics::FindCommonBlockContaining(sym->GetUltimate())) {
mlir::Value commonThreadprivateValue;
if (commonSyms.contains(common)) {
commonThreadprivateValue = converter.getSymbolAddress(*common);
} else {
commonThreadprivateValue = genThreadprivateOp(*common);
converter.bindSymbol(*common, commonThreadprivateValue);
commonSyms.insert(common);
}
symThreadprivateValue = lower::genCommonBlockMember(
converter, currentLocation, sym->GetUltimate(),
commonThreadprivateValue);
} else {
symThreadprivateValue = genThreadprivateOp(*sym);
}
fir::ExtendedValue sexv = converter.getSymbolExtendedValue(*sym);
fir::ExtendedValue symThreadprivateExv =
getExtendedValue(sexv, symThreadprivateValue);
converter.bindSymbol(*sym, symThreadprivateExv);
}
}
static mlir::Operation *
createAndSetPrivatizedLoopVar(lower::AbstractConverter &converter,
mlir::Location loc, mlir::Value indexVal,
const semantics::Symbol *sym) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint();
firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
mlir::Type tempTy = converter.genType(*sym);
assert(converter.isPresentShallowLookup(*sym) &&
"Expected symbol to be in symbol table.");
firOpBuilder.restoreInsertionPoint(insPt);
mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, indexVal);
hlfir::Entity lhs{converter.getSymbolAddress(*sym)};
lhs = hlfir::derefPointersAndAllocatables(loc, firOpBuilder, lhs);
mlir::Operation *storeOp =
firOpBuilder.create<hlfir::AssignOp>(loc, cvtVal, lhs);
return storeOp;
}
// This helper function implements the functionality of "promoting" non-C_PTR
// arguments of use_device_ptr to use_device_addr arguments (automatic
// conversion of use_device_ptr -> use_device_addr in these cases). It
// currently does so by shuffling the operands, together with their types,
// locations and symbols, from devicePtrOperands to deviceAddrOperands.
//
// This effectively implements some deprecated OpenMP functionality that some
// legacy applications unfortunately depend on (deprecated in specification
// version 5.2):
//
// "If a list item in a use_device_ptr clause is not of type C_PTR, the behavior
// is as if the list item appeared in a use_device_addr clause. Support for
// such list items in a use_device_ptr clause is deprecated."
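//
// For example, in "!$omp target data map(a) use_device_ptr(a)" where 'a' is
// not of type C_PTR, 'a' is handled as if it had been listed in a
// use_device_addr clause instead.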
static void promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr(
llvm::SmallVectorImpl<mlir::Value> &useDeviceAddrVars,
llvm::SmallVectorImpl<const semantics::Symbol *> &useDeviceAddrSyms,
llvm::SmallVectorImpl<mlir::Value> &useDevicePtrVars,
llvm::SmallVectorImpl<const semantics::Symbol *> &useDevicePtrSyms) {
// Iterate over our use_device_ptr list and shift all non-cptr arguments into
// use_device_addr.
auto *varIt = useDevicePtrVars.begin();
auto *symIt = useDevicePtrSyms.begin();
while (varIt != useDevicePtrVars.end()) {
if (fir::isa_builtin_cptr_type(fir::unwrapRefType(varIt->getType()))) {
++varIt;
++symIt;
continue;
}
useDeviceAddrVars.push_back(*varIt);
useDeviceAddrSyms.push_back(*symIt);
varIt = useDevicePtrVars.erase(varIt);
symIt = useDevicePtrSyms.erase(symIt);
}
}
/// Extract the list of function and variable symbols affected by the given
/// 'declare target' directive and return the intended device type for them.
static void getDeclareTargetInfo(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct,
mlir::omp::DeclareTargetOperands &clauseOps,
llvm::SmallVectorImpl<DeclareTargetCapturePair> &symbolAndClause) {
const auto &spec =
std::get<parser::OmpDeclareTargetSpecifier>(declareTargetConstruct.t);
if (const auto *objectList{parser::Unwrap<parser::OmpObjectList>(spec.u)}) {
ObjectList objects{makeObjects(*objectList, semaCtx)};
// Case: declare target(func, var1, var2)
gatherFuncAndVarSyms(objects, mlir::omp::DeclareTargetCaptureClause::to,
symbolAndClause);
} else if (const auto *clauseList{
parser::Unwrap<parser::OmpClauseList>(spec.u)}) {
List<Clause> clauses = makeClauses(*clauseList, semaCtx);
if (clauses.empty()) {
Fortran::lower::pft::FunctionLikeUnit *owningProc =
eval.getOwningProcedure();
if (owningProc && (!owningProc->isMainProgram() ||
owningProc->getMainProgramSymbol())) {
// Case: declare target, implicit capture of function
symbolAndClause.emplace_back(mlir::omp::DeclareTargetCaptureClause::to,
owningProc->getSubprogramSymbol());
}
}
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processDeviceType(clauseOps);
cp.processEnter(symbolAndClause);
cp.processLink(symbolAndClause);
cp.processTo(symbolAndClause);
cp.processTODO<clause::Indirect>(converter.getCurrentLocation(),
llvm::omp::Directive::OMPD_declare_target);
}
}
static void collectDeferredDeclareTargets(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct,
llvm::SmallVectorImpl<lower::OMPDeferredDeclareTargetInfo>
&deferredDeclareTarget) {
mlir::omp::DeclareTargetOperands clauseOps;
llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause;
getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct,
clauseOps, symbolAndClause);
// Collect the symbols for which no operation exists in the module yet, so
// that marking them as declare target can be deferred until they are
// available.
mlir::ModuleOp mod = converter.getFirOpBuilder().getModule();
for (const DeclareTargetCapturePair &symClause : symbolAndClause) {
mlir::Operation *op = mod.lookupSymbol(
converter.mangleName(std::get<const semantics::Symbol &>(symClause)));
if (!op) {
deferredDeclareTarget.push_back({std::get<0>(symClause),
clauseOps.deviceType,
std::get<1>(symClause)});
}
}
}
static std::optional<mlir::omp::DeclareTargetDeviceType>
getDeclareTargetFunctionDevice(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct) {
mlir::omp::DeclareTargetOperands clauseOps;
llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause;
getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct,
clauseOps, symbolAndClause);
// Return the device type only if at least one of the targets for the
// directive is a function or subroutine
mlir::ModuleOp mod = converter.getFirOpBuilder().getModule();
for (const DeclareTargetCapturePair &symClause : symbolAndClause) {
mlir::Operation *op = mod.lookupSymbol(
converter.mangleName(std::get<const semantics::Symbol &>(symClause)));
if (mlir::isa_and_nonnull<mlir::func::FuncOp>(op))
return clauseOps.deviceType;
}
return std::nullopt;
}
/// Set up the entry block of the given `omp.loop_nest` operation, adding a
/// block argument for each loop induction variable and allocating and
/// initializing a private value to hold each of them.
///
/// This function can also bind the symbols of any variables that should match
/// block arguments on parent loop wrapper operations attached to the same
/// loop. This allows the introduction of any necessary `hlfir.declare`
/// operations inside the entry block of the `omp.loop_nest` operation and
/// not directly under any of the wrappers, which would invalidate them.
///
/// \param [in] op - the loop nest operation.
/// \param [in] converter - PFT to MLIR conversion interface.
/// \param [in] loc - location.
/// \param [in] args - symbols of induction variables.
/// \param [in] wrapperArgs - list of parent loop wrappers and their associated
/// entry block arguments.
static void genLoopVars(
mlir::Operation *op, lower::AbstractConverter &converter,
mlir::Location &loc, llvm::ArrayRef<const semantics::Symbol *> args,
llvm::ArrayRef<
std::pair<mlir::omp::BlockArgOpenMPOpInterface, const EntryBlockArgs &>>
wrapperArgs = {}) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
auto &region = op->getRegion(0);
std::size_t loopVarTypeSize = 0;
for (const semantics::Symbol *arg : args)
loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size());
mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize);
llvm::SmallVector<mlir::Type> tiv(args.size(), loopVarType);
llvm::SmallVector<mlir::Location> locs(args.size(), loc);
firOpBuilder.createBlock(&region, {}, tiv, locs);
// Update nested wrapper operands if parent wrappers have mapped these values
// to block arguments.
//
// Binding these values earlier would take care of this, but we cannot rely on
// that approach because binding in between the creation of a wrapper and the
// next one would result in 'hlfir.declare' operations being introduced
// inside a wrapper, which is illegal.
mlir::IRMapping mapper;
for (auto [argGeneratingOp, blockArgs] : wrapperArgs) {
for (mlir::OpOperand &operand : argGeneratingOp->getOpOperands())
operand.set(mapper.lookupOrDefault(operand.get()));
for (const auto [arg, var] : llvm::zip_equal(
argGeneratingOp->getRegion(0).getArguments(), blockArgs.getVars()))
mapper.map(var, arg);
}
// Bind the entry block arguments of parent wrappers to the corresponding
// symbols.
for (auto [argGeneratingOp, blockArgs] : wrapperArgs)
bindEntryBlockArgs(converter, argGeneratingOp, blockArgs);
// The argument is not currently in memory, so make a temporary for it, store
// the argument value there, and then bind that location to the argument.
mlir::Operation *storeOp = nullptr;
for (auto [argIndex, argSymbol] : llvm::enumerate(args)) {
mlir::Value indexVal = fir::getBase(region.front().getArgument(argIndex));
storeOp =
createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol);
}
firOpBuilder.setInsertionPointAfter(storeOp);
}
static void
markDeclareTarget(mlir::Operation *op, lower::AbstractConverter &converter,
mlir::omp::DeclareTargetCaptureClause captureClause,
mlir::omp::DeclareTargetDeviceType deviceType) {
// TODO: Add support for program local variables with declare target applied
auto declareTargetOp = llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(op);
if (!declareTargetOp)
fir::emitFatalError(
converter.getCurrentLocation(),
"Attempt to apply declare target on unsupported operation");
// The function or global already has a declare target applied to it, very
// likely through implicit capture (usage in another declare target
// function/subroutine). It should be marked as 'any' if it has been assigned
// both host and nohost; otherwise we skip it, as there is no change.
if (declareTargetOp.isDeclareTarget()) {
if (declareTargetOp.getDeclareTargetDeviceType() != deviceType)
declareTargetOp.setDeclareTarget(mlir::omp::DeclareTargetDeviceType::any,
captureClause);
return;
}
declareTargetOp.setDeclareTarget(deviceType, captureClause);
}
//===----------------------------------------------------------------------===//
// Op body generation helper structures and functions
//===----------------------------------------------------------------------===//
struct OpWithBodyGenInfo {
/// A type for a code-gen callback function. This takes as argument the op for
/// which the code is being generated and returns the arguments of the op's
/// region.
using GenOMPRegionEntryCBFn =
std::function<llvm::SmallVector<const semantics::Symbol *>(
mlir::Operation *)>;
OpWithBodyGenInfo(lower::AbstractConverter &converter,
lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, mlir::Location loc,
lower::pft::Evaluation &eval, llvm::omp::Directive dir)
: converter(converter), symTable(symTable), semaCtx(semaCtx), loc(loc),
eval(eval), dir(dir) {}
OpWithBodyGenInfo &setClauses(const List<Clause> *value) {
clauses = value;
return *this;
}
OpWithBodyGenInfo &setDataSharingProcessor(DataSharingProcessor *value) {
dsp = value;
return *this;
}
OpWithBodyGenInfo &setEntryBlockArgs(const EntryBlockArgs *value) {
blockArgs = value;
return *this;
}
OpWithBodyGenInfo &setGenRegionEntryCb(GenOMPRegionEntryCBFn value) {
genRegionEntryCB = value;
return *this;
}
OpWithBodyGenInfo &setGenSkeletonOnly(bool value) {
genSkeletonOnly = value;
return *this;
}
/// [inout] converter to use for the clauses.
lower::AbstractConverter &converter;
/// [in] Symbol table
lower::SymMap &symTable;
/// [in] Semantics context
semantics::SemanticsContext &semaCtx;
/// [in] location in source code.
mlir::Location loc;
/// [in] current PFT node/evaluation.
lower::pft::Evaluation &eval;
/// [in] leaf directive for which to generate the op body.
llvm::omp::Directive dir;
/// [in] list of clauses to process.
const List<Clause> *clauses = nullptr;
/// [in] if provided, processes the construct's data-sharing attributes.
DataSharingProcessor *dsp = nullptr;
/// [in] if provided, it is used to create the op's region entry block. It is
/// overridden when a \see genRegionEntryCB is provided. This is only valid for
/// operations implementing the \see mlir::omp::BlockArgOpenMPOpInterface.
const EntryBlockArgs *blockArgs = nullptr;
/// [in] if provided, it overrides the default op's region entry block
/// creation.
GenOMPRegionEntryCBFn genRegionEntryCB = nullptr;
/// [in] if set to `true`, skip generating nested evaluations and dispatching
/// any further leaf constructs.
bool genSkeletonOnly = false;
};
/// Create the body (block) for an OpenMP Operation.
///
/// \param [in] op - the operation the body belongs to.
/// \param [in] info - options controlling code-gen for the construction.
/// \param [in] queue - work queue with nested constructs.
/// \param [in] item - item in the queue to generate body for.
static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info,
const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder();
auto insertMarker = [](fir::FirOpBuilder &builder) {
mlir::Value undef = builder.create<fir::UndefOp>(builder.getUnknownLoc(),
builder.getIndexType());
return undef.getDefiningOp();
};
// Create the entry block for the region and collect its arguments for use
// within the region. The entry block will be created as follows:
// - By default, it will be empty and have no arguments.
// - Operations implementing the omp::BlockArgOpenMPOpInterface can set the
// `info.blockArgs` pointer so that block arguments will be those
// corresponding to entry block argument-generating clauses. Binding of
// Fortran symbols to the new MLIR values is done automatically.
// - If the `info.genRegionEntryCB` callback is set, it takes precedence and
// allows callers to manually create the entry block with its intended
// list of arguments and to bind these arguments to their corresponding
// Fortran symbols. This is used for e.g. loop induction variables.
auto regionArgs = [&]() -> llvm::SmallVector<const semantics::Symbol *> {
if (info.genRegionEntryCB)
return info.genRegionEntryCB(&op);
if (info.blockArgs) {
genEntryBlock(firOpBuilder, *info.blockArgs, op.getRegion(0));
bindEntryBlockArgs(info.converter,
llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(op),
*info.blockArgs);
return llvm::to_vector(info.blockArgs->getSyms());
}
firOpBuilder.createBlock(&op.getRegion(0));
return {};
}();
// Mark the earliest insertion point.
mlir::Operation *marker = insertMarker(firOpBuilder);
// If it is an unstructured region, create empty blocks for all evaluations.
if (lower::omp::isLastItemInQueue(item, queue) &&
info.eval.lowerAsUnstructured()) {
lower::createEmptyRegionBlocks<mlir::omp::TerminatorOp, mlir::omp::YieldOp>(
firOpBuilder, info.eval.getNestedEvaluations());
}
// Start with privatization, so that the lowering of the nested
// code will use the right symbols.
bool isLoop = llvm::omp::getDirectiveAssociation(info.dir) ==
llvm::omp::Association::Loop;
bool privatize = info.clauses;
firOpBuilder.setInsertionPoint(marker);
std::optional<DataSharingProcessor> tempDsp;
if (privatize && !info.dsp) {
tempDsp.emplace(info.converter, info.semaCtx, *info.clauses, info.eval,
Fortran::lower::omp::isLastItemInQueue(item, queue),
/*useDelayedPrivatization=*/false, info.symTable);
tempDsp->processStep1();
}
if (info.dir == llvm::omp::Directive::OMPD_parallel) {
threadPrivatizeVars(info.converter, info.eval);
if (info.clauses) {
firOpBuilder.setInsertionPoint(marker);
ClauseProcessor(info.converter, info.semaCtx, *info.clauses)
.processCopyin();
}
}
if (!info.genSkeletonOnly) {
if (ConstructQueue::const_iterator next = std::next(item);
next != queue.end()) {
genOMPDispatch(info.converter, info.symTable, info.semaCtx, info.eval,
info.loc, queue, next);
} else {
// genFIR(Evaluation&) tries to patch up unterminated blocks, causing
// a lot of complications for our approach if the terminator generation
// is delayed past this point. Insert a temporary terminator here, then
// delete it.
firOpBuilder.setInsertionPointToEnd(&op.getRegion(0).back());
auto *temp = lower::genOpenMPTerminator(firOpBuilder, &op, info.loc);
firOpBuilder.setInsertionPointAfter(marker);
genNestedEvaluations(info.converter, info.eval);
temp->erase();
}
}
// Get or create a unique exiting block from the given region, or
// return nullptr if there is no exiting block.
auto getUniqueExit = [&](mlir::Region &region) -> mlir::Block * {
// Find the blocks where the OMP terminator should go. In simple cases
// it is the single block in the operation's region. When the region
// is more complicated, especially with unstructured control flow, there
// may be multiple blocks, and some of them may have non-OMP terminators
// resulting from lowering of the code contained within the operation.
// All the remaining blocks are potential exit points from the op's region.
//
// Explicit control flow cannot exit any OpenMP region (other than via
// STOP), and that is enforced by semantic checks prior to lowering. STOP
// statements are lowered to a function call.
// Collect unterminated blocks.
llvm::SmallVector<mlir::Block *> exits;
for (mlir::Block &b : region) {
if (b.empty() || !b.back().hasTrait<mlir::OpTrait::IsTerminator>())
exits.push_back(&b);
}
if (exits.empty())
return nullptr;
// If there already is a unique exiting block, do not create another one.
// Additionally, some ops (e.g. omp.sections) require only 1 block in
// their region.
if (exits.size() == 1)
return exits[0];
mlir::Block *exit = firOpBuilder.createBlock(&region);
for (mlir::Block *b : exits) {
firOpBuilder.setInsertionPointToEnd(b);
firOpBuilder.create<mlir::cf::BranchOp>(info.loc, exit);
}
return exit;
};
if (auto *exitBlock = getUniqueExit(op.getRegion(0))) {
firOpBuilder.setInsertionPointToEnd(exitBlock);
auto *term = lower::genOpenMPTerminator(firOpBuilder, &op, info.loc);
// Only insert lastprivate code when there actually is an exit block.
// Such a block may not exist if the nested code produced an infinite
// loop (this may not make sense in production code, but a user could
// write that and we should handle it).
firOpBuilder.setInsertionPoint(term);
if (privatize) {
// DataSharingProcessor::processStep2() may create operations before/after
// the one passed as argument. We need to treat loop wrappers and their
// nested loop as a unit, so we need to pass the bottom level wrapper (if
// present). Otherwise, these operations will be inserted within a
// wrapper region.
mlir::Operation *privatizationBottomLevelOp = &op;
if (auto loopNest = llvm::dyn_cast<mlir::omp::LoopNestOp>(op)) {
llvm::SmallVector<mlir::omp::LoopWrapperInterface> wrappers;
loopNest.gatherWrappers(wrappers);
if (!wrappers.empty())
privatizationBottomLevelOp = &*wrappers.front();
}
if (!info.dsp) {
assert(tempDsp.has_value());
tempDsp->processStep2(privatizationBottomLevelOp, isLoop);
} else {
if (isLoop && regionArgs.size() > 0) {
for (const auto &regionArg : regionArgs) {
info.dsp->pushLoopIV(info.converter.getSymbolAddress(*regionArg));
}
}
info.dsp->processStep2(privatizationBottomLevelOp, isLoop);
}
}
}
firOpBuilder.setInsertionPointAfter(marker);
marker->erase();
}
static void genBodyOfTargetDataOp(
lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::omp::TargetDataOp &dataOp, const EntryBlockArgs &args,
const mlir::Location &currentLocation, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
genEntryBlock(firOpBuilder, args, dataOp.getRegion());
bindEntryBlockArgs(converter, dataOp, args);
// Insert a dummy instruction to remember the insertion position. The
// marker will be deleted by cleanup passes since there are no uses.
// Remembering the position for further insertion is important since
// there are hlfir.declares inserted above while setting block arguments
// and new code from the body should be inserted after that.
mlir::Value undefMarker = firOpBuilder.create<fir::UndefOp>(
dataOp.getLoc(), firOpBuilder.getIndexType());
// Create blocks for unstructured regions. This has to be done since
// blocks are initially allocated with the function as the parent region.
if (eval.lowerAsUnstructured()) {
lower::createEmptyRegionBlocks<mlir::omp::TerminatorOp, mlir::omp::YieldOp>(
firOpBuilder, eval.getNestedEvaluations());
}
firOpBuilder.create<mlir::omp::TerminatorOp>(currentLocation);
// Set the insertion point after the marker.
firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp());
if (ConstructQueue::const_iterator next = std::next(item);
next != queue.end()) {
genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
next);
} else {
genNestedEvaluations(converter, eval);
}
}
// This generates intermediate common block member accesses within a region
// and then rebinds the member symbols to the intermediate accessors we have
// generated, so that subsequent code generation will utilise these instead.
//
// When the scope changes, the bindings to the intermediate accessors should
// be dropped in place of the original symbol bindings.
//
// This is intended for use with TargetOp.
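//
// For example, when /blk/ containing 'x' is mapped onto a target region, a
// new access to 'x' is generated from the mapped base address of /blk/
// inside the region, and 'x' is rebound to that access for the scope of the
// region.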
static void genIntermediateCommonBlockAccessors(
Fortran::lower::AbstractConverter &converter,
const mlir::Location &currentLocation,
llvm::ArrayRef<mlir::BlockArgument> mapBlockArgs,
llvm::ArrayRef<const Fortran::semantics::Symbol *> mapSyms) {
// Iterate over the symbol list, which will be shorter than the list of
// arguments if new entry block arguments were introduced to implicitly map
// outside values used by the bounds cloned into the target region. In that
// case, the additional block arguments do not need processing here.
for (auto [mapSym, mapArg] : llvm::zip_first(mapSyms, mapBlockArgs)) {
auto *details = mapSym->detailsIf<Fortran::semantics::CommonBlockDetails>();
if (!details)
continue;
for (auto obj : details->objects()) {
auto targetCBMemberBind = Fortran::lower::genCommonBlockMember(
converter, currentLocation, *obj, mapArg);
fir::ExtendedValue sexv = converter.getSymbolExtendedValue(*obj);
fir::ExtendedValue targetCBExv =
getExtendedValue(sexv, targetCBMemberBind);
converter.bindSymbol(*obj, targetCBExv);
}
}
}
// This function creates a block for the body of the targetOp's region. It adds
// all the symbols present in `args` as block arguments to this block.
static void genBodyOfTargetOp(
lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::omp::TargetOp &targetOp, const EntryBlockArgs &args,
const mlir::Location &currentLocation, const ConstructQueue &queue,
ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
auto argIface = llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*targetOp);
mlir::Region &region = targetOp.getRegion();
mlir::Block *entryBlock = genEntryBlock(firOpBuilder, args, region);
bindEntryBlockArgs(converter, targetOp, args);
if (!hostEvalInfo.empty())
hostEvalInfo.back().bindOperands(argIface.getHostEvalBlockArgs());
// Check if cloning the bounds introduced any dependency on the outer region.
// If so, then either clone them as well if they are MemoryEffectFree, or
// otherwise copy them to a new temporary, add them to the map and
// block_argument lists, and replace their uses with the new temporary.
llvm::SetVector<mlir::Value> valuesDefinedAbove;
mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove);
while (!valuesDefinedAbove.empty()) {
for (mlir::Value val : valuesDefinedAbove) {
mlir::Operation *valOp = val.getDefiningOp();
assert(valOp != nullptr);
// NOTE: We skip BoxDimsOps as the lesser of two evils, since mapping the
// indices separately is cheaper than the alternative of eventually mapping
// the whole Box, which comes with a fairly large overhead comparatively. We
// could be more robust about this and check using a BackwardsSlice to see
// if we run the risk of mapping a box.
if (mlir::isMemoryEffectFree(valOp) &&
!mlir::isa<fir::BoxDimsOp>(valOp)) {
mlir::Operation *clonedOp = valOp->clone();
entryBlock->push_front(clonedOp);
auto replace = [entryBlock](mlir::OpOperand &use) {
return use.getOwner()->getBlock() == entryBlock;
};
valOp->getResults().replaceUsesWithIf(clonedOp->getResults(), replace);
valOp->replaceUsesWithIf(clonedOp, replace);
} else {
auto savedIP = firOpBuilder.getInsertionPoint();
firOpBuilder.setInsertionPointAfter(valOp);
auto copyVal =
firOpBuilder.createTemporary(val.getLoc(), val.getType());
firOpBuilder.createStoreWithConvert(copyVal.getLoc(), val, copyVal);
fir::factory::AddrAndBoundsInfo info =
fir::factory::getDataOperandBaseAddr(
firOpBuilder, val, /*isOptional=*/false, val.getLoc());
llvm::SmallVector<mlir::Value> bounds =
fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
mlir::omp::MapBoundsType>(
firOpBuilder, info,
hlfir::translateToExtendedValue(val.getLoc(), firOpBuilder,
hlfir::Entity{val})
.first,
/*dataExvIsAssumedSize=*/false, val.getLoc());
std::stringstream name;
firOpBuilder.setInsertionPoint(targetOp);
llvm::omp::OpenMPOffloadMappingFlags mapFlag =
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
mlir::omp::VariableCaptureKind captureKind =
mlir::omp::VariableCaptureKind::ByRef;
mlir::Type eleType = copyVal.getType();
if (auto refType =
mlir::dyn_cast<fir::ReferenceType>(copyVal.getType()))
eleType = refType.getElementType();
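// Capture trivial (e.g. scalar) and character values by copying them into
// the kernel; anything else, except C_PTR values, is mapped with the TO
// flag so that its current value is visible on the device.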
if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) {
captureKind = mlir::omp::VariableCaptureKind::ByCopy;
} else if (!fir::isa_builtin_cptr_type(eleType)) {
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
}
mlir::Value mapOp = createMapInfoOp(
firOpBuilder, copyVal.getLoc(), copyVal,
/*varPtrPtr=*/mlir::Value{}, name.str(), bounds,
/*members=*/llvm::SmallVector<mlir::Value>{},
/*membersIndex=*/mlir::ArrayAttr{},
static_cast<
std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
mapFlag),
captureKind, copyVal.getType());
// Get the index of the first non-map argument before modifying mapVars,
// then append an element to mapVars and an associated entry block
// argument at that index.
unsigned insertIndex =
argIface.getMapBlockArgsStart() + argIface.numMapBlockArgs();
targetOp.getMapVarsMutable().append(mapOp);
mlir::Value clonedValArg = region.insertArgument(
insertIndex, copyVal.getType(), copyVal.getLoc());
firOpBuilder.setInsertionPointToStart(entryBlock);
auto loadOp = firOpBuilder.create<fir::LoadOp>(clonedValArg.getLoc(),
clonedValArg);
val.replaceUsesWithIf(loadOp->getResult(0),
[entryBlock](mlir::OpOperand &use) {
return use.getOwner()->getBlock() == entryBlock;
});
firOpBuilder.setInsertionPoint(entryBlock, savedIP);
}
}
valuesDefinedAbove.clear();
mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove);
}
// Insert a dummy instruction to remember the insertion position. The
// marker will be deleted since there are no uses.
// In the HLFIR flow there are hlfir.declares inserted above while
// setting block arguments.
mlir::Value undefMarker = firOpBuilder.create<fir::UndefOp>(
targetOp.getLoc(), firOpBuilder.getIndexType());
// Create blocks for unstructured regions. This has to be done since
// blocks are initially allocated with the function as the parent region.
if (lower::omp::isLastItemInQueue(item, queue) &&
eval.lowerAsUnstructured()) {
lower::createEmptyRegionBlocks<mlir::omp::TerminatorOp, mlir::omp::YieldOp>(
firOpBuilder, eval.getNestedEvaluations());
}
firOpBuilder.create<mlir::omp::TerminatorOp>(currentLocation);
// Set the insertion point after the marker.
firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp());
// If we map a common block using its symbol, e.g. map(tofrom: /common_block/),
// and access its members within the target region, there is a large chance we
// will end up with uses external to the region accessing the common block. To
// resolve these, we generate new common block member accesses within the
// region, binding them to the member symbols for the scope of the region, so
// that subsequent code generation within the region will utilise the new
// member accesses we have created.
genIntermediateCommonBlockAccessors(
converter, currentLocation, argIface.getMapBlockArgs(), args.map.syms);
if (ConstructQueue::const_iterator next = std::next(item);
next != queue.end()) {
genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
next);
} else {
genNestedEvaluations(converter, eval);
}
dsp.processStep2(targetOp, /*isLoop=*/false);
}
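/// Create an operation of type \c OpTy at the current insertion point,
/// forwarding \p args to its builder, and then generate its region body from
/// the given construct queue item.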
template <typename OpTy, typename... Args>
static OpTy genOpWithBody(const OpWithBodyGenInfo &info,
const ConstructQueue &queue,
ConstructQueue::const_iterator item, Args &&...args) {
auto op = info.converter.getFirOpBuilder().create<OpTy>(
info.loc, std::forward<Args>(args)...);
createBodyOfOp(*op, info, queue, item);
return op;
}
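/// Create a loop wrapper operation, checked at compile time to implement
/// \c mlir::omp::LoopWrapperInterface, and create its entry block with
/// arguments based on \p args.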
template <typename OpTy, typename ClauseOpsTy>
static OpTy genWrapperOp(lower::AbstractConverter &converter,
mlir::Location loc, const ClauseOpsTy &clauseOps,
const EntryBlockArgs &args) {
static_assert(
OpTy::template hasTrait<mlir::omp::LoopWrapperInterface::Trait>(),
"expected a loop wrapper");
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
// Create wrapper.
auto op = firOpBuilder.create<OpTy>(loc, clauseOps);
// Create entry block with arguments.
genEntryBlock(firOpBuilder, args, op.getRegion());
return op;
}
//===----------------------------------------------------------------------===//
// Code generation functions for clauses
//===----------------------------------------------------------------------===//
static void genCancelClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::CancelOperands &clauseOps) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processCancelDirectiveName(clauseOps);
cp.processIf(llvm::omp::Directive::OMPD_cancel, clauseOps);
}
static void
genCancellationPointClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::CancellationPointOperands &clauseOps) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processCancelDirectiveName(clauseOps);
}
static void genCriticalDeclareClauses(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::CriticalDeclareOperands &clauseOps, llvm::StringRef name) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processHint(clauseOps);
clauseOps.symName =
mlir::StringAttr::get(converter.getFirOpBuilder().getContext(), name);
}
static void genDistributeClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
lower::StatementContext &stmtCtx,
const List<Clause> &clauses,
mlir::Location loc,
mlir::omp::DistributeOperands &clauseOps) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processAllocate(clauseOps);
cp.processDistSchedule(stmtCtx, clauseOps);
cp.processOrder(clauseOps);
}
static void genFlushClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
const ObjectList &objects,
const List<Clause> &clauses, mlir::Location loc,
llvm::SmallVectorImpl<mlir::Value> &operandRange) {
if (!objects.empty())
genObjectList(objects, converter, operandRange);
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processTODO<clause::AcqRel, clause::Acquire, clause::Release,
clause::SeqCst>(loc, llvm::omp::OMPD_flush);
}
static void
genLoopNestClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, const List<Clause> &clauses,
mlir::Location loc, mlir::omp::LoopNestOperands &clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> &iv) {
ClauseProcessor cp(converter, semaCtx, clauses);
if (hostEvalInfo.empty() || !hostEvalInfo.back().apply(clauseOps, iv))
cp.processCollapse(loc, eval, clauseOps, iv);
clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr();
}
static void genLoopClauses(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::LoopOperands &clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processBind(clauseOps);
cp.processOrder(clauseOps);
cp.processReduction(loc, clauseOps, reductionSyms);
cp.processTODO<clause::Lastprivate>(loc, llvm::omp::Directive::OMPD_loop);
}
static void genMaskedClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
lower::StatementContext &stmtCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::MaskedOperands &clauseOps) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processFilter(stmtCtx, clauseOps);
}
static void
genOrderedRegionClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::OrderedRegionOperands &clauseOps) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processTODO<clause::Simd>(loc, llvm::omp::Directive::OMPD_ordered);
}
static void genParallelClauses(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
lower::StatementContext &stmtCtx, const List<Clause> &clauses,
mlir::Location loc, mlir::omp::ParallelOperands &clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processAllocate(clauseOps);
cp.processIf(llvm::omp::Directive::OMPD_parallel, clauseOps);
if (hostEvalInfo.empty() || !hostEvalInfo.back().apply(clauseOps))
cp.processNumThreads(stmtCtx, clauseOps);
cp.processProcBind(clauseOps);
cp.processReduction(loc, clauseOps, reductionSyms);
}
static void genScanClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::ScanOperands &clauseOps) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processInclusive(loc, clauseOps);
cp.processExclusive(loc, clauseOps);
}
static void genSectionsClauses(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::SectionsOperands &clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processAllocate(clauseOps);
cp.processNowait(clauseOps);
cp.processReduction(loc, clauseOps, reductionSyms);
// TODO Support delayed privatization.
}
static void genSimdClauses(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::SimdOperands &clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processAligned(clauseOps);
cp.processIf(llvm::omp::Directive::OMPD_simd, clauseOps);
cp.processNontemporal(clauseOps);
cp.processOrder(clauseOps);
cp.processReduction(loc, clauseOps, reductionSyms);
cp.processSafelen(clauseOps);
cp.processSimdlen(clauseOps);
cp.processTODO<clause::Linear>(loc, llvm::omp::Directive::OMPD_simd);
}
static void genSingleClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::SingleOperands &clauseOps) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processAllocate(clauseOps);
cp.processCopyprivate(loc, clauseOps);
cp.processNowait(clauseOps);
// TODO Support delayed privatization.
}
static void genTargetClauses(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
lower::SymMap &symTable, lower::StatementContext &stmtCtx,
lower::pft::Evaluation &eval, const List<Clause> &clauses,
mlir::Location loc, mlir::omp::TargetOperands &clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> &hasDeviceAddrSyms,
llvm::SmallVectorImpl<const semantics::Symbol *> &isDevicePtrSyms,
llvm::SmallVectorImpl<const semantics::Symbol *> &mapSyms) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processBare(clauseOps);
cp.processDepend(symTable, stmtCtx, clauseOps);
cp.processDevice(stmtCtx, clauseOps);
cp.processHasDeviceAddr(stmtCtx, clauseOps, hasDeviceAddrSyms);
if (!hostEvalInfo.empty()) {
// Only process host_eval if compiling for the host device.
processHostEvalClauses(converter, semaCtx, stmtCtx, eval, loc);
hostEvalInfo.back().collectValues(clauseOps.hostEvalVars);
}
cp.processIf(llvm::omp::Directive::OMPD_target, clauseOps);
cp.processIsDevicePtr(clauseOps, isDevicePtrSyms);
cp.processMap(loc, stmtCtx, clauseOps, &mapSyms);
cp.processNowait(clauseOps);
cp.processThreadLimit(stmtCtx, clauseOps);
cp.processTODO<clause::Allocate, clause::Defaultmap, clause::InReduction,
clause::UsesAllocators>(loc,
llvm::omp::Directive::OMPD_target);
// `target private(..)` is only supported in delayed privatization mode.
if (!enableDelayedPrivatizationStaging)
cp.processTODO<clause::Firstprivate, clause::Private>(
loc, llvm::omp::Directive::OMPD_target);
}
static void genTargetDataClauses(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
lower::StatementContext &stmtCtx, const List<Clause> &clauses,
mlir::Location loc, mlir::omp::TargetDataOperands &clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> &useDeviceAddrSyms,
llvm::SmallVectorImpl<const semantics::Symbol *> &useDevicePtrSyms) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processDevice(stmtCtx, clauseOps);
cp.processIf(llvm::omp::Directive::OMPD_target_data, clauseOps);
cp.processMap(loc, stmtCtx, clauseOps);
cp.processUseDeviceAddr(stmtCtx, clauseOps, useDeviceAddrSyms);
cp.processUseDevicePtr(stmtCtx, clauseOps, useDevicePtrSyms);
// This implements the deprecated functionality of use_device_ptr that allows
// users to provide non-C_PTR arguments to it, with the caveat that the
// compiler will treat them as use_device_addr. A lot of legacy code may still
// depend on this functionality, so we should support it in some manner. We
// currently do so by simply shifting non-C_PTR operands from the
// use_device_ptr lists into the use_device_addr lists.
// TODO: Perhaps create a user-providable compiler option that would
// re-introduce a hard error rather than a warning in these cases.
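// For instance (illustrative; `a` is not of type C_PTR):
//
//   !$omp target data map(tofrom: a) use_device_ptr(a)
//
// is handled as if `a` had been listed in use_device_addr instead.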
promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr(
clauseOps.useDeviceAddrVars, useDeviceAddrSyms,
clauseOps.useDevicePtrVars, useDevicePtrSyms);
}
static void genTargetEnterExitUpdateDataClauses(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
lower::SymMap &symTable, lower::StatementContext &stmtCtx,
const List<Clause> &clauses, mlir::Location loc,
llvm::omp::Directive directive,
mlir::omp::TargetEnterExitUpdateDataOperands &clauseOps) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processDepend(symTable, stmtCtx, clauseOps);
cp.processDevice(stmtCtx, clauseOps);
cp.processIf(directive, clauseOps);
if (directive == llvm::omp::Directive::OMPD_target_update)
cp.processMotionClauses(stmtCtx, clauseOps);
else
cp.processMap(loc, stmtCtx, clauseOps);
cp.processNowait(clauseOps);
}
static void genTaskClauses(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
lower::SymMap &symTable, lower::StatementContext &stmtCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::TaskOperands &clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> &inReductionSyms) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processAllocate(clauseOps);
cp.processDepend(symTable, stmtCtx, clauseOps);
cp.processFinal(stmtCtx, clauseOps);
cp.processIf(llvm::omp::Directive::OMPD_task, clauseOps);
cp.processInReduction(loc, clauseOps, inReductionSyms);
cp.processMergeable(clauseOps);
cp.processPriority(stmtCtx, clauseOps);
cp.processUntied(clauseOps);
cp.processDetach(clauseOps);
cp.processTODO<clause::Affinity>(loc, llvm::omp::Directive::OMPD_task);
}
static void genTaskgroupClauses(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::TaskgroupOperands &clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> &taskReductionSyms) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processAllocate(clauseOps);
cp.processTaskReduction(loc, clauseOps, taskReductionSyms);
}
static void genTaskloopClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::TaskloopOperands &clauseOps) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processTODO<clause::Allocate, clause::Collapse, clause::Default,
clause::Final, clause::Grainsize, clause::If,
clause::InReduction, clause::Lastprivate, clause::Mergeable,
clause::Nogroup, clause::NumTasks, clause::Priority,
clause::Reduction, clause::Shared, clause::Untied>(
loc, llvm::omp::Directive::OMPD_taskloop);
}
static void genTaskwaitClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::TaskwaitOperands &clauseOps) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processTODO<clause::Depend, clause::Nowait>(
loc, llvm::omp::Directive::OMPD_taskwait);
}
static void genWorkshareClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
lower::StatementContext &stmtCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::WorkshareOperands &clauseOps) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processNowait(clauseOps);
}
static void genTeamsClauses(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
lower::StatementContext &stmtCtx, const List<Clause> &clauses,
mlir::Location loc, mlir::omp::TeamsOperands &clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processAllocate(clauseOps);
cp.processIf(llvm::omp::Directive::OMPD_teams, clauseOps);
if (hostEvalInfo.empty() || !hostEvalInfo.back().apply(clauseOps)) {
cp.processNumTeams(stmtCtx, clauseOps);
cp.processThreadLimit(stmtCtx, clauseOps);
}
cp.processReduction(loc, clauseOps, reductionSyms);
// TODO Support delayed privatization.
}
static void genWsloopClauses(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
lower::StatementContext &stmtCtx, const List<Clause> &clauses,
mlir::Location loc, mlir::omp::WsloopOperands &clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processNowait(clauseOps);
cp.processOrder(clauseOps);
cp.processOrdered(clauseOps);
cp.processReduction(loc, clauseOps, reductionSyms);
cp.processSchedule(stmtCtx, clauseOps);
cp.processTODO<clause::Allocate, clause::Linear>(
loc, llvm::omp::Directive::OMPD_do);
}
//===----------------------------------------------------------------------===//
// Code generation functions for leaf constructs
//===----------------------------------------------------------------------===//
static mlir::omp::BarrierOp
genBarrierOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
return converter.getFirOpBuilder().create<mlir::omp::BarrierOp>(loc);
}
static mlir::omp::CancelOp genCancelOp(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
mlir::Location loc,
const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
mlir::omp::CancelOperands clauseOps;
genCancelClauses(converter, semaCtx, item->clauses, loc, clauseOps);
return converter.getFirOpBuilder().create<mlir::omp::CancelOp>(loc,
clauseOps);
}
static mlir::omp::CancellationPointOp genCancellationPointOp(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
mlir::omp::CancellationPointOperands clauseOps;
genCancellationPointClauses(converter, semaCtx, item->clauses, loc,
clauseOps);
return converter.getFirOpBuilder().create<mlir::omp::CancellationPointOp>(
loc, clauseOps);
}
static mlir::omp::CriticalOp
genCriticalOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item,
const std::optional<parser::Name> &name) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
mlir::FlatSymbolRefAttr nameAttr;
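// A named CRITICAL construct is lowered to a module-level
// omp.critical.declare, looked up or created below, which the omp.critical
// op then references by symbol. Roughly:
//
//   omp.critical.declare @name hint(...)
//   ...
//   omp.critical(@name) { ... }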
if (name) {
std::string nameStr = name->ToString();
mlir::ModuleOp mod = firOpBuilder.getModule();
auto global = mod.lookupSymbol<mlir::omp::CriticalDeclareOp>(nameStr);
if (!global) {
mlir::omp::CriticalDeclareOperands clauseOps;
genCriticalDeclareClauses(converter, semaCtx, item->clauses, loc,
clauseOps, nameStr);
mlir::OpBuilder modBuilder(mod.getBodyRegion());
global = modBuilder.create<mlir::omp::CriticalDeclareOp>(loc, clauseOps);
}
nameAttr = mlir::FlatSymbolRefAttr::get(firOpBuilder.getContext(),
global.getSymName());
}
return genOpWithBody<mlir::omp::CriticalOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
llvm::omp::Directive::OMPD_critical),
queue, item, nameAttr);
}
static mlir::omp::FlushOp
genFlushOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ObjectList &objects,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
llvm::SmallVector<mlir::Value> operandRange;
genFlushClauses(converter, semaCtx, objects, item->clauses, loc,
operandRange);
return converter.getFirOpBuilder().create<mlir::omp::FlushOp>(
converter.getCurrentLocation(), operandRange);
}
static mlir::omp::LoopNestOp genLoopNestOp(
lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item, mlir::omp::LoopNestOperands &clauseOps,
llvm::ArrayRef<const semantics::Symbol *> iv,
llvm::ArrayRef<
std::pair<mlir::omp::BlockArgOpenMPOpInterface, const EntryBlockArgs &>>
wrapperArgs,
llvm::omp::Directive directive, DataSharingProcessor &dsp) {
auto ivCallback = [&](mlir::Operation *op) {
genLoopVars(op, converter, loc, iv, wrapperArgs);
return llvm::SmallVector<const semantics::Symbol *>(iv);
};
auto *nestedEval =
getCollapsedLoopEval(eval, getCollapseValue(item->clauses));
return genOpWithBody<mlir::omp::LoopNestOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval,
directive)
.setClauses(&item->clauses)
.setDataSharingProcessor(&dsp)
.setGenRegionEntryCb(ivCallback),
queue, item, clauseOps);
}
static mlir::omp::LoopOp
genLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
mlir::omp::LoopOperands loopClauseOps;
llvm::SmallVector<const semantics::Symbol *> loopReductionSyms;
genLoopClauses(converter, semaCtx, item->clauses, loc, loopClauseOps,
loopReductionSyms);
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/true,
/*useDelayedPrivatization=*/true, symTable);
dsp.processStep1(&loopClauseOps);
mlir::omp::LoopNestOperands loopNestClauseOps;
llvm::SmallVector<const semantics::Symbol *> iv;
genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
loopNestClauseOps, iv);
EntryBlockArgs loopArgs;
loopArgs.priv.syms = dsp.getDelayedPrivSymbols();
loopArgs.priv.vars = loopClauseOps.privateVars;
loopArgs.reduction.syms = loopReductionSyms;
loopArgs.reduction.vars = loopClauseOps.reductionVars;
auto loopOp =
genWrapperOp<mlir::omp::LoopOp>(converter, loc, loopClauseOps, loopArgs);
genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
loopNestClauseOps, iv, {{loopOp, loopArgs}},
llvm::omp::Directive::OMPD_loop, dsp);
return loopOp;
}
static mlir::omp::MaskedOp
genMaskedOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
mlir::omp::MaskedOperands clauseOps;
genMaskedClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps);
return genOpWithBody<mlir::omp::MaskedOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
llvm::omp::Directive::OMPD_masked),
queue, item, clauseOps);
}
static mlir::omp::MasterOp
genMasterOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
return genOpWithBody<mlir::omp::MasterOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
llvm::omp::Directive::OMPD_master),
queue, item);
}
static mlir::omp::OrderedOp
genOrderedOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
TODO(loc, "OMPD_ordered");
return nullptr;
}
static mlir::omp::OrderedRegionOp
genOrderedRegionOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
mlir::omp::OrderedRegionOperands clauseOps;
genOrderedRegionClauses(converter, semaCtx, item->clauses, loc, clauseOps);
return genOpWithBody<mlir::omp::OrderedRegionOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
llvm::omp::Directive::OMPD_ordered),
queue, item, clauseOps);
}
static mlir::omp::ParallelOp
genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item,
mlir::omp::ParallelOperands &clauseOps,
const EntryBlockArgs &args, DataSharingProcessor *dsp,
bool isComposite = false) {
assert((!enableDelayedPrivatization || dsp) &&
"expected valid DataSharingProcessor");
OpWithBodyGenInfo genInfo =
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
llvm::omp::Directive::OMPD_parallel)
.setClauses(&item->clauses)
.setEntryBlockArgs(&args)
.setGenSkeletonOnly(isComposite)
.setDataSharingProcessor(dsp);
auto parallelOp =
genOpWithBody<mlir::omp::ParallelOp>(genInfo, queue, item, clauseOps);
parallelOp.setComposite(isComposite);
return parallelOp;
}
static mlir::omp::ScanOp
genScanOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
mlir::omp::ScanOperands clauseOps;
genScanClauses(converter, semaCtx, item->clauses, loc, clauseOps);
return converter.getFirOpBuilder().create<mlir::omp::ScanOp>(
converter.getCurrentLocation(), clauseOps);
}
/// This breaks the normal prototype of the gen*Op functions: adding the
/// sectionBlocks argument so that the enclosed section constructs can be
/// lowered here with correct reduction symbol remapping.
static mlir::omp::SectionsOp
genSectionsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item,
const parser::OmpSectionBlocks &sectionBlocks) {
mlir::omp::SectionsOperands clauseOps;
llvm::SmallVector<const semantics::Symbol *> reductionSyms;
genSectionsClauses(converter, semaCtx, item->clauses, loc, clauseOps,
reductionSyms);
auto &builder = converter.getFirOpBuilder();
// Insert privatizations before SECTIONS
lower::SymMapScope scope(symTable);
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
lower::omp::isLastItemInQueue(item, queue),
/*useDelayedPrivatization=*/false, symTable);
dsp.processStep1();
List<Clause> nonDsaClauses;
List<const clause::Lastprivate *> lastprivates;
for (const Clause &clause : item->clauses) {
if (clause.id == llvm::omp::Clause::OMPC_lastprivate) {
auto &lastp = std::get<clause::Lastprivate>(clause.u);
lastprivateModifierNotSupported(lastp, converter.getCurrentLocation());
lastprivates.push_back(&lastp);
} else {
switch (clause.id) {
case llvm::omp::Clause::OMPC_firstprivate:
case llvm::omp::Clause::OMPC_private:
case llvm::omp::Clause::OMPC_shared:
break;
default:
nonDsaClauses.push_back(clause);
}
}
}
// SECTIONS construct.
auto sectionsOp = builder.create<mlir::omp::SectionsOp>(loc, clauseOps);
// Create entry block with reduction variables as arguments.
EntryBlockArgs args;
// TODO: Add private syms and vars.
args.reduction.syms = reductionSyms;
args.reduction.vars = clauseOps.reductionVars;
genEntryBlock(builder, args, sectionsOp.getRegion());
mlir::Operation *terminator =
lower::genOpenMPTerminator(builder, sectionsOp, loc);
// Generate nested SECTION constructs.
// This is done here rather than in genOMP([...], OpenMPSectionConstruct)
// because we need to run genReductionVars on each omp.section so that the
// reduction variable gets mapped to the private version.
for (auto [construct, nestedEval] :
llvm::zip(sectionBlocks.v, eval.getNestedEvaluations())) {
const auto *sectionConstruct =
std::get_if<parser::OpenMPSectionConstruct>(&construct.u);
if (!sectionConstruct) {
assert(false &&
"unexpected construct nested inside of SECTIONS construct");
continue;
}
ConstructQueue sectionQueue{buildConstructQueue(
converter.getFirOpBuilder().getModule(), semaCtx, nestedEval,
sectionConstruct->source, llvm::omp::Directive::OMPD_section, {})};
builder.setInsertionPoint(terminator);
genOpWithBody<mlir::omp::SectionOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, nestedEval,
llvm::omp::Directive::OMPD_section)
.setClauses(&sectionQueue.begin()->clauses)
.setDataSharingProcessor(&dsp)
.setEntryBlockArgs(&args),
sectionQueue, sectionQueue.begin());
}
if (!lastprivates.empty()) {
mlir::Region &sectionsBody = sectionsOp.getRegion();
assert(sectionsBody.hasOneBlock());
mlir::Block &body = sectionsBody.front();
auto lastSectionOp = llvm::find_if(
llvm::reverse(body.getOperations()), [](const mlir::Operation &op) {
return llvm::isa<mlir::omp::SectionOp>(op);
});
assert(lastSectionOp != body.rend());
for (const clause::Lastprivate *lastp : lastprivates) {
builder.setInsertionPoint(
lastSectionOp->getRegion(0).back().getTerminator());
mlir::OpBuilder::InsertPoint insp = builder.saveInsertionPoint();
const auto &objList = std::get<ObjectList>(lastp->t);
for (const Object &object : objList) {
semantics::Symbol *sym = object.sym();
if (const auto *common =
sym->detailsIf<semantics::CommonBlockDetails>()) {
for (const auto &obj : common->objects())
converter.copyHostAssociateVar(*obj, &insp, /*hostIsSource=*/false);
} else {
converter.copyHostAssociateVar(*sym, &insp, /*hostIsSource=*/false);
}
}
}
}
// Perform DataSharingProcessor's step2 outside of the SECTIONS construct.
builder.setInsertionPointAfter(sectionsOp.getOperation());
dsp.processStep2(sectionsOp, /*isLoop=*/false);
// Emit an implicit barrier to synchronize threads and avoid data races on
// post-update of lastprivate variables when the `nowait` clause is present.
if (clauseOps.nowait && !lastprivates.empty())
builder.create<mlir::omp::BarrierOp>(loc);
return sectionsOp;
}
static mlir::Operation *
genScopeOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
TODO(loc, "Scope construct");
return nullptr;
}
static mlir::omp::SingleOp
genSingleOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
mlir::omp::SingleOperands clauseOps;
genSingleClauses(converter, semaCtx, item->clauses, loc, clauseOps);
return genOpWithBody<mlir::omp::SingleOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
llvm::omp::Directive::OMPD_single)
.setClauses(&item->clauses),
queue, item, clauseOps);
}
static mlir::omp::TargetOp
genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
bool isTargetDevice =
llvm::cast<mlir::omp::OffloadModuleInterface>(*converter.getModuleOp())
.getIsTargetDevice();
// Introduce a new host_eval information structure for this target region.
if (!isTargetDevice)
hostEvalInfo.emplace_back();
mlir::omp::TargetOperands clauseOps;
llvm::SmallVector<const semantics::Symbol *> mapSyms, isDevicePtrSyms,
hasDeviceAddrSyms;
genTargetClauses(converter, semaCtx, symTable, stmtCtx, eval, item->clauses,
loc, clauseOps, hasDeviceAddrSyms, isDevicePtrSyms, mapSyms);
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/
lower::omp::isLastItemInQueue(item, queue),
/*useDelayedPrivatization=*/true, symTable);
dsp.processStep1(&clauseOps);
// Check if a value of type `type` can be passed to the kernel by value.
// All kernel parameters are of pointer type, so if the value can be
// represented within the size and alignment of a pointer, then it can be
// passed by value.
auto isLiteralType = [&](mlir::Type type) {
const mlir::DataLayout &dl = firOpBuilder.getDataLayout();
mlir::Type ptrTy =
mlir::LLVM::LLVMPointerType::get(&converter.getMLIRContext());
uint64_t ptrSize = dl.getTypeSize(ptrTy);
uint64_t ptrAlign = dl.getTypePreferredAlignment(ptrTy);
auto [size, align] = fir::getTypeSizeAndAlignmentOrCrash(
loc, type, dl, converter.getKindMap());
return size <= ptrSize && align <= ptrAlign;
};
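// For example, on a typical 64-bit target an i32 scalar satisfies both
// checks and may be passed by value, while a value wider than a pointer
// (e.g. a 128-bit complex) would be passed by reference.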
// 5.8.1 Implicit Data-Mapping Attribute Rules
// The following code follows the implicit data-mapping rules to map all
// symbols used inside the region that do not have explicit data-environment
// attribute clauses (neither data-sharing clauses, e.g. `private`, nor `map`
// clauses).
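// For example (illustrative):
//
//   !$omp target
//     y = x
//   !$omp end target
//
// Neither `x` nor `y` appears on an explicit clause here, so both are
// mapped implicitly by the code below.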
auto captureImplicitMap = [&](const semantics::Symbol &sym) {
if (dsp.getAllSymbolsToPrivatize().contains(&sym))
return;
// These symbols are mapped individually in processHasDeviceAddr.
if (llvm::is_contained(hasDeviceAddrSyms, &sym))
return;
// Structure component symbols don't have bindings, and can only be
// explicitly mapped individually. If a member is captured implicitly,
// we map the entirety of the derived type when we find its symbol.
if (sym.owner().IsDerivedType())
return;
// If the symbol is part of an already mapped common block, do not make a
// map for it.
if (const Fortran::semantics::Symbol *common =
Fortran::semantics::FindCommonBlockContaining(sym.GetUltimate()))
if (llvm::is_contained(mapSyms, common))
return;
// If we come across a symbol without a symbol address, we return, as we
// cannot process it. This is intended as a catch-all early exit for
// symbols that do not have a corresponding extended value, such as
// subroutines, interfaces, and named blocks.
if (!converter.getSymbolAddress(sym))
return;
if (!llvm::is_contained(mapSyms, &sym)) {
if (const auto *details =
sym.template detailsIf<semantics::HostAssocDetails>())
converter.copySymbolBinding(details->symbol(), sym);
std::stringstream name;
fir::ExtendedValue dataExv = converter.getSymbolExtendedValue(sym);
name << sym.name().ToString();
mlir::FlatSymbolRefAttr mapperId;
if (sym.GetType()->category() == semantics::DeclTypeSpec::TypeDerived) {
auto &typeSpec = sym.GetType()->derivedTypeSpec();
std::string mapperIdName = typeSpec.name().ToString() + ".default";
mapperIdName = converter.mangleName(mapperIdName, *typeSpec.GetScope());
if (converter.getModuleOp().lookupSymbol(mapperIdName))
mapperId = mlir::FlatSymbolRefAttr::get(&converter.getMLIRContext(),
mapperIdName);
}
fir::factory::AddrAndBoundsInfo info =
Fortran::lower::getDataOperandBaseAddr(
converter, firOpBuilder, sym, converter.getCurrentLocation());
llvm::SmallVector<mlir::Value> bounds =
fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
mlir::omp::MapBoundsType>(
firOpBuilder, info, dataExv,
semantics::IsAssumedSizeArray(sym.GetUltimate()),
converter.getCurrentLocation());
llvm::omp::OpenMPOffloadMappingFlags mapFlag =
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
mlir::omp::VariableCaptureKind captureKind =
mlir::omp::VariableCaptureKind::ByRef;
mlir::Value baseOp = info.rawInput;
mlir::Type eleType = baseOp.getType();
if (auto refType = mlir::dyn_cast<fir::ReferenceType>(baseOp.getType()))
eleType = refType.getElementType();
// If a variable is specified in declare target link and its device
// type is not specified as `nohost`, it needs to be mapped tofrom.
mlir::ModuleOp mod = firOpBuilder.getModule();
mlir::Operation *op = mod.lookupSymbol(converter.mangleName(sym));
auto declareTargetOp =
llvm::dyn_cast_if_present<mlir::omp::DeclareTargetInterface>(op);
if (declareTargetOp && declareTargetOp.isDeclareTarget()) {
if (declareTargetOp.getDeclareTargetCaptureClause() ==
mlir::omp::DeclareTargetCaptureClause::link &&
declareTargetOp.getDeclareTargetDeviceType() !=
mlir::omp::DeclareTargetDeviceType::nohost) {
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
}
} else if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) {
// Scalars behave as if they were "firstprivate".
// TODO: Handle objects that are shared/lastprivate or were listed
// in an in_reduction clause.
if (isLiteralType(eleType)) {
captureKind = mlir::omp::VariableCaptureKind::ByCopy;
} else {
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
}
} else if (!fir::isa_builtin_cptr_type(eleType)) {
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
}
auto location =
mlir::NameLoc::get(mlir::StringAttr::get(firOpBuilder.getContext(),
sym.name().ToString()),
baseOp.getLoc());
mlir::Value mapOp = createMapInfoOp(
firOpBuilder, location, baseOp, /*varPtrPtr=*/mlir::Value{},
name.str(), bounds, /*members=*/{},
/*membersIndex=*/mlir::ArrayAttr{},
static_cast<
std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
mapFlag),
captureKind, baseOp.getType(), /*partialMap=*/false, mapperId);
clauseOps.mapVars.push_back(mapOp);
mapSyms.push_back(&sym);
}
};
lower::pft::visitAllSymbols(eval, captureImplicitMap);
auto targetOp = firOpBuilder.create<mlir::omp::TargetOp>(loc, clauseOps);
llvm::SmallVector<mlir::Value> hasDeviceAddrBaseValues, mapBaseValues;
extractMappedBaseValues(clauseOps.hasDeviceAddrVars, hasDeviceAddrBaseValues);
extractMappedBaseValues(clauseOps.mapVars, mapBaseValues);
EntryBlockArgs args;
args.hasDeviceAddr.syms = hasDeviceAddrSyms;
args.hasDeviceAddr.vars = hasDeviceAddrBaseValues;
args.hostEvalVars = clauseOps.hostEvalVars;
// TODO: Add in_reduction syms and vars.
args.map.syms = mapSyms;
args.map.vars = mapBaseValues;
args.priv.syms = dsp.getDelayedPrivSymbols();
args.priv.vars = clauseOps.privateVars;
genBodyOfTargetOp(converter, symTable, semaCtx, eval, targetOp, args, loc,
queue, item, dsp);
// Remove the host_eval information structure created for this target region.
if (!isTargetDevice)
hostEvalInfo.pop_back();
return targetOp;
}
static mlir::omp::TargetDataOp genTargetDataOp(
lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
mlir::omp::TargetDataOperands clauseOps;
llvm::SmallVector<const semantics::Symbol *> useDeviceAddrSyms,
useDevicePtrSyms;
genTargetDataClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
clauseOps, useDeviceAddrSyms, useDevicePtrSyms);
auto targetDataOp =
converter.getFirOpBuilder().create<mlir::omp::TargetDataOp>(loc,
clauseOps);
llvm::SmallVector<mlir::Value> useDeviceAddrBaseValues,
useDevicePtrBaseValues;
extractMappedBaseValues(clauseOps.useDeviceAddrVars, useDeviceAddrBaseValues);
extractMappedBaseValues(clauseOps.useDevicePtrVars, useDevicePtrBaseValues);
EntryBlockArgs args;
args.useDeviceAddr.syms = useDeviceAddrSyms;
args.useDeviceAddr.vars = useDeviceAddrBaseValues;
args.useDevicePtr.syms = useDevicePtrSyms;
args.useDevicePtr.vars = useDevicePtrBaseValues;
genBodyOfTargetDataOp(converter, symTable, semaCtx, eval, targetDataOp, args,
loc, queue, item);
return targetDataOp;
}
template <typename OpTy>
static OpTy genTargetEnterExitUpdateDataOp(
lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
// GCC 9.3.0 emits a (probably) bogus warning about an unused variable.
[[maybe_unused]] llvm::omp::Directive directive;
if constexpr (std::is_same_v<OpTy, mlir::omp::TargetEnterDataOp>) {
directive = llvm::omp::Directive::OMPD_target_enter_data;
} else if constexpr (std::is_same_v<OpTy, mlir::omp::TargetExitDataOp>) {
directive = llvm::omp::Directive::OMPD_target_exit_data;
} else if constexpr (std::is_same_v<OpTy, mlir::omp::TargetUpdateOp>) {
directive = llvm::omp::Directive::OMPD_target_update;
} else {
llvm_unreachable("Unexpected TARGET DATA construct");
}
mlir::omp::TargetEnterExitUpdateDataOperands clauseOps;
genTargetEnterExitUpdateDataClauses(converter, semaCtx, symTable, stmtCtx,
item->clauses, loc, directive, clauseOps);
return firOpBuilder.create<OpTy>(loc, clauseOps);
}
static mlir::omp::TaskOp
genTaskOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
mlir::omp::TaskOperands clauseOps;
llvm::SmallVector<const semantics::Symbol *> inReductionSyms;
genTaskClauses(converter, semaCtx, symTable, stmtCtx, item->clauses, loc,
clauseOps, inReductionSyms);
if (!enableDelayedPrivatization)
return genOpWithBody<mlir::omp::TaskOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
llvm::omp::Directive::OMPD_task)
.setClauses(&item->clauses),
queue, item, clauseOps);
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
lower::omp::isLastItemInQueue(item, queue),
/*useDelayedPrivatization=*/true, symTable);
dsp.processStep1(&clauseOps);
EntryBlockArgs taskArgs;
taskArgs.priv.syms = dsp.getDelayedPrivSymbols();
taskArgs.priv.vars = clauseOps.privateVars;
taskArgs.inReduction.syms = inReductionSyms;
taskArgs.inReduction.vars = clauseOps.inReductionVars;
return genOpWithBody<mlir::omp::TaskOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
llvm::omp::Directive::OMPD_task)
.setClauses(&item->clauses)
.setDataSharingProcessor(&dsp)
.setEntryBlockArgs(&taskArgs),
queue, item, clauseOps);
}
static mlir::omp::TaskgroupOp
genTaskgroupOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
mlir::omp::TaskgroupOperands clauseOps;
llvm::SmallVector<const semantics::Symbol *> taskReductionSyms;
genTaskgroupClauses(converter, semaCtx, item->clauses, loc, clauseOps,
taskReductionSyms);
EntryBlockArgs taskgroupArgs;
taskgroupArgs.taskReduction.syms = taskReductionSyms;
taskgroupArgs.taskReduction.vars = clauseOps.taskReductionVars;
return genOpWithBody<mlir::omp::TaskgroupOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
llvm::omp::Directive::OMPD_taskgroup)
.setClauses(&item->clauses)
.setEntryBlockArgs(&taskgroupArgs),
queue, item, clauseOps);
}
static mlir::omp::TaskwaitOp
genTaskwaitOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
mlir::omp::TaskwaitOperands clauseOps;
genTaskwaitClauses(converter, semaCtx, item->clauses, loc, clauseOps);
return converter.getFirOpBuilder().create<mlir::omp::TaskwaitOp>(loc,
clauseOps);
}
static mlir::omp::TaskyieldOp
genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
return converter.getFirOpBuilder().create<mlir::omp::TaskyieldOp>(loc);
}
static mlir::omp::WorkshareOp genWorkshareOp(
lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
mlir::omp::WorkshareOperands clauseOps;
genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
clauseOps);
return genOpWithBody<mlir::omp::WorkshareOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
llvm::omp::Directive::OMPD_workshare)
.setClauses(&item->clauses),
queue, item, clauseOps);
}
static mlir::omp::TeamsOp
genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
mlir::omp::TeamsOperands clauseOps;
llvm::SmallVector<const semantics::Symbol *> reductionSyms;
genTeamsClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps,
reductionSyms);
EntryBlockArgs args;
// TODO: Add private syms and vars.
args.reduction.syms = reductionSyms;
args.reduction.vars = clauseOps.reductionVars;
return genOpWithBody<mlir::omp::TeamsOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
llvm::omp::Directive::OMPD_teams)
.setClauses(&item->clauses)
.setEntryBlockArgs(&args),
queue, item, clauseOps);
}
//===----------------------------------------------------------------------===//
// Code generation for atomic operations
//===----------------------------------------------------------------------===//
/// Populates \p hint and \p memoryOrder with appropriate clause information
/// if present on the atomic construct.
static void genOmpAtomicHintAndMemoryOrderClauses(
lower::AbstractConverter &converter,
const parser::OmpAtomicClauseList &clauseList, mlir::IntegerAttr &hint,
mlir::omp::ClauseMemoryOrderKindAttr &memoryOrder) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
for (const parser::OmpAtomicClause &clause : clauseList.v) {
common::visit(
common::visitors{
[&](const parser::OmpMemoryOrderClause &s) {
auto kind = common::visit(
common::visitors{
[&](const parser::OmpClause::AcqRel &) {
return mlir::omp::ClauseMemoryOrderKind::Acq_rel;
},
[&](const parser::OmpClause::Acquire &) {
return mlir::omp::ClauseMemoryOrderKind::Acquire;
},
[&](const parser::OmpClause::Relaxed &) {
return mlir::omp::ClauseMemoryOrderKind::Relaxed;
},
[&](const parser::OmpClause::Release &) {
return mlir::omp::ClauseMemoryOrderKind::Release;
},
[&](const parser::OmpClause::SeqCst &) {
return mlir::omp::ClauseMemoryOrderKind::Seq_cst;
},
[&](auto &&) -> mlir::omp::ClauseMemoryOrderKind {
llvm_unreachable("Unexpected clause");
},
},
s.v.u);
memoryOrder = mlir::omp::ClauseMemoryOrderKindAttr::get(
firOpBuilder.getContext(), kind);
},
[&](const parser::OmpHintClause &s) {
const auto *expr = semantics::GetExpr(s.v);
uint64_t hintExprValue = *evaluate::ToInt64(*expr);
hint = firOpBuilder.getI64IntegerAttr(hintExprValue);
},
[&](const parser::OmpFailClause &) {},
},
clause.u);
}
}
static void processOmpAtomicTODO(mlir::Type elementType, mlir::Location loc) {
if (!elementType)
return;
assert(fir::isa_trivial(fir::unwrapRefType(elementType)) &&
"is supported type for omp atomic");
}
/// Generates an atomic.read operation at the insertion point currently set
/// on the builder.
static void genAtomicCaptureStatement(
lower::AbstractConverter &converter, mlir::Value fromAddress,
mlir::Value toAddress,
const parser::OmpAtomicClauseList *leftHandClauseList,
const parser::OmpAtomicClauseList *rightHandClauseList,
mlir::Type elementType, mlir::Location loc) {
// Generate an `atomic.read` operation for atomic assignment statements.
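// The resulting operation has roughly the form:
//
//   omp.atomic.read %v = %x : !fir.ref<type>, !fir.ref<type>, type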
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
processOmpAtomicTODO(elementType, loc);
// If no hint clause is specified, the effect is as if
// hint(omp_sync_hint_none) had been specified.
mlir::IntegerAttr hint = nullptr;
mlir::omp::ClauseMemoryOrderKindAttr memoryOrder = nullptr;
if (leftHandClauseList)
genOmpAtomicHintAndMemoryOrderClauses(converter, *leftHandClauseList, hint,
memoryOrder);
if (rightHandClauseList)
genOmpAtomicHintAndMemoryOrderClauses(converter, *rightHandClauseList, hint,
memoryOrder);
firOpBuilder.create<mlir::omp::AtomicReadOp>(loc, fromAddress, toAddress,
mlir::TypeAttr::get(elementType),
hint, memoryOrder);
}
/// Generates an atomic.write operation at the insertion point currently set
/// on the builder.
static void genAtomicWriteStatement(
lower::AbstractConverter &converter, mlir::Value lhsAddr,
mlir::Value rhsExpr, const parser::OmpAtomicClauseList *leftHandClauseList,
const parser::OmpAtomicClauseList *rightHandClauseList, mlir::Location loc,
mlir::Value *evaluatedExprValue = nullptr) {
// Generate an `atomic.write` operation for atomic assignment statements.
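// The resulting operation has roughly the form:
//
//   omp.atomic.write %x = %expr : !fir.ref<type>, type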
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
mlir::Type varType = fir::unwrapRefType(lhsAddr.getType());
// Create a conversion outside the capture block.
auto insertionPoint = firOpBuilder.saveInsertionPoint();
firOpBuilder.setInsertionPointAfter(rhsExpr.getDefiningOp());
rhsExpr = firOpBuilder.createConvert(loc, varType, rhsExpr);
firOpBuilder.restoreInsertionPoint(insertionPoint);
processOmpAtomicTODO(varType, loc);
// If no hint clause is specified, the effect is as if
// hint(omp_sync_hint_none) had been specified.
mlir::IntegerAttr hint = nullptr;
mlir::omp::ClauseMemoryOrderKindAttr memoryOrder = nullptr;
if (leftHandClauseList)
genOmpAtomicHintAndMemoryOrderClauses(converter, *leftHandClauseList, hint,
memoryOrder);
if (rightHandClauseList)
genOmpAtomicHintAndMemoryOrderClauses(converter, *rightHandClauseList, hint,
memoryOrder);
firOpBuilder.create<mlir::omp::AtomicWriteOp>(loc, lhsAddr, rhsExpr, hint,
memoryOrder);
}
/// Generates an atomic.update operation at the insertion point currently set
/// on the builder.
static void genAtomicUpdateStatement(
lower::AbstractConverter &converter, mlir::Value lhsAddr,
mlir::Type varType, const parser::Variable &assignmentStmtVariable,
const parser::Expr &assignmentStmtExpr,
const parser::OmpAtomicClauseList *leftHandClauseList,
const parser::OmpAtomicClauseList *rightHandClauseList, mlir::Location loc,
mlir::Operation *atomicCaptureOp = nullptr) {
// Generate an `atomic.update` operation for atomic assignment statements.
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
mlir::Location currentLocation = converter.getCurrentLocation();
// Create the omp.atomic.update operation, e.g.:
//
// func.func @_QPsb() {
// %0 = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFsbEa"}
// %1 = fir.alloca i32 {bindc_name = "b", uniq_name = "_QFsbEb"}
// %2 = fir.load %1 : !fir.ref<i32>
// omp.atomic.update %0 : !fir.ref<i32> {
// ^bb0(%arg0: i32):
// %3 = arith.addi %arg0, %2 : i32
// omp.yield(%3 : i32)
// }
// return
// }
auto getArgExpression =
[](std::list<parser::ActualArgSpec>::const_iterator it) {
const auto &arg{std::get<parser::ActualArg>((*it).t)};
const auto *parserExpr{
std::get_if<common::Indirection<parser::Expr>>(&arg.u)};
return parserExpr;
};
// Lower any non-atomic sub-expression before the atomic operation, and
// map its lowered value to the semantic representation.
lower::ExprToValueMap exprValueOverrides;
// Max and min intrinsics can have a list of arguments. Hence we need a
// list of nonAtomicSubExprs to hoist. Currently, only the load is hoisted.
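// E.g. for `x = max(x, a, b)`, the values of `a` and `b` are evaluated
// before the atomic operation; only the access to `x` remains inside it.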
llvm::SmallVector<const lower::SomeExpr *> nonAtomicSubExprs;
common::visit(
common::visitors{
[&](const common::Indirection<parser::FunctionReference> &funcRef)
-> void {
const auto &args{std::get<std::list<parser::ActualArgSpec>>(
funcRef.value().v.t)};
std::list<parser::ActualArgSpec>::const_iterator beginIt =
args.begin();
std::list<parser::ActualArgSpec>::const_iterator endIt = args.end();
const auto *exprFirst{getArgExpression(beginIt)};
if (exprFirst && exprFirst->value().source ==
assignmentStmtVariable.GetSource()) {
// Add everything except the first
beginIt++;
} else {
// Add everything except the last
endIt--;
}
std::list<parser::ActualArgSpec>::const_iterator it;
for (it = beginIt; it != endIt; it++) {
const common::Indirection<parser::Expr> *expr =
getArgExpression(it);
if (expr)
nonAtomicSubExprs.push_back(semantics::GetExpr(*expr));
}
},
[&](const auto &op) -> void {
using T = std::decay_t<decltype(op)>;
if constexpr (std::is_base_of<parser::Expr::IntrinsicBinary,
T>::value) {
const auto &exprLeft{std::get<0>(op.t)};
const auto &exprRight{std::get<1>(op.t)};
if (exprLeft.value().source == assignmentStmtVariable.GetSource())
nonAtomicSubExprs.push_back(semantics::GetExpr(exprRight));
else
nonAtomicSubExprs.push_back(semantics::GetExpr(exprLeft));
}
},
},
assignmentStmtExpr.u);
lower::StatementContext nonAtomicStmtCtx;
if (!nonAtomicSubExprs.empty()) {
// Generate the non-atomic part before all the atomic operations.
auto insertionPoint = firOpBuilder.saveInsertionPoint();
if (atomicCaptureOp)
firOpBuilder.setInsertionPoint(atomicCaptureOp);
mlir::Value nonAtomicVal;
for (auto *nonAtomicSubExpr : nonAtomicSubExprs) {
nonAtomicVal = fir::getBase(converter.genExprValue(
currentLocation, *nonAtomicSubExpr, nonAtomicStmtCtx));
exprValueOverrides.try_emplace(nonAtomicSubExpr, nonAtomicVal);
}
if (atomicCaptureOp)
firOpBuilder.restoreInsertionPoint(insertionPoint);
}
mlir::Operation *atomicUpdateOp = nullptr;
// If no hint clause is specified, the effect is as if
// hint(omp_sync_hint_none) had been specified.
mlir::IntegerAttr hint = nullptr;
mlir::omp::ClauseMemoryOrderKindAttr memoryOrder = nullptr;
if (leftHandClauseList)
genOmpAtomicHintAndMemoryOrderClauses(converter, *leftHandClauseList, hint,
memoryOrder);
if (rightHandClauseList)
genOmpAtomicHintAndMemoryOrderClauses(converter, *rightHandClauseList, hint,
memoryOrder);
atomicUpdateOp = firOpBuilder.create<mlir::omp::AtomicUpdateOp>(
currentLocation, lhsAddr, hint, memoryOrder);
processOmpAtomicTODO(varType, loc);
llvm::SmallVector<mlir::Type> varTys = {varType};
llvm::SmallVector<mlir::Location> locs = {currentLocation};
firOpBuilder.createBlock(&atomicUpdateOp->getRegion(0), {}, varTys, locs);
mlir::Value val =
fir::getBase(atomicUpdateOp->getRegion(0).front().getArgument(0));
exprValueOverrides.try_emplace(semantics::GetExpr(assignmentStmtVariable),
val);
{
// Statement context inside the atomic block.
converter.overrideExprValues(&exprValueOverrides);
lower::StatementContext atomicStmtCtx;
mlir::Value rhsExpr = fir::getBase(converter.genExprValue(
*semantics::GetExpr(assignmentStmtExpr), atomicStmtCtx));
mlir::Type exprType = fir::unwrapRefType(rhsExpr.getType());
if (fir::isa_complex(exprType) && !fir::isa_complex(varType)) {
// Emit an additional `ExtractValueOp` if the expression is of complex
// type but the variable is not.
auto extract = firOpBuilder.create<fir::ExtractValueOp>(
currentLocation,
mlir::cast<mlir::ComplexType>(exprType).getElementType(), rhsExpr,
firOpBuilder.getArrayAttr(
firOpBuilder.getIntegerAttr(firOpBuilder.getIndexType(), 0)));
mlir::Value convertResult = firOpBuilder.create<fir::ConvertOp>(
currentLocation, varType, extract);
firOpBuilder.create<mlir::omp::YieldOp>(currentLocation, convertResult);
} else {
mlir::Value convertResult =
firOpBuilder.createConvert(currentLocation, varType, rhsExpr);
firOpBuilder.create<mlir::omp::YieldOp>(currentLocation, convertResult);
}
converter.resetExprOverrides();
}
firOpBuilder.setInsertionPointAfter(atomicUpdateOp);
}
/// Processes an atomic construct with the write clause.
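///
/// A sketch of the handled source form:
///
///   !$omp atomic write
///     x = expr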
static void genAtomicWrite(lower::AbstractConverter &converter,
const parser::OmpAtomicWrite &atomicWrite,
mlir::Location loc) {
const parser::OmpAtomicClauseList *rightHandClauseList = nullptr;
const parser::OmpAtomicClauseList *leftHandClauseList = nullptr;
// Get the clause lists on each side of the atomic directive.
rightHandClauseList = &std::get<2>(atomicWrite.t);
leftHandClauseList = &std::get<0>(atomicWrite.t);
const parser::AssignmentStmt &stmt =
std::get<parser::Statement<parser::AssignmentStmt>>(atomicWrite.t)
.statement;
const evaluate::Assignment &assign = *stmt.typedAssignment->v;
lower::StatementContext stmtCtx;
// Get the value and address of atomic write operands.
mlir::Value rhsExpr =
fir::getBase(converter.genExprValue(assign.rhs, stmtCtx));
mlir::Value lhsAddr =
fir::getBase(converter.genExprAddr(assign.lhs, stmtCtx));
genAtomicWriteStatement(converter, lhsAddr, rhsExpr, leftHandClauseList,
rightHandClauseList, loc);
}
/// Processes an atomic construct with the read clause.
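///
/// A sketch of the handled source form:
///
///   !$omp atomic read
///     v = x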
static void genAtomicRead(lower::AbstractConverter &converter,
const parser::OmpAtomicRead &atomicRead,
mlir::Location loc) {
const parser::OmpAtomicClauseList *rightHandClauseList = nullptr;
const parser::OmpAtomicClauseList *leftHandClauseList = nullptr;
// Get the clause lists on each side of the atomic directive.
rightHandClauseList = &std::get<2>(atomicRead.t);
leftHandClauseList = &std::get<0>(atomicRead.t);
const auto &assignmentStmtExpr = std::get<parser::Expr>(
std::get<parser::Statement<parser::AssignmentStmt>>(atomicRead.t)
.statement.t);
const auto &assignmentStmtVariable = std::get<parser::Variable>(
std::get<parser::Statement<parser::AssignmentStmt>>(atomicRead.t)
.statement.t);
lower::StatementContext stmtCtx;
const semantics::SomeExpr &fromExpr = *semantics::GetExpr(assignmentStmtExpr);
mlir::Type elementType = converter.genType(fromExpr);
mlir::Value fromAddress =
fir::getBase(converter.genExprAddr(fromExpr, stmtCtx));
mlir::Value toAddress = fir::getBase(converter.genExprAddr(
*semantics::GetExpr(assignmentStmtVariable), stmtCtx));
if (fromAddress.getType() != toAddress.getType()) {
// Emit an implicit cast. Different yet compatible types on
// omp.atomic.read constitute valid Fortran. The OMPIRBuilder will
// emit atomic instructions (on primitive types) and `__atomic_load`
// libcall (on complex type) without explicitly converting
// between such compatible types. The OMPIRBuilder relies on the
// frontend to resolve such inconsistencies between `omp.atomic.read`
// operand types. Similar inconsistencies between operand types in
// `omp.atomic.write` are resolved through implicit casting by use of typed
// assignment (i.e. `evaluate::Assignment`). However, use of typed
// assignment in `omp.atomic.read` (of form `v = x`) leads to an unsafe,
// non-atomic load of `x` into a temporary `alloca`, followed by an atomic
// read of form `v = alloca`. Hence, it is needed to perform a custom
// implicit cast.
// An atomic read of form `v = x` would (without implicit casting)
// lower to `omp.atomic.read %v = %x : !fir.ref<type1>, !fir.ref<type2>,
// type2`. With the implicit cast, we instead generate the following FIR:
//
// %alloca = fir.alloca type2
// omp.atomic.read %alloca = %x : !fir.ref<type2>, !fir.ref<type2>, type2
// %load = fir.load %alloca : !fir.ref<type2>
// %cvt = fir.convert %load : (type2) -> type1
// fir.store %cvt to %v : !fir.ref<type1>
// This sequence of operations is thread-safe since each thread allocates
// the `alloca` in its stack, and performs `%alloca = %x` atomically. Once
// safely read, each thread performs the implicit cast on the local
// `alloca`, and writes the final result to `%v`.
mlir::Type toType = fir::unwrapRefType(toAddress.getType());
mlir::Type fromType = fir::unwrapRefType(fromAddress.getType());
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
auto oldIP = builder.saveInsertionPoint();
builder.setInsertionPointToStart(builder.getAllocaBlock());
mlir::Value alloca = builder.create<fir::AllocaOp>(
loc, fromType); // Thread scope `alloca` to atomically read `%x`.
builder.restoreInsertionPoint(oldIP);
genAtomicCaptureStatement(converter, fromAddress, alloca,
leftHandClauseList, rightHandClauseList,
elementType, loc);
auto load = builder.create<fir::LoadOp>(loc, alloca);
if (fir::isa_complex(fromType) && !fir::isa_complex(toType)) {
// Emit an additional `ExtractValueOp` if `fromAddress` is of complex
// type, but `toAddress` is not.
auto extract = builder.create<fir::ExtractValueOp>(
loc, mlir::cast<mlir::ComplexType>(fromType).getElementType(), load,
builder.getArrayAttr(
builder.getIntegerAttr(builder.getIndexType(), 0)));
auto cvt = builder.create<fir::ConvertOp>(loc, toType, extract);
builder.create<fir::StoreOp>(loc, cvt, toAddress);
} else if (!fir::isa_complex(fromType) && fir::isa_complex(toType)) {
// Emit an additional `InsertValueOp` if `toAddress` is of complex
// type, but `fromAddress` is not.
mlir::Value undef = builder.create<fir::UndefOp>(loc, toType);
mlir::Type complexEleTy =
mlir::cast<mlir::ComplexType>(toType).getElementType();
mlir::Value cvt = builder.create<fir::ConvertOp>(loc, complexEleTy, load);
mlir::Value zero = builder.createRealZeroConstant(loc, complexEleTy);
mlir::Value idx0 = builder.create<fir::InsertValueOp>(
loc, toType, undef, cvt,
builder.getArrayAttr(
builder.getIntegerAttr(builder.getIndexType(), 0)));
mlir::Value idx1 = builder.create<fir::InsertValueOp>(
loc, toType, idx0, zero,
builder.getArrayAttr(
builder.getIntegerAttr(builder.getIndexType(), 1)));
builder.create<fir::StoreOp>(loc, idx1, toAddress);
} else {
auto cvt = builder.create<fir::ConvertOp>(loc, toType, load);
builder.create<fir::StoreOp>(loc, cvt, toAddress);
}
} else
genAtomicCaptureStatement(converter, fromAddress, toAddress,
leftHandClauseList, rightHandClauseList,
elementType, loc);
}
/// Processes an atomic construct with the update clause.
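///
/// A sketch of the handled source form:
///
///   !$omp atomic update
///     x = x + 1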
static void genAtomicUpdate(lower::AbstractConverter &converter,
const parser::OmpAtomicUpdate &atomicUpdate,
mlir::Location loc) {
const parser::OmpAtomicClauseList *rightHandClauseList = nullptr;
const parser::OmpAtomicClauseList *leftHandClauseList = nullptr;
// Get the clause lists on each side of the atomic directive.
rightHandClauseList = &std::get<2>(atomicUpdate.t);
leftHandClauseList = &std::get<0>(atomicUpdate.t);
const auto &assignmentStmtExpr = std::get<parser::Expr>(
std::get<parser::Statement<parser::AssignmentStmt>>(atomicUpdate.t)
.statement.t);
const auto &assignmentStmtVariable = std::get<parser::Variable>(
std::get<parser::Statement<parser::AssignmentStmt>>(atomicUpdate.t)
.statement.t);
lower::StatementContext stmtCtx;
mlir::Value lhsAddr = fir::getBase(converter.genExprAddr(
*semantics::GetExpr(assignmentStmtVariable), stmtCtx));
mlir::Type varType = fir::unwrapRefType(lhsAddr.getType());
genAtomicUpdateStatement(converter, lhsAddr, varType, assignmentStmtVariable,
assignmentStmtExpr, leftHandClauseList,
rightHandClauseList, loc);
}
/// Processes an atomic construct with no clause, which implies the update
/// clause.
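///
/// For example (hypothetical Fortran input, treated as an update):
///   !$omp atomic
///   x = x + 1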
static void genOmpAtomic(lower::AbstractConverter &converter,
const parser::OmpAtomic &atomicConstruct,
mlir::Location loc) {
const parser::OmpAtomicClauseList &atomicClauseList =
std::get<parser::OmpAtomicClauseList>(atomicConstruct.t);
const auto &assignmentStmtExpr = std::get<parser::Expr>(
std::get<parser::Statement<parser::AssignmentStmt>>(atomicConstruct.t)
.statement.t);
const auto &assignmentStmtVariable = std::get<parser::Variable>(
std::get<parser::Statement<parser::AssignmentStmt>>(atomicConstruct.t)
.statement.t);
lower::StatementContext stmtCtx;
mlir::Value lhsAddr = fir::getBase(converter.genExprAddr(
*semantics::GetExpr(assignmentStmtVariable), stmtCtx));
mlir::Type varType = fir::unwrapRefType(lhsAddr.getType());
// If atomic-clause is not present on the construct, the behaviour is as if
// the update clause is specified (for both OpenMP and OpenACC).
genAtomicUpdateStatement(converter, lhsAddr, varType, assignmentStmtVariable,
assignmentStmtExpr, &atomicClauseList, nullptr, loc);
}
/// Processes an atomic construct with the capture clause.
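///
/// For example (hypothetical Fortran input, capture-stmt followed by
/// update-stmt):
///   !$omp atomic capture
///   v = x
///   x = x + 1
///   !$omp end atomic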
static void genAtomicCapture(lower::AbstractConverter &converter,
const parser::OmpAtomicCapture &atomicCapture,
mlir::Location loc) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
const parser::AssignmentStmt &stmt1 =
std::get<parser::OmpAtomicCapture::Stmt1>(atomicCapture.t).v.statement;
const evaluate::Assignment &assign1 = *stmt1.typedAssignment->v;
const auto &stmt1Var{std::get<parser::Variable>(stmt1.t)};
const auto &stmt1Expr{std::get<parser::Expr>(stmt1.t)};
const parser::AssignmentStmt &stmt2 =
std::get<parser::OmpAtomicCapture::Stmt2>(atomicCapture.t).v.statement;
const evaluate::Assignment &assign2 = *stmt2.typedAssignment->v;
const auto &stmt2Var{std::get<parser::Variable>(stmt2.t)};
const auto &stmt2Expr{std::get<parser::Expr>(stmt2.t)};
// Pre-evaluate expressions to be used in the various operations inside
// `atomic.capture` since it is not desirable to have anything other than
// an `atomic.read`, `atomic.write`, or `atomic.update` operation
// inside `atomic.capture`.
lower::StatementContext stmtCtx;
// LHS evaluations are common to all combinations of `atomic.capture`.
mlir::Value stmt1LHSArg =
fir::getBase(converter.genExprAddr(assign1.lhs, stmtCtx));
mlir::Value stmt2LHSArg =
fir::getBase(converter.genExprAddr(assign2.lhs, stmtCtx));
// Type information used in the generation of the `atomic.update` operation.
mlir::Type stmt1VarType =
fir::getBase(converter.genExprValue(assign1.lhs, stmtCtx)).getType();
mlir::Type stmt2VarType =
fir::getBase(converter.genExprValue(assign2.lhs, stmtCtx)).getType();
// Check if an implicit type cast is needed.
if (stmt1VarType != stmt2VarType)
TODO(loc, "atomic capture requiring implicit type casts");
mlir::Operation *atomicCaptureOp = nullptr;
mlir::IntegerAttr hint = nullptr;
mlir::omp::ClauseMemoryOrderKindAttr memoryOrder = nullptr;
const parser::OmpAtomicClauseList &rightHandClauseList =
std::get<2>(atomicCapture.t);
const parser::OmpAtomicClauseList &leftHandClauseList =
std::get<0>(atomicCapture.t);
genOmpAtomicHintAndMemoryOrderClauses(converter, leftHandClauseList, hint,
memoryOrder);
genOmpAtomicHintAndMemoryOrderClauses(converter, rightHandClauseList, hint,
memoryOrder);
atomicCaptureOp =
firOpBuilder.create<mlir::omp::AtomicCaptureOp>(loc, hint, memoryOrder);
firOpBuilder.createBlock(&(atomicCaptureOp->getRegion(0)));
mlir::Block &block = atomicCaptureOp->getRegion(0).back();
firOpBuilder.setInsertionPointToStart(&block);
if (semantics::checkForSingleVariableOnRHS(stmt1)) {
if (semantics::checkForSymbolMatch(stmt2)) {
// Atomic capture construct is of the form [capture-stmt, update-stmt]
const semantics::SomeExpr &fromExpr = *semantics::GetExpr(stmt1Expr);
mlir::Type elementType = converter.genType(fromExpr);
genAtomicCaptureStatement(converter, stmt2LHSArg, stmt1LHSArg,
/*leftHandClauseList=*/nullptr,
/*rightHandClauseList=*/nullptr, elementType,
loc);
genAtomicUpdateStatement(
converter, stmt2LHSArg, stmt2VarType, stmt2Var, stmt2Expr,
/*leftHandClauseList=*/nullptr,
/*rightHandClauseList=*/nullptr, loc, atomicCaptureOp);
} else {
// Atomic capture construct is of the form [capture-stmt, write-stmt]
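// (e.g. `v = x` followed by `x = expr`, where `expr` does not reference `x`).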
firOpBuilder.setInsertionPoint(atomicCaptureOp);
mlir::Value stmt2RHSArg =
fir::getBase(converter.genExprValue(assign2.rhs, stmtCtx));
firOpBuilder.setInsertionPointToStart(&block);
const semantics::SomeExpr &fromExpr = *semantics::GetExpr(stmt1Expr);
mlir::Type elementType = converter.genType(fromExpr);
genAtomicCaptureStatement(converter, stmt2LHSArg, stmt1LHSArg,
/*leftHandClauseList=*/nullptr,
/*rightHandClauseList=*/nullptr, elementType,
loc);
genAtomicWriteStatement(converter, stmt2LHSArg, stmt2RHSArg,
/*leftHandClauseList=*/nullptr,
/*rightHandClauseList=*/nullptr, loc);
}
} else {
// Atomic capture construct is of the form [update-stmt, capture-stmt]
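// (e.g. `x = x + 1` followed by `v = x`).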
const semantics::SomeExpr &fromExpr = *semantics::GetExpr(stmt2Expr);
mlir::Type elementType = converter.genType(fromExpr);
genAtomicUpdateStatement(
converter, stmt1LHSArg, stmt1VarType, stmt1Var, stmt1Expr,
/*leftHandClauseList=*/nullptr,
/*rightHandClauseList=*/nullptr, loc, atomicCaptureOp);
genAtomicCaptureStatement(converter, stmt1LHSArg, stmt2LHSArg,
/*leftHandClauseList=*/nullptr,
/*rightHandClauseList=*/nullptr, elementType,
loc);
}
firOpBuilder.setInsertionPointToEnd(&block);
firOpBuilder.create<mlir::omp::TerminatorOp>(loc);
firOpBuilder.setInsertionPointToStart(&block);
}
//===----------------------------------------------------------------------===//
// Code generation functions for the standalone version of constructs that can
// also be a leaf of a composite construct
//===----------------------------------------------------------------------===//
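// Each of these functions lowers its leaf construct to a loop wrapper
// operation holding a single omp.loop_nest. Schematically (types and most
// clauses omitted):
//
//   omp.distribute private(...) {
//     omp.loop_nest (%iv) : i32 = (%lb) to (%ub) inclusive step (%step) {
//       ...
//       omp.yield
//     }
//   }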
static mlir::omp::DistributeOp genStandaloneDistribute(
lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
mlir::omp::DistributeOperands distributeClauseOps;
genDistributeClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
distributeClauseOps);
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/true,
enableDelayedPrivatization, symTable);
dsp.processStep1(&distributeClauseOps);
mlir::omp::LoopNestOperands loopNestClauseOps;
llvm::SmallVector<const semantics::Symbol *> iv;
genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
loopNestClauseOps, iv);
EntryBlockArgs distributeArgs;
distributeArgs.priv.syms = dsp.getDelayedPrivSymbols();
distributeArgs.priv.vars = distributeClauseOps.privateVars;
auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>(
converter, loc, distributeClauseOps, distributeArgs);
genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
loopNestClauseOps, iv, {{distributeOp, distributeArgs}},
llvm::omp::Directive::OMPD_distribute, dsp);
return distributeOp;
}
static mlir::omp::WsloopOp genStandaloneDo(
lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
mlir::omp::WsloopOperands wsloopClauseOps;
llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms;
genWsloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
wsloopClauseOps, wsloopReductionSyms);
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/true,
enableDelayedPrivatization, symTable);
dsp.processStep1(&wsloopClauseOps);
mlir::omp::LoopNestOperands loopNestClauseOps;
llvm::SmallVector<const semantics::Symbol *> iv;
genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
loopNestClauseOps, iv);
EntryBlockArgs wsloopArgs;
wsloopArgs.priv.syms = dsp.getDelayedPrivSymbols();
wsloopArgs.priv.vars = wsloopClauseOps.privateVars;
wsloopArgs.reduction.syms = wsloopReductionSyms;
wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars;
auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>(
converter, loc, wsloopClauseOps, wsloopArgs);
genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
loopNestClauseOps, iv, {{wsloopOp, wsloopArgs}},
llvm::omp::Directive::OMPD_do, dsp);
return wsloopOp;
}
static mlir::omp::ParallelOp genStandaloneParallel(
lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
mlir::omp::ParallelOperands parallelClauseOps;
llvm::SmallVector<const semantics::Symbol *> parallelReductionSyms;
genParallelClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
parallelClauseOps, parallelReductionSyms);
std::optional<DataSharingProcessor> dsp;
if (enableDelayedPrivatization) {
dsp.emplace(converter, semaCtx, item->clauses, eval,
lower::omp::isLastItemInQueue(item, queue),
/*useDelayedPrivatization=*/true, symTable);
dsp->processStep1(&parallelClauseOps);
}
EntryBlockArgs parallelArgs;
if (dsp)
parallelArgs.priv.syms = dsp->getDelayedPrivSymbols();
parallelArgs.priv.vars = parallelClauseOps.privateVars;
parallelArgs.reduction.syms = parallelReductionSyms;
parallelArgs.reduction.vars = parallelClauseOps.reductionVars;
return genParallelOp(converter, symTable, semaCtx, eval, loc, queue, item,
parallelClauseOps, parallelArgs,
enableDelayedPrivatization ? &dsp.value() : nullptr);
}
static mlir::omp::SimdOp
genStandaloneSimd(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
mlir::omp::SimdOperands simdClauseOps;
llvm::SmallVector<const semantics::Symbol *> simdReductionSyms;
genSimdClauses(converter, semaCtx, item->clauses, loc, simdClauseOps,
simdReductionSyms);
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/true,
enableDelayedPrivatization, symTable);
dsp.processStep1(&simdClauseOps);
mlir::omp::LoopNestOperands loopNestClauseOps;
llvm::SmallVector<const semantics::Symbol *> iv;
genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
loopNestClauseOps, iv);
EntryBlockArgs simdArgs;
simdArgs.priv.syms = dsp.getDelayedPrivSymbols();
simdArgs.priv.vars = simdClauseOps.privateVars;
simdArgs.reduction.syms = simdReductionSyms;
simdArgs.reduction.vars = simdClauseOps.reductionVars;
auto simdOp =
genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps, simdArgs);
genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
loopNestClauseOps, iv, {{simdOp, simdArgs}},
llvm::omp::Directive::OMPD_simd, dsp);
return simdOp;
}
static mlir::omp::TaskloopOp genStandaloneTaskloop(
lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
mlir::omp::TaskloopOperands taskloopClauseOps;
genTaskloopClauses(converter, semaCtx, item->clauses, loc, taskloopClauseOps);
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/true,
enableDelayedPrivatization, symTable);
dsp.processStep1(&taskloopClauseOps);
mlir::omp::LoopNestOperands loopNestClauseOps;
llvm::SmallVector<const semantics::Symbol *> iv;
genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
loopNestClauseOps, iv);
EntryBlockArgs taskloopArgs;
taskloopArgs.priv.syms = dsp.getDelayedPrivSymbols();
taskloopArgs.priv.vars = taskloopClauseOps.privateVars;
auto taskLoopOp = genWrapperOp<mlir::omp::TaskloopOp>(
converter, loc, taskloopClauseOps, taskloopArgs);
genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
loopNestClauseOps, iv, {{taskLoopOp, taskloopArgs}},
llvm::omp::Directive::OMPD_taskloop, dsp);
return taskLoopOp;
}
//===----------------------------------------------------------------------===//
// Code generation functions for composite constructs
//===----------------------------------------------------------------------===//
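// Composite constructs are lowered as a nest of wrapper operations around a
// single omp.loop_nest, with each loop wrapper marked as composite via
// setComposite. Schematically, for DISTRIBUTE PARALLEL DO (clauses omitted):
//
//   omp.parallel {
//     omp.distribute {
//       omp.wsloop {
//         omp.loop_nest (%iv) ... { omp.yield }
//       }
//     }
//   }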
static mlir::omp::DistributeOp genCompositeDistributeParallelDo(
lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
assert(std::distance(item, queue.end()) == 3 && "Invalid leaf constructs");
ConstructQueue::const_iterator distributeItem = item;
ConstructQueue::const_iterator parallelItem = std::next(distributeItem);
ConstructQueue::const_iterator doItem = std::next(parallelItem);
// Create parent omp.parallel first.
mlir::omp::ParallelOperands parallelClauseOps;
llvm::SmallVector<const semantics::Symbol *> parallelReductionSyms;
genParallelClauses(converter, semaCtx, stmtCtx, parallelItem->clauses, loc,
parallelClauseOps, parallelReductionSyms);
DataSharingProcessor dsp(converter, semaCtx, doItem->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/true,
/*useDelayedPrivatization=*/true, symTable);
dsp.processStep1(&parallelClauseOps);
EntryBlockArgs parallelArgs;
parallelArgs.priv.syms = dsp.getDelayedPrivSymbols();
parallelArgs.priv.vars = parallelClauseOps.privateVars;
parallelArgs.reduction.syms = parallelReductionSyms;
parallelArgs.reduction.vars = parallelClauseOps.reductionVars;
genParallelOp(converter, symTable, semaCtx, eval, loc, queue, parallelItem,
parallelClauseOps, parallelArgs, &dsp, /*isComposite=*/true);
// Clause processing.
mlir::omp::DistributeOperands distributeClauseOps;
genDistributeClauses(converter, semaCtx, stmtCtx, distributeItem->clauses,
loc, distributeClauseOps);
mlir::omp::WsloopOperands wsloopClauseOps;
llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms;
genWsloopClauses(converter, semaCtx, stmtCtx, doItem->clauses, loc,
wsloopClauseOps, wsloopReductionSyms);
mlir::omp::LoopNestOperands loopNestClauseOps;
llvm::SmallVector<const semantics::Symbol *> iv;
genLoopNestClauses(converter, semaCtx, eval, doItem->clauses, loc,
loopNestClauseOps, iv);
// Operation creation.
EntryBlockArgs distributeArgs;
// TODO: Add private syms and vars.
auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>(
converter, loc, distributeClauseOps, distributeArgs);
distributeOp.setComposite(/*val=*/true);
EntryBlockArgs wsloopArgs;
// TODO: Add private syms and vars.
wsloopArgs.reduction.syms = wsloopReductionSyms;
wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars;
auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>(
converter, loc, wsloopClauseOps, wsloopArgs);
wsloopOp.setComposite(/*val=*/true);
genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, doItem,
loopNestClauseOps, iv,
{{distributeOp, distributeArgs}, {wsloopOp, wsloopArgs}},
llvm::omp::Directive::OMPD_distribute_parallel_do, dsp);
return distributeOp;
}
static mlir::omp::DistributeOp genCompositeDistributeParallelDoSimd(
lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
assert(std::distance(item, queue.end()) == 4 && "Invalid leaf constructs");
ConstructQueue::const_iterator distributeItem = item;
ConstructQueue::const_iterator parallelItem = std::next(distributeItem);
ConstructQueue::const_iterator doItem = std::next(parallelItem);
ConstructQueue::const_iterator simdItem = std::next(doItem);
// Create parent omp.parallel first.
mlir::omp::ParallelOperands parallelClauseOps;
llvm::SmallVector<const semantics::Symbol *> parallelReductionSyms;
genParallelClauses(converter, semaCtx, stmtCtx, parallelItem->clauses, loc,
parallelClauseOps, parallelReductionSyms);
DataSharingProcessor parallelItemDSP(
converter, semaCtx, parallelItem->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/false,
/*useDelayedPrivatization=*/true, symTable);
parallelItemDSP.processStep1(&parallelClauseOps);
EntryBlockArgs parallelArgs;
parallelArgs.priv.syms = parallelItemDSP.getDelayedPrivSymbols();
parallelArgs.priv.vars = parallelClauseOps.privateVars;
parallelArgs.reduction.syms = parallelReductionSyms;
parallelArgs.reduction.vars = parallelClauseOps.reductionVars;
genParallelOp(converter, symTable, semaCtx, eval, loc, queue, parallelItem,
parallelClauseOps, parallelArgs, &parallelItemDSP,
/*isComposite=*/true);
// Clause processing.
mlir::omp::DistributeOperands distributeClauseOps;
genDistributeClauses(converter, semaCtx, stmtCtx, distributeItem->clauses,
loc, distributeClauseOps);
mlir::omp::WsloopOperands wsloopClauseOps;
llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms;
genWsloopClauses(converter, semaCtx, stmtCtx, doItem->clauses, loc,
wsloopClauseOps, wsloopReductionSyms);
mlir::omp::SimdOperands simdClauseOps;
llvm::SmallVector<const semantics::Symbol *> simdReductionSyms;
genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps,
simdReductionSyms);
DataSharingProcessor simdItemDSP(converter, semaCtx, simdItem->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/true,
/*useDelayedPrivatization=*/true, symTable);
simdItemDSP.processStep1(&simdClauseOps);
mlir::omp::LoopNestOperands loopNestClauseOps;
llvm::SmallVector<const semantics::Symbol *> iv;
genLoopNestClauses(converter, semaCtx, eval, simdItem->clauses, loc,
loopNestClauseOps, iv);
// Operation creation.
EntryBlockArgs distributeArgs;
// TODO: Add private syms and vars.
auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>(
converter, loc, distributeClauseOps, distributeArgs);
distributeOp.setComposite(/*val=*/true);
EntryBlockArgs wsloopArgs;
// TODO: Add private syms and vars.
wsloopArgs.reduction.syms = wsloopReductionSyms;
wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars;
auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>(
converter, loc, wsloopClauseOps, wsloopArgs);
wsloopOp.setComposite(/*val=*/true);
EntryBlockArgs simdArgs;
simdArgs.priv.syms = simdItemDSP.getDelayedPrivSymbols();
simdArgs.priv.vars = simdClauseOps.privateVars;
simdArgs.reduction.syms = simdReductionSyms;
simdArgs.reduction.vars = simdClauseOps.reductionVars;
auto simdOp =
genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps, simdArgs);
simdOp.setComposite(/*val=*/true);
genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem,
loopNestClauseOps, iv,
{{distributeOp, distributeArgs},
{wsloopOp, wsloopArgs},
{simdOp, simdArgs}},
llvm::omp::Directive::OMPD_distribute_parallel_do_simd,
simdItemDSP);
return distributeOp;
}
static mlir::omp::DistributeOp genCompositeDistributeSimd(
lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs");
ConstructQueue::const_iterator distributeItem = item;
ConstructQueue::const_iterator simdItem = std::next(distributeItem);
// Clause processing.
mlir::omp::DistributeOperands distributeClauseOps;
genDistributeClauses(converter, semaCtx, stmtCtx, distributeItem->clauses,
loc, distributeClauseOps);
mlir::omp::SimdOperands simdClauseOps;
llvm::SmallVector<const semantics::Symbol *> simdReductionSyms;
genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps,
simdReductionSyms);
// TODO: Support delayed privatization.
DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/true,
/*useDelayedPrivatization=*/false, symTable);
dsp.processStep1();
// Pass the innermost leaf construct's clauses because that's where COLLAPSE
// is placed by construct decomposition.
mlir::omp::LoopNestOperands loopNestClauseOps;
llvm::SmallVector<const semantics::Symbol *> iv;
genLoopNestClauses(converter, semaCtx, eval, simdItem->clauses, loc,
loopNestClauseOps, iv);
// Operation creation.
EntryBlockArgs distributeArgs;
// TODO: Add private syms and vars.
auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>(
converter, loc, distributeClauseOps, distributeArgs);
distributeOp.setComposite(/*val=*/true);
EntryBlockArgs simdArgs;
// TODO: Add private syms and vars.
simdArgs.reduction.syms = simdReductionSyms;
simdArgs.reduction.vars = simdClauseOps.reductionVars;
auto simdOp =
genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps, simdArgs);
simdOp.setComposite(/*val=*/true);
genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem,
loopNestClauseOps, iv,
{{distributeOp, distributeArgs}, {simdOp, simdArgs}},
llvm::omp::Directive::OMPD_distribute_simd, dsp);
return distributeOp;
}
static mlir::omp::WsloopOp genCompositeDoSimd(
lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs");
ConstructQueue::const_iterator doItem = item;
ConstructQueue::const_iterator simdItem = std::next(doItem);
// Clause processing.
mlir::omp::WsloopOperands wsloopClauseOps;
llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms;
genWsloopClauses(converter, semaCtx, stmtCtx, doItem->clauses, loc,
wsloopClauseOps, wsloopReductionSyms);
mlir::omp::SimdOperands simdClauseOps;
llvm::SmallVector<const semantics::Symbol *> simdReductionSyms;
genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps,
simdReductionSyms);
// TODO: Support delayed privatization.
DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/true,
/*useDelayedPrivatization=*/false, symTable);
dsp.processStep1();
// Pass the innermost leaf construct's clauses because that's where COLLAPSE
// is placed by construct decomposition.
mlir::omp::LoopNestOperands loopNestClauseOps;
llvm::SmallVector<const semantics::Symbol *> iv;
genLoopNestClauses(converter, semaCtx, eval, simdItem->clauses, loc,
loopNestClauseOps, iv);
// Operation creation.
EntryBlockArgs wsloopArgs;
// TODO: Add private syms and vars.
wsloopArgs.reduction.syms = wsloopReductionSyms;
wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars;
auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>(
converter, loc, wsloopClauseOps, wsloopArgs);
wsloopOp.setComposite(/*val=*/true);
EntryBlockArgs simdArgs;
// TODO: Add private syms and vars.
simdArgs.reduction.syms = simdReductionSyms;
simdArgs.reduction.vars = simdClauseOps.reductionVars;
auto simdOp =
genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps, simdArgs);
simdOp.setComposite(/*val=*/true);
genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem,
loopNestClauseOps, iv,
{{wsloopOp, wsloopArgs}, {simdOp, simdArgs}},
llvm::omp::Directive::OMPD_do_simd, dsp);
return wsloopOp;
}
static mlir::omp::TaskloopOp genCompositeTaskloopSimd(
lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs");
TODO(loc, "Composite TASKLOOP SIMD");
return nullptr;
}
//===----------------------------------------------------------------------===//
// Dispatch
//===----------------------------------------------------------------------===//
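// For loop-associated leaf constructs, dispatch first tries to match the head
// of the construct queue against a known composite leaf sequence; if no
// sequence matches, the single leaf construct is handled by the switch in
// genOMPDispatch below.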
static bool genOMPCompositeDispatch(
lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item,
mlir::Operation *&newOp) {
using llvm::omp::Directive;
using lower::omp::matchLeafSequence;
// TODO: Privatization for composite constructs is currently only done based
// on the clauses for their last leaf construct, which may not always be
// correct. Consider per-leaf privatization of composite constructs once
// delayed privatization is supported by all participating ops.
if (matchLeafSequence(item, queue, Directive::OMPD_distribute_parallel_do))
newOp = genCompositeDistributeParallelDo(converter, symTable, stmtCtx,
semaCtx, eval, loc, queue, item);
else if (matchLeafSequence(item, queue,
Directive::OMPD_distribute_parallel_do_simd))
newOp = genCompositeDistributeParallelDoSimd(
converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
else if (matchLeafSequence(item, queue, Directive::OMPD_distribute_simd))
newOp = genCompositeDistributeSimd(converter, symTable, stmtCtx, semaCtx,
eval, loc, queue, item);
else if (matchLeafSequence(item, queue, Directive::OMPD_do_simd))
newOp = genCompositeDoSimd(converter, symTable, stmtCtx, semaCtx, eval, loc,
queue, item);
else if (matchLeafSequence(item, queue, Directive::OMPD_taskloop_simd))
newOp = genCompositeTaskloopSimd(converter, symTable, stmtCtx, semaCtx,
eval, loc, queue, item);
else
return false;
return true;
}
static void genOMPDispatch(lower::AbstractConverter &converter,
lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
assert(item != queue.end());
lower::StatementContext stmtCtx;
mlir::Operation *newOp = nullptr;
// Generate cleanup code for the stmtCtx after newOp.
auto finalizeStmtCtx = [&]() {
if (newOp) {
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
fir::FirOpBuilder::InsertionGuard guard(builder);
builder.setInsertionPointAfter(newOp);
stmtCtx.finalizeAndPop();
}
};
bool loopLeaf = llvm::omp::getDirectiveAssociation(item->id) ==
llvm::omp::Association::Loop;
if (loopLeaf) {
symTable.pushScope();
if (genOMPCompositeDispatch(converter, symTable, stmtCtx, semaCtx, eval,
loc, queue, item, newOp)) {
symTable.popScope();
finalizeStmtCtx();
return;
}
}
switch (llvm::omp::Directive dir = item->id) {
case llvm::omp::Directive::OMPD_barrier:
newOp = genBarrierOp(converter, symTable, semaCtx, eval, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_distribute:
newOp = genStandaloneDistribute(converter, symTable, stmtCtx, semaCtx, eval,
loc, queue, item);
break;
case llvm::omp::Directive::OMPD_do:
newOp = genStandaloneDo(converter, symTable, stmtCtx, semaCtx, eval, loc,
queue, item);
break;
case llvm::omp::Directive::OMPD_loop:
newOp = genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_masked:
newOp = genMaskedOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue,
item);
break;
case llvm::omp::Directive::OMPD_master:
newOp = genMasterOp(converter, symTable, semaCtx, eval, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_ordered:
// Block-associated "ordered" construct.
newOp = genOrderedRegionOp(converter, symTable, semaCtx, eval, loc, queue,
item);
break;
case llvm::omp::Directive::OMPD_parallel:
newOp = genStandaloneParallel(converter, symTable, stmtCtx, semaCtx, eval,
loc, queue, item);
break;
case llvm::omp::Directive::OMPD_scan:
newOp = genScanOp(converter, symTable, semaCtx, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_section:
// Lowered in the enclosing genSectionsOp.
llvm_unreachable("genOMPDispatch: OMPD_section");
break;
case llvm::omp::Directive::OMPD_sections:
// Called directly from genOMP([...], OpenMPSectionsConstruct) because it
// has a different prototype.
// This code path is still taken when iterating through the construct queue
// in genBodyOfOp.
break;
case llvm::omp::Directive::OMPD_simd:
newOp =
genStandaloneSimd(converter, symTable, semaCtx, eval, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_scope:
newOp = genScopeOp(converter, symTable, semaCtx, eval, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_single:
newOp = genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_target:
newOp = genTargetOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue,
item);
break;
case llvm::omp::Directive::OMPD_target_data:
newOp = genTargetDataOp(converter, symTable, stmtCtx, semaCtx, eval, loc,
queue, item);
break;
case llvm::omp::Directive::OMPD_target_enter_data:
newOp = genTargetEnterExitUpdateDataOp<mlir::omp::TargetEnterDataOp>(
converter, symTable, stmtCtx, semaCtx, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_target_exit_data:
newOp = genTargetEnterExitUpdateDataOp<mlir::omp::TargetExitDataOp>(
converter, symTable, stmtCtx, semaCtx, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_target_update:
newOp = genTargetEnterExitUpdateDataOp<mlir::omp::TargetUpdateOp>(
converter, symTable, stmtCtx, semaCtx, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_task:
newOp = genTaskOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue,
item);
break;
case llvm::omp::Directive::OMPD_taskgroup:
newOp =
genTaskgroupOp(converter, symTable, semaCtx, eval, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_taskloop:
newOp = genStandaloneTaskloop(converter, symTable, semaCtx, eval, loc,
queue, item);
break;
case llvm::omp::Directive::OMPD_taskwait:
newOp = genTaskwaitOp(converter, symTable, semaCtx, eval, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_taskyield:
newOp =
genTaskyieldOp(converter, symTable, semaCtx, eval, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_teams:
newOp = genTeamsOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue,
item);
break;
case llvm::omp::Directive::OMPD_tile:
case llvm::omp::Directive::OMPD_unroll:
TODO(loc, "Unhandled loop directive (" +
llvm::omp::getOpenMPDirectiveName(dir) + ")");
// case llvm::omp::Directive::OMPD_workdistribute:
case llvm::omp::Directive::OMPD_workshare:
newOp = genWorkshareOp(converter, symTable, stmtCtx, semaCtx, eval, loc,
queue, item);
break;
default:
// Combined and composite constructs should have been split into a sequence
// of leaf constructs when building the construct queue.
assert(!llvm::omp::isLeafConstruct(dir) &&
"Unexpected compound construct.");
break;
}
finalizeStmtCtx();
if (loopLeaf)
symTable.popScope();
}
//===----------------------------------------------------------------------===//
// OpenMPDeclarativeConstruct visitors
//===----------------------------------------------------------------------===//
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPUtilityConstruct &);
static void
genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
const parser::OpenMPDeclarativeAllocate &declarativeAllocate) {
TODO(converter.getCurrentLocation(), "OpenMPDeclarativeAllocate");
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPDeclarativeAssumes &assumesConstruct) {
TODO(converter.getCurrentLocation(), "OpenMP ASSUMES declaration");
}
static void
genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
const parser::OmpDeclareVariantDirective &declareVariantDirective) {
TODO(converter.getCurrentLocation(), "OmpDeclareVariantDirective");
}
static void genOMP(
lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
const parser::OpenMPDeclareReductionConstruct &declareReductionConstruct) {
TODO(converter.getCurrentLocation(), "OpenMPDeclareReductionConstruct");
}
static void
genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
const parser::OpenMPDeclareSimdConstruct &declareSimdConstruct) {
TODO(converter.getCurrentLocation(), "OpenMPDeclareSimdConstruct");
}
static void
genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
const parser::OpenMPDeclareMapperConstruct &declareMapperConstruct) {
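// Example source form (hypothetical mapper name `mymapper` and derived type
// `mytype`):
//   !$omp declare mapper(mymapper : mytype :: v) map(tofrom: v%field)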
mlir::Location loc = converter.genLocation(declareMapperConstruct.source);
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
lower::StatementContext stmtCtx;
const auto &spec =
std::get<parser::OmpMapperSpecifier>(declareMapperConstruct.t);
const auto &mapperName{std::get<std::optional<parser::Name>>(spec.t)};
const auto &varType{std::get<parser::TypeSpec>(spec.t)};
const auto &varName{std::get<parser::Name>(spec.t)};
assert(varType.declTypeSpec->category() ==
semantics::DeclTypeSpec::Category::TypeDerived &&
"Expected derived type");
std::string mapperNameStr;
if (mapperName.has_value()) {
mapperNameStr = mapperName->ToString();
mapperNameStr =
converter.mangleName(mapperNameStr, mapperName->symbol->owner());
} else {
mapperNameStr =
varType.declTypeSpec->derivedTypeSpec().name().ToString() + ".default";
mapperNameStr = converter.mangleName(
mapperNameStr, *varType.declTypeSpec->derivedTypeSpec().GetScope());
}
// Save current insertion point before moving to the module scope to create
// the DeclareMapperOp
mlir::OpBuilder::InsertionGuard guard(firOpBuilder);
firOpBuilder.setInsertionPointToStart(converter.getModuleOp().getBody());
auto mlirType = converter.genType(varType.declTypeSpec->derivedTypeSpec());
auto declMapperOp = firOpBuilder.create<mlir::omp::DeclareMapperOp>(
loc, mapperNameStr, mlirType);
auto &region = declMapperOp.getRegion();
firOpBuilder.createBlock(&region);
auto varVal = region.addArgument(firOpBuilder.getRefType(mlirType), loc);
converter.bindSymbol(*varName.symbol, varVal);
// Populate the declareMapper region with the map information.
mlir::omp::DeclareMapperInfoOperands clauseOps;
const auto *clauseList{
parser::Unwrap<parser::OmpClauseList>(declareMapperConstruct.t)};
List<Clause> clauses = makeClauses(*clauseList, semaCtx);
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processMap(loc, stmtCtx, clauseOps);
firOpBuilder.create<mlir::omp::DeclareMapperInfoOp>(loc, clauseOps.mapVars);
}
static void
genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct) {
mlir::omp::DeclareTargetOperands clauseOps;
llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause;
mlir::ModuleOp mod = converter.getFirOpBuilder().getModule();
getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct,
clauseOps, symbolAndClause);
for (const DeclareTargetCapturePair &symClause : symbolAndClause) {
mlir::Operation *op = mod.lookupSymbol(
converter.mangleName(std::get<const semantics::Symbol &>(symClause)));
// Some symbols are deferred until later in the module; these are handled
// upon finalization of the module for OpenMP inside Bridge, so we simply
// skip them for now.
if (!op)
continue;
markDeclareTarget(
op, converter,
std::get<mlir::omp::DeclareTargetCaptureClause>(symClause),
clauseOps.deviceType);
}
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPRequiresConstruct &requiresConstruct) {
// Requires directives are gathered and processed in semantics and
// then combined in the lowering bridge before triggering codegen
// just once. Hence, there is no need to lower each individual
// occurrence here.
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPThreadprivate &threadprivate) {
// The directive is lowered when instantiating the variable to
// support the case of threadprivate variable declared in module.
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OmpMetadirectiveDirective &meta) {
TODO(converter.getCurrentLocation(), "METADIRECTIVE");
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPDeclarativeConstruct &ompDeclConstruct) {
Fortran::common::visit(
[&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); },
ompDeclConstruct.u);
}
//===----------------------------------------------------------------------===//
// OpenMPStandaloneConstruct visitors
//===----------------------------------------------------------------------===//
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPSimpleStandaloneConstruct &construct) {
const auto &directive = std::get<parser::OmpDirectiveName>(construct.v.t);
List<Clause> clauses = makeClauses(construct.v.Clauses(), semaCtx);
mlir::Location currentLocation = converter.genLocation(directive.source);
ConstructQueue queue{
buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
eval, directive.source, directive.v, clauses)};
if (directive.v == llvm::omp::Directive::OMPD_ordered) {
// Standalone "ordered" directive.
genOrderedOp(converter, symTable, semaCtx, eval, currentLocation, queue,
queue.begin());
} else {
// Dispatch handles the "block-associated" variant of "ordered".
genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
queue.begin());
}
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPFlushConstruct &construct) {
const auto &argumentList = construct.v.Arguments();
const auto &clauseList = construct.v.Clauses();
ObjectList objects = makeObjects(argumentList, semaCtx);
List<Clause> clauses =
makeList(clauseList.v, [&](auto &&s) { return makeClause(s, semaCtx); });
mlir::Location currentLocation = converter.genLocation(construct.source);
ConstructQueue queue{buildConstructQueue(
converter.getFirOpBuilder().getModule(), semaCtx, eval, construct.source,
llvm::omp::Directive::OMPD_flush, clauses)};
genFlushOp(converter, symTable, semaCtx, eval, currentLocation, objects,
queue, queue.begin());
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPCancelConstruct &cancelConstruct) {
List<Clause> clauses = makeList(cancelConstruct.v.Clauses().v, [&](auto &&s) {
return makeClause(s, semaCtx);
});
mlir::Location loc = converter.genLocation(cancelConstruct.source);
ConstructQueue queue{buildConstructQueue(
converter.getFirOpBuilder().getModule(), semaCtx, eval,
cancelConstruct.source, llvm::omp::Directive::OMPD_cancel, clauses)};
genCancelOp(converter, semaCtx, eval, loc, queue, queue.begin());
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPCancellationPointConstruct
&cancellationPointConstruct) {
List<Clause> clauses =
makeList(cancellationPointConstruct.v.Clauses().v,
[&](auto &&s) { return makeClause(s, semaCtx); });
mlir::Location loc = converter.genLocation(cancellationPointConstruct.source);
ConstructQueue queue{
buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
eval, cancellationPointConstruct.source,
llvm::omp::Directive::OMPD_cancellation_point, clauses)};
genCancellationPointOp(converter, semaCtx, eval, loc, queue, queue.begin());
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPDepobjConstruct &construct) {
// These values will be ignored until the construct itself is implemented,
// but compute them anyway for the sake of testing (via a Todo test).
ObjectList objects = makeObjects(construct.v.Arguments(), semaCtx);
assert(objects.size() == 1);
List<Clause> clauses = makeClauses(construct.v.Clauses(), semaCtx);
assert(clauses.size() == 1);
(void)objects;
(void)clauses;
TODO(converter.getCurrentLocation(), "OpenMPDepobjConstruct");
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPInteropConstruct &interopConstruct) {
TODO(converter.getCurrentLocation(), "OpenMPInteropConstruct");
}
static void
genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
const parser::OpenMPStandaloneConstruct &standaloneConstruct) {
Fortran::common::visit(
[&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); },
standaloneConstruct.u);
}
//===----------------------------------------------------------------------===//
// OpenMPConstruct visitors
//===----------------------------------------------------------------------===//
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPAllocatorsConstruct &allocsConstruct) {
TODO(converter.getCurrentLocation(), "OpenMPAllocatorsConstruct");
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPAtomicConstruct &atomicConstruct) {
Fortran::common::visit(
common::visitors{
[&](const parser::OmpAtomicRead &atomicRead) {
mlir::Location loc = converter.genLocation(atomicRead.source);
genAtomicRead(converter, atomicRead, loc);
},
[&](const parser::OmpAtomicWrite &atomicWrite) {
mlir::Location loc = converter.genLocation(atomicWrite.source);
genAtomicWrite(converter, atomicWrite, loc);
},
[&](const parser::OmpAtomic &atomicConstruct) {
mlir::Location loc = converter.genLocation(atomicConstruct.source);
genOmpAtomic(converter, atomicConstruct, loc);
},
[&](const parser::OmpAtomicUpdate &atomicUpdate) {
mlir::Location loc = converter.genLocation(atomicUpdate.source);
genAtomicUpdate(converter, atomicUpdate, loc);
},
[&](const parser::OmpAtomicCapture &atomicCapture) {
mlir::Location loc = converter.genLocation(atomicCapture.source);
genAtomicCapture(converter, atomicCapture, loc);
},
[&](const parser::OmpAtomicCompare &atomicCompare) {
mlir::Location loc = converter.genLocation(atomicCompare.source);
TODO(loc, "OpenMP atomic compare");
},
},
atomicConstruct.u);
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPBlockConstruct &blockConstruct) {
const auto &beginBlockDirective =
std::get<parser::OmpBeginBlockDirective>(blockConstruct.t);
const auto &endBlockDirective =
std::get<parser::OmpEndBlockDirective>(blockConstruct.t);
mlir::Location currentLocation =
converter.genLocation(beginBlockDirective.source);
const auto origDirective =
std::get<parser::OmpBlockDirective>(beginBlockDirective.t).v;
List<Clause> clauses = makeClauses(
std::get<parser::OmpClauseList>(beginBlockDirective.t), semaCtx);
clauses.append(makeClauses(
std::get<parser::OmpClauseList>(endBlockDirective.t), semaCtx));
assert(llvm::omp::blockConstructSet.test(origDirective) &&
"Expected block construct");
(void)origDirective;
for (const Clause &clause : clauses) {
mlir::Location clauseLocation = converter.genLocation(clause.source);
if (!std::holds_alternative<clause::Affinity>(clause.u) &&
!std::holds_alternative<clause::Allocate>(clause.u) &&
!std::holds_alternative<clause::Copyin>(clause.u) &&
!std::holds_alternative<clause::Copyprivate>(clause.u) &&
!std::holds_alternative<clause::Default>(clause.u) &&
!std::holds_alternative<clause::Depend>(clause.u) &&
!std::holds_alternative<clause::Filter>(clause.u) &&
!std::holds_alternative<clause::Final>(clause.u) &&
!std::holds_alternative<clause::Firstprivate>(clause.u) &&
!std::holds_alternative<clause::HasDeviceAddr>(clause.u) &&
!std::holds_alternative<clause::If>(clause.u) &&
!std::holds_alternative<clause::IsDevicePtr>(clause.u) &&
!std::holds_alternative<clause::Map>(clause.u) &&
!std::holds_alternative<clause::Nowait>(clause.u) &&
!std::holds_alternative<clause::NumTeams>(clause.u) &&
!std::holds_alternative<clause::NumThreads>(clause.u) &&
!std::holds_alternative<clause::OmpxBare>(clause.u) &&
!std::holds_alternative<clause::Priority>(clause.u) &&
!std::holds_alternative<clause::Private>(clause.u) &&
!std::holds_alternative<clause::ProcBind>(clause.u) &&
!std::holds_alternative<clause::Reduction>(clause.u) &&
!std::holds_alternative<clause::Shared>(clause.u) &&
!std::holds_alternative<clause::Simd>(clause.u) &&
!std::holds_alternative<clause::ThreadLimit>(clause.u) &&
!std::holds_alternative<clause::Threads>(clause.u) &&
!std::holds_alternative<clause::UseDeviceAddr>(clause.u) &&
!std::holds_alternative<clause::UseDevicePtr>(clause.u) &&
!std::holds_alternative<clause::InReduction>(clause.u) &&
!std::holds_alternative<clause::Mergeable>(clause.u) &&
!std::holds_alternative<clause::Untied>(clause.u) &&
!std::holds_alternative<clause::TaskReduction>(clause.u) &&
!std::holds_alternative<clause::Detach>(clause.u)) {
std::string name =
parser::ToUpperCaseLetters(llvm::omp::getOpenMPClauseName(clause.id));
TODO(clauseLocation, name + " clause is not implemented yet");
}
}
llvm::omp::Directive directive =
std::get<parser::OmpBlockDirective>(beginBlockDirective.t).v;
const parser::CharBlock &source =
std::get<parser::OmpBlockDirective>(beginBlockDirective.t).source;
ConstructQueue queue{
buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
eval, source, directive, clauses)};
genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
queue.begin());
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPAssumeConstruct &assumeConstruct) {
mlir::Location clauseLocation = converter.genLocation(assumeConstruct.source);
TODO(clauseLocation, "OpenMP ASSUME construct");
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPCriticalConstruct &criticalConstruct) {
const auto &cd = std::get<parser::OmpCriticalDirective>(criticalConstruct.t);
List<Clause> clauses =
makeClauses(std::get<parser::OmpClauseList>(cd.t), semaCtx);
ConstructQueue queue{buildConstructQueue(
converter.getFirOpBuilder().getModule(), semaCtx, eval, cd.source,
llvm::omp::Directive::OMPD_critical, clauses)};
const auto &name = std::get<std::optional<parser::Name>>(cd.t);
mlir::Location currentLocation = converter.getCurrentLocation();
genCriticalOp(converter, symTable, semaCtx, eval, currentLocation, queue,
queue.begin(), name);
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPUtilityConstruct &) {
TODO(converter.getCurrentLocation(), "OpenMPUtilityConstruct");
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPDispatchConstruct &) {
TODO(converter.getCurrentLocation(), "OpenMPDispatchConstruct");
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPExecutableAllocate &execAllocConstruct) {
TODO(converter.getCurrentLocation(), "OpenMPExecutableAllocate");
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPLoopConstruct &loopConstruct) {
const auto &beginLoopDirective =
std::get<parser::OmpBeginLoopDirective>(loopConstruct.t);
List<Clause> clauses = makeClauses(
std::get<parser::OmpClauseList>(beginLoopDirective.t), semaCtx);
if (auto &endLoopDirective =
std::get<std::optional<parser::OmpEndLoopDirective>>(
loopConstruct.t)) {
clauses.append(makeClauses(
std::get<parser::OmpClauseList>(endLoopDirective->t), semaCtx));
}
mlir::Location currentLocation =
converter.genLocation(beginLoopDirective.source);
llvm::omp::Directive directive =
std::get<parser::OmpLoopDirective>(beginLoopDirective.t).v;
const parser::CharBlock &source =
std::get<parser::OmpLoopDirective>(beginLoopDirective.t).source;
ConstructQueue queue{
buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
eval, source, directive, clauses)};
genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
queue.begin());
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPSectionConstruct &sectionConstruct) {
// Do nothing here. SECTION is lowered as part of the enclosing SECTIONS
// construct.
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPSectionsConstruct &sectionsConstruct) {
const auto &beginSectionsDirective =
std::get<parser::OmpBeginSectionsDirective>(sectionsConstruct.t);
List<Clause> clauses = makeClauses(
std::get<parser::OmpClauseList>(beginSectionsDirective.t), semaCtx);
const auto &endSectionsDirective =
std::get<parser::OmpEndSectionsDirective>(sectionsConstruct.t);
const auto &sectionBlocks =
std::get<parser::OmpSectionBlocks>(sectionsConstruct.t);
clauses.append(makeClauses(
std::get<parser::OmpClauseList>(endSectionsDirective.t), semaCtx));
mlir::Location currentLocation = converter.getCurrentLocation();
llvm::omp::Directive directive =
std::get<parser::OmpSectionsDirective>(beginSectionsDirective.t).v;
const parser::CharBlock &source =
std::get<parser::OmpSectionsDirective>(beginSectionsDirective.t).source;
ConstructQueue queue{
buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
eval, source, directive, clauses)};
ConstructQueue::iterator next = queue.begin();
// Generate constructs that come first, e.g. PARALLEL.
while (next != queue.end() &&
next->id != llvm::omp::Directive::OMPD_sections) {
genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
next);
next = std::next(next);
}
// Call genSectionsOp directly (not via genOMPDispatch) so that we can pass
// the sectionBlocks argument.
assert(next != queue.end());
assert(next->id == llvm::omp::Directive::OMPD_sections);
genSectionsOp(converter, symTable, semaCtx, eval, currentLocation, queue,
next, sectionBlocks);
assert(std::next(next) == queue.end());
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPConstruct &ompConstruct) {
Fortran::common::visit(
[&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); },
ompConstruct.u);
}
//===----------------------------------------------------------------------===//
// Public functions
//===----------------------------------------------------------------------===//
mlir::Operation *Fortran::lower::genOpenMPTerminator(fir::FirOpBuilder &builder,
mlir::Operation *op,
mlir::Location loc) {
if (mlir::isa<mlir::omp::AtomicUpdateOp, mlir::omp::DeclareReductionOp,
mlir::omp::LoopNestOp>(op))
return builder.create<mlir::omp::YieldOp>(loc);
return builder.create<mlir::omp::TerminatorOp>(loc);
}
void Fortran::lower::genOpenMPConstruct(lower::AbstractConverter &converter,
lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPConstruct &omp) {
lower::SymMapScope scope(symTable);
genOMP(converter, symTable, semaCtx, eval, omp);
}
void Fortran::lower::genOpenMPDeclarativeConstruct(
lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
const parser::OpenMPDeclarativeConstruct &omp) {
genOMP(converter, symTable, semaCtx, eval, omp);
genNestedEvaluations(converter, eval);
}
void Fortran::lower::genOpenMPSymbolProperties(
lower::AbstractConverter &converter, const lower::pft::Variable &var) {
assert(var.hasSymbol() && "Expecting Symbol");
const semantics::Symbol &sym = var.getSymbol();
if (sym.test(semantics::Symbol::Flag::OmpThreadprivate))
lower::genThreadprivateOp(converter, var);
if (sym.test(semantics::Symbol::Flag::OmpDeclareTarget))
lower::genDeclareTargetIntGlobal(converter, var);
}
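/// Returns the value of the COLLAPSE clause in \p clauseList if present,
/// and 1 otherwise (e.g. `collapse(2)` yields 2).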
int64_t
Fortran::lower::getCollapseValue(const parser::OmpClauseList &clauseList) {
for (const parser::OmpClause &clause : clauseList.v) {
if (const auto *collapseClause =
std::get_if<parser::OmpClause::Collapse>(&clause.u)) {
const auto *expr = semantics::GetExpr(collapseClause->v);
return evaluate::ToInt64(*expr).value();
}
}
return 1;
}
void Fortran::lower::genThreadprivateOp(lower::AbstractConverter &converter,
const lower::pft::Variable &var) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
mlir::Location currentLocation = converter.getCurrentLocation();
const semantics::Symbol &sym = var.getSymbol();
mlir::Value symThreadprivateValue;
if (const semantics::Symbol *common =
semantics::FindCommonBlockContaining(sym.GetUltimate())) {
mlir::Value commonValue = converter.getSymbolAddress(*common);
if (mlir::isa<mlir::omp::ThreadprivateOp>(commonValue.getDefiningOp())) {
// Generate the ThreadprivateOp for a common block instead of its members,
// and only do it once per common block.
return;
}
// Generate ThreadprivateOp and rebind the common block.
mlir::Value commonThreadprivateValue =
firOpBuilder.create<mlir::omp::ThreadprivateOp>(
currentLocation, commonValue.getType(), commonValue);
converter.bindSymbol(*common, commonThreadprivateValue);
// Generate the threadprivate value for the common block member.
symThreadprivateValue = genCommonBlockMember(converter, currentLocation,
sym, commonThreadprivateValue);
} else if (!var.isGlobal()) {
// A non-global variable that may appear in a THREADPRIVATE directive must
// be a variable in the main program, which carries the implicit SAVE
// attribute. Treat it as if SAVE were explicit and create a GlobalOp for it
// to simplify the translation to LLVM IR. Look the global up first so that
// multiple global initializations are avoided.
auto module = converter.getModuleOp();
std::string globalName = converter.mangleName(sym);
fir::GlobalOp global = module.lookupSymbol<fir::GlobalOp>(globalName);
if (!global)
global = globalInitialization(converter, firOpBuilder, sym, var,
currentLocation);
mlir::Value symValue = firOpBuilder.create<fir::AddrOfOp>(
currentLocation, global.resultType(), global.getSymbol());
symThreadprivateValue = firOpBuilder.create<mlir::omp::ThreadprivateOp>(
currentLocation, symValue.getType(), symValue);
} else {
mlir::Value symValue = converter.getSymbolAddress(sym);
// The symbol may be use-associated multiple times, and nothing needs to be
// done after the original symbol is mapped to the threadprivatized value
// for the first time. Use the threadprivatized value directly.
mlir::Operation *op;
if (auto declOp = symValue.getDefiningOp<hlfir::DeclareOp>())
op = declOp.getMemref().getDefiningOp();
else
op = symValue.getDefiningOp();
if (mlir::isa<mlir::omp::ThreadprivateOp>(op))
return;
symThreadprivateValue = firOpBuilder.create<mlir::omp::ThreadprivateOp>(
currentLocation, symValue.getType(), symValue);
}
fir::ExtendedValue sexv = converter.getSymbolExtendedValue(sym);
fir::ExtendedValue symThreadprivateExv =
getExtendedValue(sexv, symThreadprivateValue);
converter.bindSymbol(sym, symThreadprivateExv);
}
// This function replicates threadprivate's behaviour of generating an
// internal fir.GlobalOp for non-global variables in the main program that
// have the implicit SAVE attribute, to simplify LLVM IR and MLIR
// generation.
void Fortran::lower::genDeclareTargetIntGlobal(
lower::AbstractConverter &converter, const lower::pft::Variable &var) {
if (!var.isGlobal()) {
// A non-global variable that can appear in a declare target directive must
// be a variable in the main program, which has the implicit SAVE
// attribute. We create a GlobalOp for it to simplify the translation to
// LLVM IR.
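// A hypothetical case reaching this point (illustrative only):
//
//   program main
//     integer :: x
//     !$omp declare target(x)
//   end program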
globalInitialization(converter, converter.getFirOpBuilder(),
var.getSymbol(), var, converter.getCurrentLocation());
}
}
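// isOpenMPTargetConstruct (below) reports whether a construct's directive
// belongs to the TARGET family. Hypothetical examples (not part of this
// file):
//
//   !$omp target                    -> true
//   !$omp target teams distribute   -> true
//   !$omp parallel do               -> false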
bool Fortran::lower::isOpenMPTargetConstruct(
const parser::OpenMPConstruct &omp) {
llvm::omp::Directive dir = llvm::omp::Directive::OMPD_unknown;
if (const auto *block = std::get_if<parser::OpenMPBlockConstruct>(&omp.u)) {
const auto &begin = std::get<parser::OmpBeginBlockDirective>(block->t);
dir = std::get<parser::OmpBlockDirective>(begin.t).v;
} else if (const auto *loop =
std::get_if<parser::OpenMPLoopConstruct>(&omp.u)) {
const auto &begin = std::get<parser::OmpBeginLoopDirective>(loop->t);
dir = std::get<parser::OmpLoopDirective>(begin.t).v;
}
return llvm::omp::allTargetSet.test(dir);
}
void Fortran::lower::gatherOpenMPDeferredDeclareTargets(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPDeclarativeConstruct &ompDecl,
llvm::SmallVectorImpl<OMPDeferredDeclareTargetInfo>
&deferredDeclareTarget) {
Fortran::common::visit(
common::visitors{
[&](const parser::OpenMPDeclareTargetConstruct &ompReq) {
collectDeferredDeclareTargets(converter, semaCtx, eval, ompReq,
deferredDeclareTarget);
},
[&](const auto &) {},
},
ompDecl.u);
}
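// For example (hypothetical input), a declare target construct restricted to
// device code would make isOpenMPDeviceDeclareTarget (below) return true:
//
//   !$omp declare target device_type(nohost)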
bool Fortran::lower::isOpenMPDeviceDeclareTarget(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPDeclarativeConstruct &ompDecl) {
return Fortran::common::visit(
common::visitors{
[&](const parser::OpenMPDeclareTargetConstruct &ompReq) {
mlir::omp::DeclareTargetDeviceType targetType =
getDeclareTargetFunctionDevice(converter, semaCtx, eval, ompReq)
.value_or(mlir::omp::DeclareTargetDeviceType::host);
return targetType != mlir::omp::DeclareTargetDeviceType::host;
},
[&](const auto &) { return false; },
},
ompDecl.u);
}
// In certain cases, such as subroutine or function interfaces that declare
// but do not define or directly call the subroutine or function within the
// same module, lowering is delayed until after the declare target construct
// itself has been processed, so the symbol is not yet in the table.
//
// This function also returns true if any device declare target case is
// encountered, to support checking whether the requires attributes must be
// applied to the module.
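// A hypothetical interface-only case whose function operation may not exist
// yet when the declare target construct is lowered:
//
//   interface
//     subroutine f()
//       !$omp declare target
//     end subroutine
//   end interface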
bool Fortran::lower::markOpenMPDeferredDeclareTargetFunctions(
mlir::Operation *mod,
llvm::SmallVectorImpl<OMPDeferredDeclareTargetInfo> &deferredDeclareTargets,
AbstractConverter &converter) {
bool deviceCodeFound = false;
auto modOp = llvm::cast<mlir::ModuleOp>(mod);
for (const auto &declTar : deferredDeclareTargets) {
mlir::Operation *op = modOp.lookupSymbol(converter.mangleName(declTar.sym));
// Because interfaces are only emitted when they are used within a module,
// not finding an operation at this point cannot be a hard error; we
// simply ignore it for now.
// TODO: Add semantic checks to detect cases where an erroneous
// (undefined) symbol has been supplied to a declare target clause.
if (!op)
continue;
auto devType = declTar.declareTargetDeviceType;
if (!deviceCodeFound && devType != mlir::omp::DeclareTargetDeviceType::host)
deviceCodeFound = true;
markDeclareTarget(op, converter, declTar.declareTargetCaptureClause,
devType);
}
return deviceCodeFound;
}
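// For illustration, given a hypothetical input such as
//
//   !$omp requires unified_shared_memory
//
// genOpenMPRequires (below) records the flag on the offload module,
// approximately as:
//
//   module attributes {omp.requires =
//       #omp<clause_requires unified_shared_memory>} {...}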
void Fortran::lower::genOpenMPRequires(mlir::Operation *mod,
const semantics::Symbol *symbol) {
using MlirRequires = mlir::omp::ClauseRequires;
using SemaRequires = semantics::WithOmpDeclarative::RequiresFlag;
if (auto offloadMod =
llvm::dyn_cast<mlir::omp::OffloadModuleInterface>(mod)) {
semantics::WithOmpDeclarative::RequiresFlags semaFlags;
if (symbol) {
common::visit(
[&](const auto &details) {
if constexpr (std::is_base_of_v<semantics::WithOmpDeclarative,
std::decay_t<decltype(details)>>) {
if (details.has_ompRequires())
semaFlags = *details.ompRequires();
}
},
symbol->details());
}
// Start from the pre-populated omp.requires module attribute, if it was
// set, so that the "-fopenmp-force-usm" compiler option is honored.
MlirRequires mlirFlags = offloadMod.getRequires();
if (semaFlags.test(SemaRequires::ReverseOffload))
mlirFlags = mlirFlags | MlirRequires::reverse_offload;
if (semaFlags.test(SemaRequires::UnifiedAddress))
mlirFlags = mlirFlags | MlirRequires::unified_address;
if (semaFlags.test(SemaRequires::UnifiedSharedMemory))
mlirFlags = mlirFlags | MlirRequires::unified_shared_memory;
if (semaFlags.test(SemaRequires::DynamicAllocators))
mlirFlags = mlirFlags | MlirRequires::dynamic_allocators;
offloadMod.setRequires(mlirFlags);
}
}