Reland [flang] Generalized simplification of HLFIR reduction ops. (#136071) (#136246)

This change generalizes the SumAsElemental inlining in the
SimplifyHLFIRIntrinsics pass so that it can be applied
to ALL, ANY, COUNT, MAXLOC, MAXVAL, MINLOC, MINVAL and SUM.

This change makes the special handling of the reduction
operations in OptimizedBufferization redundant: once HLFIR
operations are inlined, the hlfir.elemental inlining should
do the rest of the job.
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index c489450..79aabd2 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -772,458 +772,6 @@
   return mlir::success();
 }
 
-using GenBodyFn =
-    std::function<mlir::Value(fir::FirOpBuilder &, mlir::Location, mlir::Value,
-                              const llvm::SmallVectorImpl<mlir::Value> &)>;
-static mlir::Value generateReductionLoop(fir::FirOpBuilder &builder,
-                                         mlir::Location loc, mlir::Value init,
-                                         mlir::Value shape, GenBodyFn genBody) {
-  auto extents = hlfir::getIndexExtents(loc, builder, shape);
-  mlir::Value reduction = init;
-  mlir::IndexType idxTy = builder.getIndexType();
-  mlir::Value oneIdx = builder.createIntegerConstant(loc, idxTy, 1);
-
-  // Create a reduction loop nest. We use one-based indices so that they can be
-  // passed to the elemental, and reverse the order so that they can be
-  // generated in column-major order for better performance.
-  llvm::SmallVector<mlir::Value> indices(extents.size(), mlir::Value{});
-  for (unsigned i = 0; i < extents.size(); ++i) {
-    auto loop = builder.create<fir::DoLoopOp>(
-        loc, oneIdx, extents[extents.size() - i - 1], oneIdx, false,
-        /*finalCountValue=*/false, reduction);
-    reduction = loop.getRegionIterArgs()[0];
-    indices[extents.size() - i - 1] = loop.getInductionVar();
-    // Set insertion point to the loop body so that the next loop
-    // is inserted inside the current one.
-    builder.setInsertionPointToStart(loop.getBody());
-  }
-
-  // Generate the body
-  reduction = genBody(builder, loc, reduction, indices);
-
-  // Unwind the loop nest.
-  for (unsigned i = 0; i < extents.size(); ++i) {
-    auto result = builder.create<fir::ResultOp>(loc, reduction);
-    auto loop = mlir::cast<fir::DoLoopOp>(result->getParentOp());
-    reduction = loop.getResult(0);
-    // Set insertion point after the loop operation that we have
-    // just processed.
-    builder.setInsertionPointAfter(loop.getOperation());
-  }
-
-  return reduction;
-}
-
-auto makeMinMaxInitValGenerator(bool isMax) {
-  return [isMax](fir::FirOpBuilder builder, mlir::Location loc,
-                 mlir::Type elementType) -> mlir::Value {
-    if (auto ty = mlir::dyn_cast<mlir::FloatType>(elementType)) {
-      const llvm::fltSemantics &sem = ty.getFloatSemantics();
-      llvm::APFloat limit = llvm::APFloat::getInf(sem, /*Negative=*/isMax);
-      return builder.createRealConstant(loc, elementType, limit);
-    }
-    unsigned bits = elementType.getIntOrFloatBitWidth();
-    int64_t limitInt =
-        isMax ? llvm::APInt::getSignedMinValue(bits).getSExtValue()
-              : llvm::APInt::getSignedMaxValue(bits).getSExtValue();
-    return builder.createIntegerConstant(loc, elementType, limitInt);
-  };
-}
-
-mlir::Value generateMinMaxComparison(fir::FirOpBuilder builder,
-                                     mlir::Location loc, mlir::Value elem,
-                                     mlir::Value reduction, bool isMax) {
-  if (mlir::isa<mlir::FloatType>(reduction.getType())) {
-    // For FP reductions we want the first smallest value to be used, that
-    // is not NaN. A OGL/OLT condition will usually work for this unless all
-    // the values are Nan or Inf. This follows the same logic as
-    // NumericCompare for Minloc/Maxlox in extrema.cpp.
-    mlir::Value cmp = builder.create<mlir::arith::CmpFOp>(
-        loc,
-        isMax ? mlir::arith::CmpFPredicate::OGT
-              : mlir::arith::CmpFPredicate::OLT,
-        elem, reduction);
-    mlir::Value cmpNan = builder.create<mlir::arith::CmpFOp>(
-        loc, mlir::arith::CmpFPredicate::UNE, reduction, reduction);
-    mlir::Value cmpNan2 = builder.create<mlir::arith::CmpFOp>(
-        loc, mlir::arith::CmpFPredicate::OEQ, elem, elem);
-    cmpNan = builder.create<mlir::arith::AndIOp>(loc, cmpNan, cmpNan2);
-    return builder.create<mlir::arith::OrIOp>(loc, cmp, cmpNan);
-  } else if (mlir::isa<mlir::IntegerType>(reduction.getType())) {
-    return builder.create<mlir::arith::CmpIOp>(
-        loc,
-        isMax ? mlir::arith::CmpIPredicate::sgt
-              : mlir::arith::CmpIPredicate::slt,
-        elem, reduction);
-  }
-  llvm_unreachable("unsupported type");
-}
-
-/// Given a reduction operation with an elemental/designate source, attempt to
-/// generate a do-loop to perform the operation inline.
-///   %e = hlfir.elemental %shape unordered
-///   %r = hlfir.count %e
-/// =>
-///   %r = for.do_loop %arg = 1 to bound(%shape) step 1 iter_args(%arg2 = init)
-///     %i = <inline elemental>
-///     %c = <reduce count> %i
-///     fir.result %c
-template <typename Op>
-class ReductionConversion : public mlir::OpRewritePattern<Op> {
-public:
-  using mlir::OpRewritePattern<Op>::OpRewritePattern;
-
-  llvm::LogicalResult
-  matchAndRewrite(Op op, mlir::PatternRewriter &rewriter) const override {
-    mlir::Location loc = op.getLoc();
-    // Select source and validate its arguments.
-    mlir::Value source;
-    bool valid = false;
-    if constexpr (std::is_same_v<Op, hlfir::AnyOp> ||
-                  std::is_same_v<Op, hlfir::AllOp> ||
-                  std::is_same_v<Op, hlfir::CountOp>) {
-      source = op.getMask();
-      valid = !op.getDim();
-    } else if constexpr (std::is_same_v<Op, hlfir::MaxvalOp> ||
-                         std::is_same_v<Op, hlfir::MinvalOp>) {
-      source = op.getArray();
-      valid = !op.getDim() && !op.getMask();
-    } else if constexpr (std::is_same_v<Op, hlfir::MaxlocOp> ||
-                         std::is_same_v<Op, hlfir::MinlocOp>) {
-      source = op.getArray();
-      valid = !op.getDim() && !op.getMask() && !op.getBack();
-    }
-    if (!valid)
-      return rewriter.notifyMatchFailure(
-          op, "Currently does not accept optional arguments");
-
-    hlfir::ElementalOp elemental;
-    hlfir::DesignateOp designate;
-    mlir::Value shape;
-    if ((elemental = source.template getDefiningOp<hlfir::ElementalOp>())) {
-      shape = elemental.getOperand(0);
-    } else if ((designate =
-                    source.template getDefiningOp<hlfir::DesignateOp>())) {
-      shape = designate.getShape();
-    } else {
-      return rewriter.notifyMatchFailure(op, "Did not find valid argument");
-    }
-
-    auto inlineSource =
-        [elemental,
-         &designate](fir::FirOpBuilder builder, mlir::Location loc,
-                     const llvm::SmallVectorImpl<mlir::Value> &oneBasedIndices)
-        -> mlir::Value {
-      if (elemental) {
-        // Inline the elemental and get the value from it.
-        auto yield =
-            inlineElementalOp(loc, builder, elemental, oneBasedIndices);
-        auto tmp = yield.getElementValue();
-        yield->erase();
-        return tmp;
-      }
-      if (designate) {
-        // Create a designator over the array designator, then load the
-        // reference.
-        mlir::Value elementAddr = hlfir::getElementAt(
-            loc, builder, hlfir::Entity{designate.getResult()},
-            oneBasedIndices);
-        return builder.create<fir::LoadOp>(loc, elementAddr);
-      }
-      llvm_unreachable("unsupported type");
-    };
-
-    fir::FirOpBuilder builder{rewriter, op.getOperation()};
-
-    mlir::Value init;
-    GenBodyFn genBodyFn;
-    if constexpr (std::is_same_v<Op, hlfir::AnyOp>) {
-      init = builder.createIntegerConstant(loc, builder.getI1Type(), 0);
-      genBodyFn = [inlineSource](
-                      fir::FirOpBuilder builder, mlir::Location loc,
-                      mlir::Value reduction,
-                      const llvm::SmallVectorImpl<mlir::Value> &oneBasedIndices)
-          -> mlir::Value {
-        // Conditionally set the reduction variable.
-        mlir::Value cond = builder.create<fir::ConvertOp>(
-            loc, builder.getI1Type(),
-            inlineSource(builder, loc, oneBasedIndices));
-        return builder.create<mlir::arith::OrIOp>(loc, reduction, cond);
-      };
-    } else if constexpr (std::is_same_v<Op, hlfir::AllOp>) {
-      init = builder.createIntegerConstant(loc, builder.getI1Type(), 1);
-      genBodyFn = [inlineSource](
-                      fir::FirOpBuilder builder, mlir::Location loc,
-                      mlir::Value reduction,
-                      const llvm::SmallVectorImpl<mlir::Value> &oneBasedIndices)
-          -> mlir::Value {
-        // Conditionally set the reduction variable.
-        mlir::Value cond = builder.create<fir::ConvertOp>(
-            loc, builder.getI1Type(),
-            inlineSource(builder, loc, oneBasedIndices));
-        return builder.create<mlir::arith::AndIOp>(loc, reduction, cond);
-      };
-    } else if constexpr (std::is_same_v<Op, hlfir::CountOp>) {
-      init = builder.createIntegerConstant(loc, op.getType(), 0);
-      genBodyFn = [inlineSource](
-                      fir::FirOpBuilder builder, mlir::Location loc,
-                      mlir::Value reduction,
-                      const llvm::SmallVectorImpl<mlir::Value> &oneBasedIndices)
-          -> mlir::Value {
-        // Conditionally add one to the current value
-        mlir::Value cond = builder.create<fir::ConvertOp>(
-            loc, builder.getI1Type(),
-            inlineSource(builder, loc, oneBasedIndices));
-        mlir::Value one =
-            builder.createIntegerConstant(loc, reduction.getType(), 1);
-        mlir::Value add1 =
-            builder.create<mlir::arith::AddIOp>(loc, reduction, one);
-        return builder.create<mlir::arith::SelectOp>(loc, cond, add1,
-                                                     reduction);
-      };
-    } else if constexpr (std::is_same_v<Op, hlfir::MaxlocOp> ||
-                         std::is_same_v<Op, hlfir::MinlocOp>) {
-      // TODO: implement minloc/maxloc conversion.
-      return rewriter.notifyMatchFailure(
-          op, "Currently minloc/maxloc is not handled");
-    } else if constexpr (std::is_same_v<Op, hlfir::MaxvalOp> ||
-                         std::is_same_v<Op, hlfir::MinvalOp>) {
-      mlir::Type ty = op.getType();
-      if (!(mlir::isa<mlir::FloatType>(ty) ||
-            mlir::isa<mlir::IntegerType>(ty))) {
-        return rewriter.notifyMatchFailure(
-            op, "Type is not supported for Maxval or Minval yet");
-      }
-
-      bool isMax = std::is_same_v<Op, hlfir::MaxvalOp>;
-      init = makeMinMaxInitValGenerator(isMax)(builder, loc, ty);
-      genBodyFn = [inlineSource, isMax](
-                      fir::FirOpBuilder builder, mlir::Location loc,
-                      mlir::Value reduction,
-                      const llvm::SmallVectorImpl<mlir::Value> &oneBasedIndices)
-          -> mlir::Value {
-        mlir::Value val = inlineSource(builder, loc, oneBasedIndices);
-        mlir::Value cmp =
-            generateMinMaxComparison(builder, loc, val, reduction, isMax);
-        return builder.create<mlir::arith::SelectOp>(loc, cmp, val, reduction);
-      };
-    } else {
-      llvm_unreachable("unsupported type");
-    }
-
-    mlir::Value res =
-        generateReductionLoop(builder, loc, init, shape, genBodyFn);
-    if (res.getType() != op.getType())
-      res = builder.create<fir::ConvertOp>(loc, op.getType(), res);
-
-    // Check if the op was the only user of the source (apart from a destroy),
-    // and remove it if so.
-    mlir::Operation *sourceOp = source.getDefiningOp();
-    mlir::Operation::user_range srcUsers = sourceOp->getUsers();
-    hlfir::DestroyOp srcDestroy;
-    if (std::distance(srcUsers.begin(), srcUsers.end()) == 2) {
-      srcDestroy = mlir::dyn_cast<hlfir::DestroyOp>(*srcUsers.begin());
-      if (!srcDestroy)
-        srcDestroy = mlir::dyn_cast<hlfir::DestroyOp>(*++srcUsers.begin());
-    }
-
-    rewriter.replaceOp(op, res);
-    if (srcDestroy) {
-      rewriter.eraseOp(srcDestroy);
-      rewriter.eraseOp(sourceOp);
-    }
-    return mlir::success();
-  }
-};
-
-// Look for minloc(mask=elemental) and generate the minloc loop with
-// inlined elemental.
-//  %e = hlfir.elemental %shape ({ ... })
-//  %m = hlfir.minloc %array mask %e
-template <typename Op>
-class ReductionMaskConversion : public mlir::OpRewritePattern<Op> {
-public:
-  using mlir::OpRewritePattern<Op>::OpRewritePattern;
-
-  llvm::LogicalResult
-  matchAndRewrite(Op mloc, mlir::PatternRewriter &rewriter) const override {
-    if (!mloc.getMask() || mloc.getDim() || mloc.getBack())
-      return rewriter.notifyMatchFailure(mloc,
-                                         "Did not find valid minloc/maxloc");
-
-    bool isMax = std::is_same_v<Op, hlfir::MaxlocOp>;
-
-    auto elemental =
-        mloc.getMask().template getDefiningOp<hlfir::ElementalOp>();
-    if (!elemental || hlfir::elementalOpMustProduceTemp(elemental))
-      return rewriter.notifyMatchFailure(mloc, "Did not find elemental");
-
-    mlir::Value array = mloc.getArray();
-
-    unsigned rank = mlir::cast<hlfir::ExprType>(mloc.getType()).getShape()[0];
-    mlir::Type arrayType = array.getType();
-    if (!mlir::isa<fir::BoxType>(arrayType))
-      return rewriter.notifyMatchFailure(
-          mloc, "Currently requires a boxed type input");
-    mlir::Type elementType = hlfir::getFortranElementType(arrayType);
-    if (!fir::isa_trivial(elementType))
-      return rewriter.notifyMatchFailure(
-          mloc, "Character arrays are currently not handled");
-
-    mlir::Location loc = mloc.getLoc();
-    fir::FirOpBuilder builder{rewriter, mloc.getOperation()};
-    mlir::Value resultArr = builder.createTemporary(
-        loc, fir::SequenceType::get(
-                 rank, hlfir::getFortranElementType(mloc.getType())));
-
-    auto init = makeMinMaxInitValGenerator(isMax);
-
-    auto genBodyOp =
-        [&rank, &resultArr, &elemental, isMax](
-            fir::FirOpBuilder builder, mlir::Location loc,
-            mlir::Type elementType, mlir::Value array, mlir::Value flagRef,
-            mlir::Value reduction,
-            const llvm::SmallVectorImpl<mlir::Value> &indices) -> mlir::Value {
-      // We are in the innermost loop: generate the elemental inline
-      mlir::Value oneIdx =
-          builder.createIntegerConstant(loc, builder.getIndexType(), 1);
-      llvm::SmallVector<mlir::Value> oneBasedIndices;
-      llvm::transform(
-          indices, std::back_inserter(oneBasedIndices), [&](mlir::Value V) {
-            return builder.create<mlir::arith::AddIOp>(loc, V, oneIdx);
-          });
-      hlfir::YieldElementOp yield =
-          hlfir::inlineElementalOp(loc, builder, elemental, oneBasedIndices);
-      mlir::Value maskElem = yield.getElementValue();
-      yield->erase();
-
-      mlir::Type ifCompatType = builder.getI1Type();
-      mlir::Value ifCompatElem =
-          builder.create<fir::ConvertOp>(loc, ifCompatType, maskElem);
-
-      llvm::SmallVector<mlir::Type> resultsTy = {elementType, elementType};
-      fir::IfOp maskIfOp =
-          builder.create<fir::IfOp>(loc, elementType, ifCompatElem,
-                                    /*withElseRegion=*/true);
-      builder.setInsertionPointToStart(&maskIfOp.getThenRegion().front());
-
-      // Set flag that mask was true at some point
-      mlir::Value flagSet = builder.createIntegerConstant(
-          loc, mlir::cast<fir::ReferenceType>(flagRef.getType()).getEleTy(), 1);
-      mlir::Value isFirst = builder.create<fir::LoadOp>(loc, flagRef);
-      mlir::Value addr = hlfir::getElementAt(loc, builder, hlfir::Entity{array},
-                                             oneBasedIndices);
-      mlir::Value elem = builder.create<fir::LoadOp>(loc, addr);
-
-      // Compare with the max reduction value
-      mlir::Value cmp =
-          generateMinMaxComparison(builder, loc, elem, reduction, isMax);
-
-      // The condition used for the loop is isFirst || <the condition above>.
-      isFirst = builder.create<fir::ConvertOp>(loc, cmp.getType(), isFirst);
-      isFirst = builder.create<mlir::arith::XOrIOp>(
-          loc, isFirst, builder.createIntegerConstant(loc, cmp.getType(), 1));
-      cmp = builder.create<mlir::arith::OrIOp>(loc, cmp, isFirst);
-
-      // Set the new coordinate to the result
-      fir::IfOp ifOp = builder.create<fir::IfOp>(loc, elementType, cmp,
-                                                 /*withElseRegion*/ true);
-
-      builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
-      builder.create<fir::StoreOp>(loc, flagSet, flagRef);
-      mlir::Type resultElemTy =
-          hlfir::getFortranElementType(resultArr.getType());
-      mlir::Type returnRefTy = builder.getRefType(resultElemTy);
-      mlir::IndexType idxTy = builder.getIndexType();
-
-      for (unsigned int i = 0; i < rank; ++i) {
-        mlir::Value index = builder.createIntegerConstant(loc, idxTy, i + 1);
-        mlir::Value resultElemAddr = builder.create<hlfir::DesignateOp>(
-            loc, returnRefTy, resultArr, index);
-        mlir::Value fortranIndex = builder.create<fir::ConvertOp>(
-            loc, resultElemTy, oneBasedIndices[i]);
-        builder.create<fir::StoreOp>(loc, fortranIndex, resultElemAddr);
-      }
-      builder.create<fir::ResultOp>(loc, elem);
-      builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
-      builder.create<fir::ResultOp>(loc, reduction);
-      builder.setInsertionPointAfter(ifOp);
-
-      // Close the mask if
-      builder.create<fir::ResultOp>(loc, ifOp.getResult(0));
-      builder.setInsertionPointToStart(&maskIfOp.getElseRegion().front());
-      builder.create<fir::ResultOp>(loc, reduction);
-      builder.setInsertionPointAfter(maskIfOp);
-
-      return maskIfOp.getResult(0);
-    };
-    auto getAddrFn = [](fir::FirOpBuilder builder, mlir::Location loc,
-                        const mlir::Type &resultElemType, mlir::Value resultArr,
-                        mlir::Value index) {
-      mlir::Type resultRefTy = builder.getRefType(resultElemType);
-      mlir::Value oneIdx =
-          builder.createIntegerConstant(loc, builder.getIndexType(), 1);
-      index = builder.create<mlir::arith::AddIOp>(loc, index, oneIdx);
-      return builder.create<hlfir::DesignateOp>(loc, resultRefTy, resultArr,
-                                                index);
-    };
-
-    // Initialize the result
-    mlir::Type resultElemTy = hlfir::getFortranElementType(resultArr.getType());
-    mlir::Type resultRefTy = builder.getRefType(resultElemTy);
-    mlir::Value returnValue =
-        builder.createIntegerConstant(loc, resultElemTy, 0);
-    for (unsigned int i = 0; i < rank; ++i) {
-      mlir::Value index =
-          builder.createIntegerConstant(loc, builder.getIndexType(), i + 1);
-      mlir::Value resultElemAddr = builder.create<hlfir::DesignateOp>(
-          loc, resultRefTy, resultArr, index);
-      builder.create<fir::StoreOp>(loc, returnValue, resultElemAddr);
-    }
-
-    fir::genMinMaxlocReductionLoop(builder, array, init, genBodyOp, getAddrFn,
-                                   rank, elementType, loc, builder.getI1Type(),
-                                   resultArr, false);
-
-    mlir::Value asExpr = builder.create<hlfir::AsExprOp>(
-        loc, resultArr, builder.createBool(loc, false));
-
-    // Check all the users - the destroy is no longer required, and any assign
-    // can use resultArr directly so that InlineHLFIRAssign pass
-    // can optimize the results. Other operations are replaced with an AsExpr
-    // for the temporary resultArr.
-    llvm::SmallVector<hlfir::DestroyOp> destroys;
-    llvm::SmallVector<hlfir::AssignOp> assigns;
-    for (auto user : mloc->getUsers()) {
-      if (auto destroy = mlir::dyn_cast<hlfir::DestroyOp>(user))
-        destroys.push_back(destroy);
-      else if (auto assign = mlir::dyn_cast<hlfir::AssignOp>(user))
-        assigns.push_back(assign);
-    }
-
-    // Check if the minloc/maxloc was the only user of the elemental (apart from
-    // a destroy), and remove it if so.
-    mlir::Operation::user_range elemUsers = elemental->getUsers();
-    hlfir::DestroyOp elemDestroy;
-    if (std::distance(elemUsers.begin(), elemUsers.end()) == 2) {
-      elemDestroy = mlir::dyn_cast<hlfir::DestroyOp>(*elemUsers.begin());
-      if (!elemDestroy)
-        elemDestroy = mlir::dyn_cast<hlfir::DestroyOp>(*++elemUsers.begin());
-    }
-
-    for (auto d : destroys)
-      rewriter.eraseOp(d);
-    for (auto a : assigns)
-      a.setOperand(0, resultArr);
-    rewriter.replaceOp(mloc, asExpr);
-    if (elemDestroy) {
-      rewriter.eraseOp(elemDestroy);
-      rewriter.eraseOp(elemental);
-    }
-    return mlir::success();
-  }
-};
-
 class EvaluateIntoMemoryAssignBufferization
     : public mlir::OpRewritePattern<hlfir::EvaluateInMemoryOp> {
 
@@ -1340,19 +888,6 @@
     patterns.insert<ElementalAssignBufferization>(context);
     patterns.insert<BroadcastAssignBufferization>(context);
     patterns.insert<EvaluateIntoMemoryAssignBufferization>(context);
-    patterns.insert<ReductionConversion<hlfir::CountOp>>(context);
-    patterns.insert<ReductionConversion<hlfir::AnyOp>>(context);
-    patterns.insert<ReductionConversion<hlfir::AllOp>>(context);
-    // TODO: implement basic minloc/maxloc conversion.
-    // patterns.insert<ReductionConversion<hlfir::MaxlocOp>>(context);
-    // patterns.insert<ReductionConversion<hlfir::MinlocOp>>(context);
-    patterns.insert<ReductionConversion<hlfir::MaxvalOp>>(context);
-    patterns.insert<ReductionConversion<hlfir::MinvalOp>>(context);
-    patterns.insert<ReductionMaskConversion<hlfir::MinlocOp>>(context);
-    patterns.insert<ReductionMaskConversion<hlfir::MaxlocOp>>(context);
-    // TODO: implement masked minval/maxval conversion.
-    // patterns.insert<ReductionMaskConversion<hlfir::MaxvalOp>>(context);
-    // patterns.insert<ReductionMaskConversion<hlfir::MinvalOp>>(context);
 
     if (mlir::failed(mlir::applyPatternsGreedily(
             getOperation(), std::move(patterns), config))) {
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
index bac1012..e9d820a 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
@@ -173,245 +173,901 @@
   }
 };
 
-// Expand the SUM(DIM=CONSTANT) operation into .
-class SumAsElementalConversion : public mlir::OpRewritePattern<hlfir::SumOp> {
+/// Base class for converting reduction-like operations into
+/// a reduction loop[-nest] optionally wrapped into hlfir.elemental.
+/// It is used to handle operations produced for ALL, ANY, COUNT,
+/// MAXLOC, MAXVAL, MINLOC, MINVAL, SUM intrinsics.
+///
+/// All of these operations take an input array, and optional
+/// DIM and MASK arguments. ALL, ANY and COUNT do not have a MASK argument.
+class ReductionAsElementalConverter {
 public:
-  using mlir::OpRewritePattern<hlfir::SumOp>::OpRewritePattern;
-
-  llvm::LogicalResult
-  matchAndRewrite(hlfir::SumOp sum,
-                  mlir::PatternRewriter &rewriter) const override {
-    hlfir::Entity array = hlfir::Entity{sum.getArray()};
-    bool isTotalReduction = hlfir::Entity{sum}.getRank() == 0;
-    mlir::Value dim = sum.getDim();
-    int64_t dimVal = 0;
-    if (!isTotalReduction) {
-      // In case of partial reduction we should ignore the operations
-      // with invalid DIM values. They may appear in dead code
-      // after constant propagation.
-      auto constDim = fir::getIntIfConstant(dim);
-      if (!constDim)
-        return rewriter.notifyMatchFailure(sum, "Nonconstant DIM for SUM");
-      dimVal = *constDim;
-
-      if ((dimVal <= 0 || dimVal > array.getRank()))
-        return rewriter.notifyMatchFailure(
-            sum, "Invalid DIM for partial SUM reduction");
-    }
-
-    mlir::Location loc = sum.getLoc();
-    fir::FirOpBuilder builder{rewriter, sum.getOperation()};
-    mlir::Type elementType = hlfir::getFortranElementType(sum.getType());
-    mlir::Value mask = sum.getMask();
-
-    mlir::Value resultShape, dimExtent;
-    llvm::SmallVector<mlir::Value> arrayExtents;
-    if (isTotalReduction)
-      arrayExtents = hlfir::genExtentsVector(loc, builder, array);
-    else
-      std::tie(resultShape, dimExtent) =
-          genResultShapeForPartialReduction(loc, builder, array, dimVal);
-
-    // If the mask is present and is a scalar, then we'd better load its value
-    // outside of the reduction loop making the loop unswitching easier.
-    mlir::Value isPresentPred, maskValue;
-    if (mask) {
-      if (mlir::isa<fir::BaseBoxType>(mask.getType())) {
-        // MASK represented by a box might be dynamically optional,
-        // so we have to check for its presence before accessing it.
-        isPresentPred =
-            builder.create<fir::IsPresentOp>(loc, builder.getI1Type(), mask);
-      }
-
-      if (hlfir::Entity{mask}.isScalar())
-        maskValue = genMaskValue(loc, builder, mask, isPresentPred, {});
-    }
-
-    auto genKernel = [&](mlir::Location loc, fir::FirOpBuilder &builder,
-                         mlir::ValueRange inputIndices) -> hlfir::Entity {
-      // Loop over all indices in the DIM dimension, and reduce all values.
-      // If DIM is not present, do total reduction.
-
-      // Initial value for the reduction.
-      mlir::Value reductionInitValue =
-          fir::factory::createZeroValue(builder, loc, elementType);
-
-      // The reduction loop may be unordered if FastMathFlags::reassoc
-      // transformations are allowed. The integer reduction is always
-      // unordered.
-      bool isUnordered = mlir::isa<mlir::IntegerType>(elementType) ||
-                         static_cast<bool>(sum.getFastmath() &
-                                           mlir::arith::FastMathFlags::reassoc);
-
-      llvm::SmallVector<mlir::Value> extents;
-      if (isTotalReduction)
-        extents = arrayExtents;
-      else
-        extents.push_back(
-            builder.createConvert(loc, builder.getIndexType(), dimExtent));
-
-      auto genBody = [&](mlir::Location loc, fir::FirOpBuilder &builder,
-                         mlir::ValueRange oneBasedIndices,
-                         mlir::ValueRange reductionArgs)
-          -> llvm::SmallVector<mlir::Value, 1> {
-        // Generate the reduction loop-nest body.
-        // The initial reduction value in the innermost loop
-        // is passed via reductionArgs[0].
-        llvm::SmallVector<mlir::Value> indices;
-        if (isTotalReduction) {
-          indices = oneBasedIndices;
-        } else {
-          indices = inputIndices;
-          indices.insert(indices.begin() + dimVal - 1, oneBasedIndices[0]);
-        }
-
-        mlir::Value reductionValue = reductionArgs[0];
-        fir::IfOp ifOp;
-        if (mask) {
-          // Make the reduction value update conditional on the value
-          // of the mask.
-          if (!maskValue) {
-            // If the mask is an array, use the elemental and the loop indices
-            // to address the proper mask element.
-            maskValue =
-                genMaskValue(loc, builder, mask, isPresentPred, indices);
-          }
-          mlir::Value isUnmasked = builder.create<fir::ConvertOp>(
-              loc, builder.getI1Type(), maskValue);
-          ifOp = builder.create<fir::IfOp>(loc, elementType, isUnmasked,
-                                           /*withElseRegion=*/true);
-          // In the 'else' block return the current reduction value.
-          builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
-          builder.create<fir::ResultOp>(loc, reductionValue);
-
-          // In the 'then' block do the actual addition.
-          builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
-        }
-
-        hlfir::Entity element =
-            hlfir::getElementAt(loc, builder, array, indices);
-        hlfir::Entity elementValue =
-            hlfir::loadTrivialScalar(loc, builder, element);
-        // NOTE: we can use "Kahan summation" same way as the runtime
-        // (e.g. when fast-math is not allowed), but let's start with
-        // the simple version.
-        reductionValue =
-            genScalarAdd(loc, builder, reductionValue, elementValue);
-
-        if (ifOp) {
-          builder.create<fir::ResultOp>(loc, reductionValue);
-          builder.setInsertionPointAfter(ifOp);
-          reductionValue = ifOp.getResult(0);
-        }
-
-        return {reductionValue};
-      };
-
-      llvm::SmallVector<mlir::Value, 1> reductionFinalValues =
-          hlfir::genLoopNestWithReductions(loc, builder, extents,
-                                           {reductionInitValue}, genBody,
-                                           isUnordered);
-      return hlfir::Entity{reductionFinalValues[0]};
-    };
-
-    if (isTotalReduction) {
-      hlfir::Entity result = genKernel(loc, builder, mlir::ValueRange{});
-      rewriter.replaceOp(sum, result);
-      return mlir::success();
-    }
-
-    hlfir::ElementalOp elementalOp = hlfir::genElementalOp(
-        loc, builder, elementType, resultShape, {}, genKernel,
-        /*isUnordered=*/true, /*polymorphicMold=*/nullptr,
-        sum.getResult().getType());
-
-    // it wouldn't be safe to replace block arguments with a different
-    // hlfir.expr type. Types can differ due to differing amounts of shape
-    // information
-    assert(elementalOp.getResult().getType() == sum.getResult().getType());
-
-    rewriter.replaceOp(sum, elementalOp);
-    return mlir::success();
+  ReductionAsElementalConverter(mlir::Operation *op,
+                                mlir::PatternRewriter &rewriter)
+      : op{op}, rewriter{rewriter}, loc{op->getLoc()}, builder{rewriter, op} {
+    assert(op->getNumResults() == 1);
   }
+  virtual ~ReductionAsElementalConverter() {}
+
+  /// Do the actual conversion or return mlir::failure(),
+  /// if conversion is not possible.
+  mlir::LogicalResult convert();
 
 private:
   // Return fir.shape specifying the shape of the result
-  // of a SUM reduction with DIM=dimVal. The second return value
+  // of a reduction with DIM=dimVal. The second return value
   // is the extent of the DIM dimension.
-  static std::tuple<mlir::Value, mlir::Value>
-  genResultShapeForPartialReduction(mlir::Location loc,
-                                    fir::FirOpBuilder &builder,
-                                    hlfir::Entity array, int64_t dimVal) {
-    llvm::SmallVector<mlir::Value> inExtents =
-        hlfir::genExtentsVector(loc, builder, array);
-    assert(dimVal > 0 && dimVal <= static_cast<int64_t>(inExtents.size()) &&
-           "DIM must be present and a positive constant not exceeding "
-           "the array's rank");
+  std::tuple<mlir::Value, mlir::Value>
+  genResultShapeForPartialReduction(hlfir::Entity array, int64_t dimVal);
 
-    mlir::Value dimExtent = inExtents[dimVal - 1];
-    inExtents.erase(inExtents.begin() + dimVal - 1);
-    return {builder.create<fir::ShapeOp>(loc, inExtents), dimExtent};
+  /// \p mask is a scalar or array logical mask.
+  /// If \p isPresentPred is not nullptr, it is a dynamic predicate value
+  /// identifying whether the mask's variable is present.
+  /// \p indices is a range of one-based indices to access \p mask
+  /// when it is an array.
+  ///
+  /// The method returns the scalar mask value to guard the access
+  /// to a single element of the input array.
+  mlir::Value genMaskValue(mlir::Value mask, mlir::Value isPresentPred,
+                           mlir::ValueRange indices);
+
+protected:
+  /// Return the input array.
+  virtual mlir::Value getSource() const = 0;
+
+  /// Return DIM or nullptr, if it is not present.
+  virtual mlir::Value getDim() const = 0;
+
+  /// Return MASK or nullptr, if it is not present.
+  virtual mlir::Value getMask() const { return nullptr; }
+
+  /// Return FastMathFlags attached to the operation
+  /// or arith::FastMathFlags::none, if the operation
+  /// does not support FastMathFlags (e.g. ALL, ANY, COUNT).
+  virtual mlir::arith::FastMathFlags getFastMath() const {
+    return mlir::arith::FastMathFlags::none;
+  }
+
+  /// Generates initial values for the reduction values used
+  /// by the reduction loop. In general, there is a single
+  /// loop-carried reduction value (e.g. for SUM), but, for example,
+  /// MAXLOC/MINLOC implementation uses multiple reductions.
+  virtual llvm::SmallVector<mlir::Value> genReductionInitValues() = 0;
+
+  /// Perform reduction(s) update given a single input array's element
+  /// identified by \p array and \p oneBasedIndices coordinates.
+  /// \p currentValue specifies the current value(s) of the reduction(s)
+  /// inside the reduction loop body.
+  virtual llvm::SmallVector<mlir::Value>
+  reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue,
+                   hlfir::Entity array, mlir::ValueRange oneBasedIndices) = 0;
+
+  /// Given reduction value(s) in \p reductionResults produced
+  /// by the reduction loop, apply any required updates and return
+  /// new reduction value(s) to be used after the reduction loop
+  /// (e.g. as the result yield of the wrapping hlfir.elemental).
+  /// NOTE: if the reduction loop is wrapped in hlfir.elemental,
+  /// the insertion point of any generated code is inside hlfir.elemental.
+  virtual hlfir::Entity
+  genFinalResult(const llvm::SmallVectorImpl<mlir::Value> &reductionResults) {
+    assert(reductionResults.size() == 1 &&
+           "default implementation of genFinalResult expect a single reduction "
+           "value");
+    return hlfir::Entity{reductionResults[0]};
+  }
+
+  /// Return mlir::success(), if the operation can be converted.
+  /// The default implementation always returns mlir::success().
+  /// The derived type may override the default implementation
+  /// with its own definition.
+  virtual mlir::LogicalResult isConvertible() const { return mlir::success(); }
+
+  // Default implementation of isTotalReduction() just checks
+  // if the result of the operation is a scalar.
+  // True result indicates that the reduction has to be done
+  // across all elements, false result indicates that
+  // the result is an array expression produced by an hlfir.elemental
+  // operation with a single reduction loop across the DIM dimension.
+  //
+  // MAXLOC/MINLOC must override this.
+  virtual bool isTotalReduction() const { return getResultRank() == 0; }
+
+  // Return true, if the reduction loop[-nest] may be unordered.
+  // In general, FP reductions may only be unordered when
+  // FastMathFlags::reassoc transformations are allowed.
+  //
+  // Some derived types may need to override this.
+  virtual bool isUnordered() const {
+    mlir::Type elemType = getSourceElementType();
+    if (mlir::isa<mlir::IntegerType, fir::LogicalType, fir::CharacterType>(
+            elemType))
+      return true;
+    return static_cast<bool>(getFastMath() &
+                             mlir::arith::FastMathFlags::reassoc);
+  }
+
+  /// Return 0, if DIM is not present or its value does not matter
+  /// (for example, a reduction of 1D array does not care about
+  /// the DIM value, assuming that it is a valid program).
+  /// Return mlir::failure(), if DIM is a constant known
+  /// to be invalid for the given array.
+  /// Otherwise, return DIM constant value.
+  mlir::FailureOr<int64_t> getConstDim() const {
+    int64_t dimVal = 0;
+    if (!isTotalReduction()) {
+      // In case of partial reduction we should ignore the operations
+      // with invalid DIM values. They may appear in dead code
+      // after constant propagation.
+      auto constDim = fir::getIntIfConstant(getDim());
+      if (!constDim)
+        return rewriter.notifyMatchFailure(op, "Nonconstant DIM");
+      dimVal = *constDim;
+
+      if ((dimVal <= 0 || dimVal > getSourceRank()))
+        return rewriter.notifyMatchFailure(op,
+                                           "Invalid DIM for partial reduction");
+    }
+    return dimVal;
+  }
+
+  /// Return hlfir::Entity of the result.
+  hlfir::Entity getResultEntity() const {
+    return hlfir::Entity{op->getResult(0)};
+  }
+
+  /// Return type of the result (e.g. !hlfir.expr<?xi32>).
+  mlir::Type getResultType() const { return getResultEntity().getType(); }
+
+  /// Return the element type of the result (e.g. i32).
+  mlir::Type getResultElementType() const {
+    return hlfir::getFortranElementType(getResultType());
+  }
+
+  /// Return rank of the result.
+  unsigned getResultRank() const { return getResultEntity().getRank(); }
+
+  /// Return the element type of the source.
+  mlir::Type getSourceElementType() const {
+    return hlfir::getFortranElementType(getSource().getType());
+  }
+
+  /// Return rank of the input array.
+  unsigned getSourceRank() const {
+    return hlfir::Entity{getSource()}.getRank();
+  }
+
+  /// The reduction operation.
+  mlir::Operation *op;
+
+  mlir::PatternRewriter &rewriter;
+  mlir::Location loc;
+  fir::FirOpBuilder builder;
+};
+
+/// Generate initialization value for MIN or MAX reduction
+/// of the given \p type.
+template <bool IS_MAX>
+static mlir::Value genMinMaxInitValue(mlir::Location loc,
+                                      fir::FirOpBuilder &builder,
+                                      mlir::Type type) {
+  if (auto ty = mlir::dyn_cast<mlir::FloatType>(type)) {
+    const llvm::fltSemantics &sem = ty.getFloatSemantics();
+    // We must not use +/-INF here. If the reduction input is empty,
+    // the result of reduction must be +/-LARGEST.
+    llvm::APFloat limit = llvm::APFloat::getLargest(sem, /*Negative=*/IS_MAX);
+    return builder.createRealConstant(loc, type, limit);
+  }
+  unsigned bits = type.getIntOrFloatBitWidth();
+  int64_t limitInt = IS_MAX
+                         ? llvm::APInt::getSignedMinValue(bits).getSExtValue()
+                         : llvm::APInt::getSignedMaxValue(bits).getSExtValue();
+  return builder.createIntegerConstant(loc, type, limitInt);
+}
+
+/// Generate a comparison of an array element value \p elem
+/// and the current reduction value \p reduction for MIN/MAX reduction.
+template <bool IS_MAX>
+static mlir::Value
+genMinMaxComparison(mlir::Location loc, fir::FirOpBuilder &builder,
+                    mlir::Value elem, mlir::Value reduction) {
+  if (mlir::isa<mlir::FloatType>(reduction.getType())) {
+    // For FP reductions we want the first smallest value to be used, that
+    // is not NaN. An OGT/OLT condition will usually work for this unless all
+    // the values are NaN or Inf. This follows the same logic as
+    // NumericCompare for Minloc/Maxloc in extrema.cpp.
+    mlir::Value cmp = builder.create<mlir::arith::CmpFOp>(
+        loc,
+        IS_MAX ? mlir::arith::CmpFPredicate::OGT
+               : mlir::arith::CmpFPredicate::OLT,
+        elem, reduction);
+    mlir::Value cmpNan = builder.create<mlir::arith::CmpFOp>(
+        loc, mlir::arith::CmpFPredicate::UNE, reduction, reduction);
+    mlir::Value cmpNan2 = builder.create<mlir::arith::CmpFOp>(
+        loc, mlir::arith::CmpFPredicate::OEQ, elem, elem);
+    cmpNan = builder.create<mlir::arith::AndIOp>(loc, cmpNan, cmpNan2);
+    return builder.create<mlir::arith::OrIOp>(loc, cmp, cmpNan);
+  } else if (mlir::isa<mlir::IntegerType>(reduction.getType())) {
+    return builder.create<mlir::arith::CmpIOp>(
+        loc,
+        IS_MAX ? mlir::arith::CmpIPredicate::sgt
+               : mlir::arith::CmpIPredicate::slt,
+        elem, reduction);
+  }
+  llvm_unreachable("unsupported type");
+}
+
+/// Implementation of ReductionAsElementalConverter interface
+/// for MAXLOC/MINLOC.
+template <typename T>
+class MinMaxlocAsElementalConverter : public ReductionAsElementalConverter {
+  static_assert(std::is_same_v<T, hlfir::MaxlocOp> ||
+                std::is_same_v<T, hlfir::MinlocOp>);
+  static constexpr unsigned maxRank = Fortran::common::maxRank;
+  // We have the following reduction values in the reduction loop:
+  //   * N integer coordinates, where N is:
+  //     - RANK(ARRAY) for total reductions.
+  //     - 1 for partial reductions.
+  //   * 1 reduction value holding the current MIN/MAX.
+  //   * 1 boolean indicating whether it is the first time
+  //     the mask is true.
+  static constexpr unsigned maxNumReductions = Fortran::common::maxRank + 2;
+  static constexpr bool isMax = std::is_same_v<T, hlfir::MaxlocOp>;
+  using Base = ReductionAsElementalConverter;
+
+public:
+  MinMaxlocAsElementalConverter(T op, mlir::PatternRewriter &rewriter)
+      : Base{op.getOperation(), rewriter} {}
+
+private:
+  virtual mlir::Value getSource() const final { return getOp().getArray(); }
+  virtual mlir::Value getDim() const final { return getOp().getDim(); }
+  virtual mlir::Value getMask() const final { return getOp().getMask(); }
+  virtual mlir::arith::FastMathFlags getFastMath() const final {
+    return getOp().getFastmath();
+  }
+
+  virtual mlir::LogicalResult isConvertible() const final {
+    if (getOp().getBack())
+      return rewriter.notifyMatchFailure(
+          getOp(), "BACK is not supported for MINLOC/MAXLOC inlining");
+    if (mlir::isa<fir::CharacterType>(getSourceElementType()))
+      return rewriter.notifyMatchFailure(
+          getOp(),
+          "CHARACTER type is not supported for MINLOC/MAXLOC inlining");
+    return mlir::success();
+  }
+
+  // If the result is scalar, then DIM does not matter,
+  // and this is a total reduction.
+  // If DIM is not present, this is a total reduction.
+  virtual bool isTotalReduction() const final {
+    return getResultRank() == 0 || !getDim();
+  }
+
+  virtual llvm::SmallVector<mlir::Value> genReductionInitValues() final;
+  virtual llvm::SmallVector<mlir::Value>
+  reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue,
+                   hlfir::Entity array, mlir::ValueRange oneBasedIndices) final;
+  virtual hlfir::Entity genFinalResult(
+      const llvm::SmallVectorImpl<mlir::Value> &reductionResults) final;
+
+private:
+  T getOp() const { return mlir::cast<T>(op); }
+
+  unsigned getNumCoors() const {
+    return isTotalReduction() ? getSourceRank() : 1;
+  }
+
+  void
+  checkReductions(const llvm::SmallVectorImpl<mlir::Value> &reductions) const {
+    assert(reductions.size() == getNumCoors() + 2 &&
+           "invalid number of reductions for MINLOC/MAXLOC");
+  }
+
+  mlir::Value
+  getCurrentMinMax(const llvm::SmallVectorImpl<mlir::Value> &reductions) const {
+    checkReductions(reductions);
+    return reductions[getNumCoors()];
+  }
+
+  mlir::Value
+  getIsFirst(const llvm::SmallVectorImpl<mlir::Value> &reductions) const {
+    checkReductions(reductions);
+    return reductions[getNumCoors() + 1];
+  }
+};
+
+template <typename T>
+llvm::SmallVector<mlir::Value>
+MinMaxlocAsElementalConverter<T>::genReductionInitValues() {
+  // Initial value for the coordinate(s) is zero.
+  mlir::Value zeroCoor =
+      fir::factory::createZeroValue(builder, loc, getResultElementType());
+  llvm::SmallVector<mlir::Value> result(getNumCoors(), zeroCoor);
+
+  // Initial value for the MIN/MAX value.
+  mlir::Value minMaxInit =
+      genMinMaxInitValue<isMax>(loc, builder, getSourceElementType());
+  result.push_back(minMaxInit);
+
+  // Initial value for isFirst predicate. It is switched to false,
+  // when the reduction update dynamically happens inside the reduction
+  // loop.
+  mlir::Value trueVal = builder.createBool(loc, true);
+  result.push_back(trueVal);
+
+  return result;
+}
+
+template <typename T>
+llvm::SmallVector<mlir::Value>
+MinMaxlocAsElementalConverter<T>::reduceOneElement(
+    const llvm::SmallVectorImpl<mlir::Value> &currentValue, hlfir::Entity array,
+    mlir::ValueRange oneBasedIndices) {
+  checkReductions(currentValue);
+  hlfir::Entity elementValue =
+      hlfir::loadElementAt(loc, builder, array, oneBasedIndices);
+  mlir::Value cmp = genMinMaxComparison<isMax>(loc, builder, elementValue,
+                                               getCurrentMinMax(currentValue));
+  // If isFirst is true, then do the reduction update regardless
+  // of the FP comparison.
+  cmp = builder.create<mlir::arith::OrIOp>(loc, cmp, getIsFirst(currentValue));
+
+  llvm::SmallVector<mlir::Value> newIndices;
+  int64_t dim = 1;
+  if (!isTotalReduction()) {
+    auto dimVal = getConstDim();
+    assert(mlir::succeeded(dimVal) &&
+           "partial MINLOC/MAXLOC reduction with invalid DIM");
+    dim = *dimVal;
+    assert(getNumCoors() == 1 &&
+           "partial MAXLOC/MINLOC reduction must compute one coordinate");
+  }
+
+  for (unsigned coorIdx = 0; coorIdx < getNumCoors(); ++coorIdx) {
+    mlir::Value currentCoor = currentValue[coorIdx];
+    mlir::Value newCoor = builder.createConvert(
+        loc, currentCoor.getType(), oneBasedIndices[coorIdx + dim - 1]);
+    mlir::Value update =
+        builder.create<mlir::arith::SelectOp>(loc, cmp, newCoor, currentCoor);
+    newIndices.push_back(update);
+  }
+
+  mlir::Value newMinMax = builder.create<mlir::arith::SelectOp>(
+      loc, cmp, elementValue, getCurrentMinMax(currentValue));
+  newIndices.push_back(newMinMax);
+
+  mlir::Value newIsFirst = builder.createBool(loc, false);
+  newIndices.push_back(newIsFirst);
+
+  assert(currentValue.size() == newIndices.size() &&
+         "invalid number of updated reductions");
+
+  return newIndices;
+}
+
+template <typename T>
+hlfir::Entity MinMaxlocAsElementalConverter<T>::genFinalResult(
+    const llvm::SmallVectorImpl<mlir::Value> &reductionResults) {
+  // Identification of the final result of MINLOC/MAXLOC:
+  //   * If DIM is absent, the result is a rank-one array.
+  //   * If DIM is present:
+  //     - The result is scalar for rank-one input.
+  //     - The result is an array of rank RANK(ARRAY)-1.
+  checkReductions(reductionResults);
+
+  // 16.9.137 & 16.9.143:
+  // The subscripts returned by MINLOC/MAXLOC are in the range
+  // 1 to the extent of the corresponding dimension.
+  mlir::Type indexType = builder.getIndexType();
+
+  // For partial reductions, the final result of the reduction
+  // loop is just a scalar - the coordinate within DIM dimension.
+  if (getResultRank() == 0 || !isTotalReduction()) {
+    // The result is a scalar, so just return the scalar.
+    assert(getNumCoors() == 1 &&
+           "unpexpected number of coordinates for scalar result");
+    return hlfir::Entity{reductionResults[0]};
+  }
+  // This is a total reduction, and there is no wrapping hlfir.elemental.
+  // We have to pack the reduced coordinates into a rank-one array.
+  unsigned rank = getSourceRank();
+  // TODO: in order to avoid introducing new memory effects
+  // we should not use a temporary in memory.
+  // We can use hlfir.elemental with a switch to pack all the coordinates
+  // into an array expression, or we can have a dedicated HLFIR operation
+  // for this.
+  mlir::Value tempArray = builder.createTemporary(
+      loc, fir::SequenceType::get(rank, getResultElementType()));
+  for (unsigned i = 0; i < rank; ++i) {
+    mlir::Value coor = reductionResults[i];
+    mlir::Value idx = builder.createIntegerConstant(loc, indexType, i + 1);
+    mlir::Value resultElement =
+        hlfir::getElementAt(loc, builder, hlfir::Entity{tempArray}, {idx});
+    builder.create<hlfir::AssignOp>(loc, coor, resultElement);
+  }
+  mlir::Value tempExpr = builder.create<hlfir::AsExprOp>(
+      loc, tempArray, builder.createBool(loc, false));
+  return hlfir::Entity{tempExpr};
+}
+
+/// Base class for numeric reductions like MAXVAL, MINVAL, SUM.
+template <typename OpT>
+class NumericReductionAsElementalConverterBase
+    : public ReductionAsElementalConverter {
+  using Base = ReductionAsElementalConverter;
+
+protected:
+  NumericReductionAsElementalConverterBase(OpT op,
+                                           mlir::PatternRewriter &rewriter)
+      : Base{op.getOperation(), rewriter} {}
+
+  virtual mlir::Value getSource() const final { return getOp().getArray(); }
+  virtual mlir::Value getDim() const final { return getOp().getDim(); }
+  virtual mlir::Value getMask() const final { return getOp().getMask(); }
+  virtual mlir::arith::FastMathFlags getFastMath() const final {
+    return getOp().getFastmath();
+  }
+
+  OpT getOp() const { return mlir::cast<OpT>(op); }
+
+  void checkReductions(const llvm::SmallVectorImpl<mlir::Value> &reductions) {
+    assert(reductions.size() == 1 && "reduction must produce single value");
+  }
+};
+
+/// Reduction converter for MAXVAL/MINVAL.
+template <typename T>
+class MinMaxvalAsElementalConverter
+    : public NumericReductionAsElementalConverterBase<T> {
+  static_assert(std::is_same_v<T, hlfir::MaxvalOp> ||
+                std::is_same_v<T, hlfir::MinvalOp>);
+  // We have two reduction values:
+  //   * The current MIN/MAX value.
+  //   * 1 boolean indicating whether it is the first time
+  //     the mask is true.
+  //
+  // The boolean flag is used to replace the initial value
+  // with the first input element even if it is NaN.
+  static constexpr unsigned numReductions = 2;
+  static constexpr bool isMax = std::is_same_v<T, hlfir::MaxvalOp>;
+  using Base = NumericReductionAsElementalConverterBase<T>;
+
+public:
+  MinMaxvalAsElementalConverter(T op, mlir::PatternRewriter &rewriter)
+      : Base{op, rewriter} {}
+
+private:
+  virtual mlir::LogicalResult isConvertible() const final {
+    if (mlir::isa<fir::CharacterType>(this->getSourceElementType()))
+      return this->rewriter.notifyMatchFailure(
+          this->getOp(),
+          "CHARACTER type is not supported for MINVAL/MAXVAL inlining");
+    return mlir::success();
+  }
+
+  virtual llvm::SmallVector<mlir::Value> genReductionInitValues() final {
+    llvm::SmallVector<mlir::Value> result;
+    fir::FirOpBuilder &builder = this->builder;
+    mlir::Location loc = this->loc;
+    mlir::Value init =
+        genMinMaxInitValue<isMax>(loc, builder, this->getResultElementType());
+    result.push_back(init);
+    // Initial value for isFirst predicate. It is switched to false,
+    // when the reduction update dynamically happens inside the reduction
+    // loop.
+    result.push_back(builder.createBool(loc, true));
+    return result;
+  }
+
+  virtual llvm::SmallVector<mlir::Value>
+  reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue,
+                   hlfir::Entity array,
+                   mlir::ValueRange oneBasedIndices) final {
+    this->checkReductions(currentValue);
+    llvm::SmallVector<mlir::Value> result;
+    fir::FirOpBuilder &builder = this->builder;
+    mlir::Location loc = this->loc;
+    hlfir::Entity elementValue =
+        hlfir::loadElementAt(loc, builder, array, oneBasedIndices);
+    mlir::Value currentMinMax = getCurrentMinMax(currentValue);
+    mlir::Value cmp =
+        genMinMaxComparison<isMax>(loc, builder, elementValue, currentMinMax);
+    cmp =
+        builder.create<mlir::arith::OrIOp>(loc, cmp, getIsFirst(currentValue));
+    mlir::Value newMinMax = builder.create<mlir::arith::SelectOp>(
+        loc, cmp, elementValue, currentMinMax);
+    result.push_back(newMinMax);
+    result.push_back(builder.createBool(loc, false));
+    return result;
+  }
+
+  virtual hlfir::Entity genFinalResult(
+      const llvm::SmallVectorImpl<mlir::Value> &reductionResults) final {
+    this->checkReductions(reductionResults);
+    return hlfir::Entity{getCurrentMinMax(reductionResults)};
+  }
+
+  void
+  checkReductions(const llvm::SmallVectorImpl<mlir::Value> &reductions) const {
+    assert(reductions.size() == numReductions &&
+           "invalid number of reductions for MINVAL/MAXVAL");
+  }
+
+  mlir::Value
+  getCurrentMinMax(const llvm::SmallVectorImpl<mlir::Value> &reductions) const {
+    this->checkReductions(reductions);
+    return reductions[0];
+  }
+
+  mlir::Value
+  getIsFirst(const llvm::SmallVectorImpl<mlir::Value> &reductions) const {
+    this->checkReductions(reductions);
+    return reductions[1];
+  }
+};
+
+/// Reduction converter for SUM.
+class SumAsElementalConverter
+    : public NumericReductionAsElementalConverterBase<hlfir::SumOp> {
+  using Base = NumericReductionAsElementalConverterBase;
+
+public:
+  SumAsElementalConverter(hlfir::SumOp op, mlir::PatternRewriter &rewriter)
+      : Base{op, rewriter} {}
+
+private:
+  virtual llvm::SmallVector<mlir::Value> genReductionInitValues() final {
+    return {
+        fir::factory::createZeroValue(builder, loc, getResultElementType())};
+  }
+  virtual llvm::SmallVector<mlir::Value>
+  reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue,
+                   hlfir::Entity array,
+                   mlir::ValueRange oneBasedIndices) final {
+    checkReductions(currentValue);
+    hlfir::Entity elementValue =
+        hlfir::loadElementAt(loc, builder, array, oneBasedIndices);
+    // NOTE: we can use "Kahan summation" same way as the runtime
+    // (e.g. when fast-math is not allowed), but let's start with
+    // the simple version.
+    return {genScalarAdd(currentValue[0], elementValue)};
   }
 
   // Generate scalar addition of the two values (of the same data type).
-  static mlir::Value genScalarAdd(mlir::Location loc,
-                                  fir::FirOpBuilder &builder,
-                                  mlir::Value value1, mlir::Value value2) {
-    mlir::Type ty = value1.getType();
-    assert(ty == value2.getType() && "reduction values' types do not match");
-    if (mlir::isa<mlir::FloatType>(ty))
-      return builder.create<mlir::arith::AddFOp>(loc, value1, value2);
-    else if (mlir::isa<mlir::ComplexType>(ty))
-      return builder.create<fir::AddcOp>(loc, value1, value2);
-    else if (mlir::isa<mlir::IntegerType>(ty))
-      return builder.create<mlir::arith::AddIOp>(loc, value1, value2);
+  mlir::Value genScalarAdd(mlir::Value value1, mlir::Value value2);
+};
 
-    llvm_unreachable("unsupported SUM reduction type");
+/// Base class for logical reductions like ALL, ANY, COUNT.
+/// They do not have MASK and FastMathFlags.
+template <typename OpT>
+class LogicalReductionAsElementalConverterBase
+    : public ReductionAsElementalConverter {
+  using Base = ReductionAsElementalConverter;
+
+public:
+  LogicalReductionAsElementalConverterBase(OpT op,
+                                           mlir::PatternRewriter &rewriter)
+      : Base{op.getOperation(), rewriter} {}
+
+protected:
+  OpT getOp() const { return mlir::cast<OpT>(op); }
+
+  void checkReductions(const llvm::SmallVectorImpl<mlir::Value> &reductions) {
+    assert(reductions.size() == 1 && "reduction must produce single value");
   }
 
-  static mlir::Value genMaskValue(mlir::Location loc,
-                                  fir::FirOpBuilder &builder, mlir::Value mask,
-                                  mlir::Value isPresentPred,
-                                  mlir::ValueRange indices) {
-    mlir::OpBuilder::InsertionGuard guard(builder);
-    fir::IfOp ifOp;
-    mlir::Type maskType =
-        hlfir::getFortranElementType(fir::unwrapPassByRefType(mask.getType()));
-    if (isPresentPred) {
-      ifOp = builder.create<fir::IfOp>(loc, maskType, isPresentPred,
-                                       /*withElseRegion=*/true);
+  virtual mlir::Value getSource() const final { return getOp().getMask(); }
+  virtual mlir::Value getDim() const final { return getOp().getDim(); }
 
-      // Use 'true', if the mask is not present.
-      builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
-      mlir::Value trueValue = builder.createBool(loc, true);
-      trueValue = builder.createConvert(loc, maskType, trueValue);
-      builder.create<fir::ResultOp>(loc, trueValue);
+  virtual hlfir::Entity genFinalResult(
+      const llvm::SmallVectorImpl<mlir::Value> &reductionResults) override {
+    checkReductions(reductionResults);
+    return hlfir::Entity{reductionResults[0]};
+  }
+};
 
-      // Load the mask value, if the mask is present.
-      builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+/// Reduction converter for ALL/ANY.
+template <typename T>
+class AllAnyAsElementalConverter
+    : public LogicalReductionAsElementalConverterBase<T> {
+  static_assert(std::is_same_v<T, hlfir::AllOp> ||
+                std::is_same_v<T, hlfir::AnyOp>);
+  static constexpr bool isAll = std::is_same_v<T, hlfir::AllOp>;
+  using Base = LogicalReductionAsElementalConverterBase<T>;
+
+public:
+  AllAnyAsElementalConverter(T op, mlir::PatternRewriter &rewriter)
+      : Base{op, rewriter} {}
+
+private:
+  virtual llvm::SmallVector<mlir::Value> genReductionInitValues() final {
+    return {this->builder.createBool(this->loc, isAll ? true : false)};
+  }
+  virtual llvm::SmallVector<mlir::Value>
+  reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue,
+                   hlfir::Entity array,
+                   mlir::ValueRange oneBasedIndices) final {
+    this->checkReductions(currentValue);
+    fir::FirOpBuilder &builder = this->builder;
+    mlir::Location loc = this->loc;
+    hlfir::Entity elementValue =
+        hlfir::loadElementAt(loc, builder, array, oneBasedIndices);
+    mlir::Value mask =
+        builder.createConvert(loc, builder.getI1Type(), elementValue);
+    if constexpr (isAll)
+      return {builder.create<mlir::arith::AndIOp>(loc, mask, currentValue[0])};
+    else
+      return {builder.create<mlir::arith::OrIOp>(loc, mask, currentValue[0])};
+  }
+
+  virtual hlfir::Entity genFinalResult(
+      const llvm::SmallVectorImpl<mlir::Value> &reductionValues) final {
+    this->checkReductions(reductionValues);
+    return hlfir::Entity{this->builder.createConvert(
+        this->loc, this->getResultElementType(), reductionValues[0])};
+  }
+};
+
+/// Reduction converter for COUNT.
+class CountAsElementalConverter
+    : public LogicalReductionAsElementalConverterBase<hlfir::CountOp> {
+  using Base = LogicalReductionAsElementalConverterBase<hlfir::CountOp>;
+
+public:
+  CountAsElementalConverter(hlfir::CountOp op, mlir::PatternRewriter &rewriter)
+      : Base{op, rewriter} {}
+
+private:
+  virtual llvm::SmallVector<mlir::Value> genReductionInitValues() final {
+    return {
+        fir::factory::createZeroValue(builder, loc, getResultElementType())};
+  }
+  virtual llvm::SmallVector<mlir::Value>
+  reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue,
+                   hlfir::Entity array,
+                   mlir::ValueRange oneBasedIndices) final {
+    checkReductions(currentValue);
+    hlfir::Entity elementValue =
+        hlfir::loadElementAt(loc, builder, array, oneBasedIndices);
+    mlir::Value cond =
+        builder.createConvert(loc, builder.getI1Type(), elementValue);
+    mlir::Value one =
+        builder.createIntegerConstant(loc, getResultElementType(), 1);
+    mlir::Value add1 =
+        builder.create<mlir::arith::AddIOp>(loc, currentValue[0], one);
+    return {builder.create<mlir::arith::SelectOp>(loc, cond, add1,
+                                                  currentValue[0])};
+  }
+};
+
+mlir::LogicalResult ReductionAsElementalConverter::convert() {
+  mlir::LogicalResult canConvert(isConvertible());
+
+  if (mlir::failed(canConvert))
+    return canConvert;
+
+  hlfir::Entity array = hlfir::Entity{getSource()};
+  bool isTotalReduce = isTotalReduction();
+  auto dimVal = getConstDim();
+  if (mlir::failed(dimVal))
+    return dimVal;
+  mlir::Value mask = getMask();
+  mlir::Value resultShape, dimExtent;
+  llvm::SmallVector<mlir::Value> arrayExtents;
+  if (isTotalReduce)
+    arrayExtents = hlfir::genExtentsVector(loc, builder, array);
+  else
+    std::tie(resultShape, dimExtent) =
+        genResultShapeForPartialReduction(array, *dimVal);
+
+  // If the mask is present and is a scalar, then we'd better load its value
+  // outside of the reduction loop making the loop unswitching easier.
+  mlir::Value isPresentPred, maskValue;
+  if (mask) {
+    if (mlir::isa<fir::BaseBoxType>(mask.getType())) {
+      // MASK represented by a box might be dynamically optional,
+      // so we have to check for its presence before accessing it.
+      isPresentPred =
+          builder.create<fir::IsPresentOp>(loc, builder.getI1Type(), mask);
     }
 
-    hlfir::Entity maskVar{mask};
-    if (maskVar.isScalar()) {
-      if (mlir::isa<fir::BaseBoxType>(mask.getType())) {
-        // MASK may be a boxed scalar.
-        mlir::Value addr = hlfir::genVariableRawAddress(loc, builder, maskVar);
-        mask = builder.create<fir::LoadOp>(loc, hlfir::Entity{addr});
+    if (hlfir::Entity{mask}.isScalar())
+      maskValue = genMaskValue(mask, isPresentPred, {});
+  }
+
+  auto genKernel = [&](mlir::Location loc, fir::FirOpBuilder &builder,
+                       mlir::ValueRange inputIndices) -> hlfir::Entity {
+    // Loop over all indices in the DIM dimension, and reduce all values.
+    // If DIM is not present, do total reduction.
+
+    // Initial value for the reduction.
+    llvm::SmallVector<mlir::Value, 1> reductionInitValues =
+        genReductionInitValues();
+
+    llvm::SmallVector<mlir::Value> extents;
+    if (isTotalReduce)
+      extents = arrayExtents;
+    else
+      extents.push_back(
+          builder.createConvert(loc, builder.getIndexType(), dimExtent));
+
+    auto genBody = [&](mlir::Location loc, fir::FirOpBuilder &builder,
+                       mlir::ValueRange oneBasedIndices,
+                       mlir::ValueRange reductionArgs)
+        -> llvm::SmallVector<mlir::Value, 1> {
+      // Generate the reduction loop-nest body.
+      // The current reduction value(s) in the innermost loop
+      // are passed via reductionArgs.
+      llvm::SmallVector<mlir::Value> indices;
+      if (isTotalReduce) {
+        indices = oneBasedIndices;
       } else {
-        mask = hlfir::loadTrivialScalar(loc, builder, maskVar);
+        indices = inputIndices;
+        indices.insert(indices.begin() + *dimVal - 1, oneBasedIndices[0]);
       }
+
+      llvm::SmallVector<mlir::Value, 1> reductionValues = reductionArgs;
+      llvm::SmallVector<mlir::Type, 1> reductionTypes;
+      llvm::transform(reductionValues, std::back_inserter(reductionTypes),
+                      [](mlir::Value v) { return v.getType(); });
+      fir::IfOp ifOp;
+      if (mask) {
+        // Make the reduction value update conditional on the value
+        // of the mask.
+        if (!maskValue) {
+          // If the mask is an array, use the elemental and the loop indices
+          // to address the proper mask element.
+          maskValue = genMaskValue(mask, isPresentPred, indices);
+        }
+        mlir::Value isUnmasked =
+            builder.create<fir::ConvertOp>(loc, builder.getI1Type(), maskValue);
+        ifOp = builder.create<fir::IfOp>(loc, reductionTypes, isUnmasked,
+                                         /*withElseRegion=*/true);
+        // In the 'else' block return the current reduction value.
+        builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
+        builder.create<fir::ResultOp>(loc, reductionValues);
+
+        // In the 'then' block do the actual reduction update.
+        builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+      }
+      reductionValues = reduceOneElement(reductionValues, array, indices);
+      if (ifOp) {
+        builder.create<fir::ResultOp>(loc, reductionValues);
+        builder.setInsertionPointAfter(ifOp);
+        reductionValues = ifOp.getResults();
+      }
+
+      return reductionValues;
+    };
+
+    llvm::SmallVector<mlir::Value, 1> reductionFinalValues =
+        hlfir::genLoopNestWithReductions(
+            loc, builder, extents, reductionInitValues, genBody, isUnordered());
+    return genFinalResult(reductionFinalValues);
+  };
+
+  if (isTotalReduce) {
+    hlfir::Entity result = genKernel(loc, builder, mlir::ValueRange{});
+    rewriter.replaceOp(op, result);
+    return mlir::success();
+  }
+
+  hlfir::ElementalOp elementalOp = hlfir::genElementalOp(
+      loc, builder, getResultElementType(), resultShape, /*typeParams=*/{},
+      genKernel,
+      /*isUnordered=*/true, /*polymorphicMold=*/nullptr, getResultType());
+
+  // It wouldn't be safe to replace block arguments with a different
+  // hlfir.expr type. Types can differ due to differing amounts of shape
+  // information.
+  assert(elementalOp.getResult().getType() == op->getResult(0).getType());
+
+  rewriter.replaceOp(op, elementalOp);
+  return mlir::success();
+}
+
+std::tuple<mlir::Value, mlir::Value>
+ReductionAsElementalConverter::genResultShapeForPartialReduction(
+    hlfir::Entity array, int64_t dimVal) {
+  llvm::SmallVector<mlir::Value> inExtents =
+      hlfir::genExtentsVector(loc, builder, array);
+  assert(dimVal > 0 && dimVal <= static_cast<int64_t>(inExtents.size()) &&
+         "DIM must be present and a positive constant not exceeding "
+         "the array's rank");
+
+  mlir::Value dimExtent = inExtents[dimVal - 1];
+  inExtents.erase(inExtents.begin() + dimVal - 1);
+  return {builder.create<fir::ShapeOp>(loc, inExtents), dimExtent};
+}
+
+mlir::Value SumAsElementalConverter::genScalarAdd(mlir::Value value1,
+                                                  mlir::Value value2) {
+  mlir::Type ty = value1.getType();
+  assert(ty == value2.getType() && "reduction values' types do not match");
+  if (mlir::isa<mlir::FloatType>(ty))
+    return builder.create<mlir::arith::AddFOp>(loc, value1, value2);
+  else if (mlir::isa<mlir::ComplexType>(ty))
+    return builder.create<fir::AddcOp>(loc, value1, value2);
+  else if (mlir::isa<mlir::IntegerType>(ty))
+    return builder.create<mlir::arith::AddIOp>(loc, value1, value2);
+
+  llvm_unreachable("unsupported SUM reduction type");
+}
+
+mlir::Value ReductionAsElementalConverter::genMaskValue(
+    mlir::Value mask, mlir::Value isPresentPred, mlir::ValueRange indices) {
+  mlir::OpBuilder::InsertionGuard guard(builder);
+  fir::IfOp ifOp;
+  mlir::Type maskType =
+      hlfir::getFortranElementType(fir::unwrapPassByRefType(mask.getType()));
+  if (isPresentPred) {
+    ifOp = builder.create<fir::IfOp>(loc, maskType, isPresentPred,
+                                     /*withElseRegion=*/true);
+
+    // Use 'true', if the mask is not present.
+    builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
+    mlir::Value trueValue = builder.createBool(loc, true);
+    trueValue = builder.createConvert(loc, maskType, trueValue);
+    builder.create<fir::ResultOp>(loc, trueValue);
+
+    // Load the mask value, if the mask is present.
+    builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+  }
+
+  hlfir::Entity maskVar{mask};
+  if (maskVar.isScalar()) {
+    if (mlir::isa<fir::BaseBoxType>(mask.getType())) {
+      // MASK may be a boxed scalar.
+      mlir::Value addr = hlfir::genVariableRawAddress(loc, builder, maskVar);
+      mask = builder.create<fir::LoadOp>(loc, hlfir::Entity{addr});
     } else {
-      // Load from the mask array.
-      assert(!indices.empty() && "no indices for addressing the mask array");
-      maskVar = hlfir::getElementAt(loc, builder, maskVar, indices);
       mask = hlfir::loadTrivialScalar(loc, builder, maskVar);
     }
+  } else {
+    // Load from the mask array.
+    assert(!indices.empty() && "no indices for addressing the mask array");
+    maskVar = hlfir::getElementAt(loc, builder, maskVar, indices);
+    mask = hlfir::loadTrivialScalar(loc, builder, maskVar);
+  }
 
-    if (!isPresentPred)
-      return mask;
+  if (!isPresentPred)
+    return mask;
 
-    builder.create<fir::ResultOp>(loc, mask);
-    return ifOp.getResult(0);
+  builder.create<fir::ResultOp>(loc, mask);
+  return ifOp.getResult(0);
+}
+
+/// Convert an operation that is a partial or total reduction
+/// over an array of values into a reduction loop[-nest]
+/// optionally wrapped into hlfir.elemental.
+template <typename Op>
+class ReductionConversion : public mlir::OpRewritePattern<Op> {
+public:
+  using mlir::OpRewritePattern<Op>::OpRewritePattern;
+
+  llvm::LogicalResult
+  matchAndRewrite(Op op, mlir::PatternRewriter &rewriter) const override {
+    if constexpr (std::is_same_v<Op, hlfir::MaxlocOp> ||
+                  std::is_same_v<Op, hlfir::MinlocOp>) {
+      MinMaxlocAsElementalConverter<Op> converter(op, rewriter);
+      return converter.convert();
+    } else if constexpr (std::is_same_v<Op, hlfir::MaxvalOp> ||
+                         std::is_same_v<Op, hlfir::MinvalOp>) {
+      MinMaxvalAsElementalConverter<Op> converter(op, rewriter);
+      return converter.convert();
+    } else if constexpr (std::is_same_v<Op, hlfir::CountOp>) {
+      CountAsElementalConverter converter(op, rewriter);
+      return converter.convert();
+    } else if constexpr (std::is_same_v<Op, hlfir::AllOp> ||
+                         std::is_same_v<Op, hlfir::AnyOp>) {
+      AllAnyAsElementalConverter<Op> converter(op, rewriter);
+      return converter.convert();
+    } else if constexpr (std::is_same_v<Op, hlfir::SumOp>) {
+      SumAsElementalConverter converter{op, rewriter};
+      return converter.convert();
+    }
+    return rewriter.notifyMatchFailure(op, "unexpected reduction operation");
   }
 };
 
@@ -1481,10 +2137,18 @@
 
     mlir::RewritePatternSet patterns(context);
     patterns.insert<TransposeAsElementalConversion>(context);
-    patterns.insert<SumAsElementalConversion>(context);
+    patterns.insert<ReductionConversion<hlfir::SumOp>>(context);
     patterns.insert<CShiftConversion>(context);
     patterns.insert<MatmulConversion<hlfir::MatmulTransposeOp>>(context);
 
+    patterns.insert<ReductionConversion<hlfir::CountOp>>(context);
+    patterns.insert<ReductionConversion<hlfir::AnyOp>>(context);
+    patterns.insert<ReductionConversion<hlfir::AllOp>>(context);
+    patterns.insert<ReductionConversion<hlfir::MaxlocOp>>(context);
+    patterns.insert<ReductionConversion<hlfir::MinlocOp>>(context);
+    patterns.insert<ReductionConversion<hlfir::MaxvalOp>>(context);
+    patterns.insert<ReductionConversion<hlfir::MinvalOp>>(context);
+
     // If forceMatmulAsElemental is false, then hlfir.matmul inlining
     // will introduce hlfir.eval_in_mem operation with new memory side
     // effects. This conflicts with CSE and optimized bufferization, e.g.:
diff --git a/flang/test/HLFIR/all-elemental.fir b/flang/test/HLFIR/all-elemental.fir
deleted file mode 100644
index 1ba8bb1..0000000
--- a/flang/test/HLFIR/all-elemental.fir
+++ /dev/null
@@ -1,91 +0,0 @@
-// RUN: fir-opt %s -opt-bufferization | FileCheck %s
-
-func.func @_QFPtest(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> !fir.logical<4> {
-  %c1 = arith.constant 1 : index
-  %c4 = arith.constant 4 : index
-  %c7 = arith.constant 7 : index
-  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
-  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.alloca !fir.logical<4> {bindc_name = "test", uniq_name = "_QFFtestEtest"}
-  %4:2 = hlfir.declare %3 {uniq_name = "_QFFtestEtest"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-  %5:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %6 = fir.load %2#0 : !fir.ref<i32>
-  %7 = fir.convert %6 : (i32) -> i64
-  %8 = fir.shape %c7 : (index) -> !fir.shape<1>
-  %9 = hlfir.designate %1#0 (%7, %c1:%c7:%c1)  shape %8 : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
-  %10 = fir.load %5#0 : !fir.ref<i32>
-  %11 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<7x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %14 = hlfir.designate %9 (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
-    %15 = fir.load %14 : !fir.ref<i32>
-    %16 = arith.cmpi sge, %15, %10 : i32
-    %17 = fir.convert %16 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %17 : !fir.logical<4>
-  }
-  %12 = hlfir.all %11 : (!hlfir.expr<7x!fir.logical<4>>) -> !fir.logical<4>
-  hlfir.assign %12 to %4#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-  hlfir.destroy %11 : !hlfir.expr<7x!fir.logical<4>>
-  %13 = fir.load %4#1 : !fir.ref<!fir.logical<4>>
-  return %13 : !fir.logical<4>
-}
-// CHECK-LABEL:  func.func @_QFPtest(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> !fir.logical<4> {
-// CHECK-NEXT:     %true = arith.constant true
-// CHECK-NEXT:     %c1 = arith.constant 1 : index
-// CHECK-NEXT:     %c4 = arith.constant 4 : index
-// CHECK-NEXT:     %c7 = arith.constant 7 : index
-// CHECK-NEXT:     %[[V0:.*]] = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
-// CHECK-NEXT:     %[[V1:.*]]:2 = hlfir.declare %arg0(%[[V0]])
-// CHECK-NEXT:     %[[V2:.*]]:2 = hlfir.declare %arg1
-// CHECK-NEXT:     %[[V3:.*]] = fir.alloca !fir.logical<4>
-// CHECK-NEXT:     %[[V4:.*]]:2 = hlfir.declare %[[V3]]
-// CHECK-NEXT:     %[[V5:.*]]:2 = hlfir.declare %arg2
-// CHECK-NEXT:     %[[V6:.*]] = fir.load %[[V2]]#0 : !fir.ref<i32>
-// CHECK-NEXT:     %[[V7:.*]] = fir.convert %[[V6]] : (i32) -> i64
-// CHECK-NEXT:     %[[V8:.*]] = fir.shape %c7 : (index) -> !fir.shape<1>
-// CHECK-NEXT:     %[[V9:.*]] = hlfir.designate %[[V1]]#0 (%[[V7]], %c1:%c7:%c1)  shape %[[V8]] : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
-// CHECK-NEXT:     %[[V10:.*]] = fir.load %[[V5]]#0 : !fir.ref<i32>
-// CHECK-NEXT:     %[[V11:.*]] = fir.do_loop %arg3 = %c1 to %c7 step %c1 iter_args(%arg4 = %true) -> (i1) {
-// CHECK-NEXT:       %[[V14:.*]] = hlfir.designate %[[V9]] (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:       %[[V15:.*]] = fir.load %[[V14]] : !fir.ref<i32>
-// CHECK-NEXT:       %[[V16:.*]] = arith.cmpi sge, %[[V15]], %[[V10]] : i32
-// CHECK-NEXT:       %[[V17:.*]] = arith.andi %arg4, %[[V16]] : i1
-// CHECK-NEXT:       fir.result %[[V17]] : i1
-// CHECK-NEXT:     }
-// CHECK-NEXT:     %[[V12:.*]] = fir.convert %[[V11]] : (i1) -> !fir.logical<4>
-// CHECK-NEXT:     hlfir.assign %[[V12]] to %[[V4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-// CHECK-NEXT:     %[[V13:.*]] = fir.load %[[V4]]#1 : !fir.ref<!fir.logical<4>>
-// CHECK-NEXT:     return %[[V13]] : !fir.logical<4>
-
-
-func.func @_QFPtest_dim(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> !fir.array<4x!fir.logical<4>> {
-  %c2_i32 = arith.constant 2 : i32
-  %c1 = arith.constant 1 : index
-  %c4 = arith.constant 4 : index
-  %c7 = arith.constant 7 : index
-  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
-  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.alloca !fir.array<4x!fir.logical<4>> {bindc_name = "test", uniq_name = "_QFFtestEtest"}
-  %4 = fir.shape %c4 : (index) -> !fir.shape<1>
-  %5:2 = hlfir.declare %3(%4) {uniq_name = "_QFFtestEtest"} : (!fir.ref<!fir.array<4x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<4x!fir.logical<4>>>, !fir.ref<!fir.array<4x!fir.logical<4>>>)
-  %6:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %7 = hlfir.designate %1#0 (%c1:%c4:%c1, %c1:%c7:%c1)  shape %0 : (!fir.ref<!fir.array<4x7xi32>>, index, index, index, index, index, index, !fir.shape<2>) -> !fir.ref<!fir.array<4x7xi32>>
-  %8 = fir.load %6#0 : !fir.ref<i32>
-  %9 = hlfir.elemental %0 unordered : (!fir.shape<2>) -> !hlfir.expr<4x7x!fir.logical<4>> {
-  ^bb0(%arg3: index, %arg4: index):
-    %12 = hlfir.designate %7 (%arg3, %arg4)  : (!fir.ref<!fir.array<4x7xi32>>, index, index) -> !fir.ref<i32>
-    %13 = fir.load %12 : !fir.ref<i32>
-    %14 = arith.cmpi sge, %13, %8 : i32
-    %15 = fir.convert %14 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %15 : !fir.logical<4>
-  }
-  %10 = hlfir.all %9 dim %c2_i32 : (!hlfir.expr<4x7x!fir.logical<4>>, i32) -> !hlfir.expr<4x!fir.logical<4>>
-  hlfir.assign %10 to %5#0 : !hlfir.expr<4x!fir.logical<4>>, !fir.ref<!fir.array<4x!fir.logical<4>>>
-  hlfir.destroy %10 : !hlfir.expr<4x!fir.logical<4>>
-  hlfir.destroy %9 : !hlfir.expr<4x7x!fir.logical<4>>
-  %11 = fir.load %5#1 : !fir.ref<!fir.array<4x!fir.logical<4>>>
-  return %11 : !fir.array<4x!fir.logical<4>>
-}
-// CHECK-LABEL:  func.func @_QFPtest_dim(
-// CHECK: %10 = hlfir.all %9 dim %c2_i32
\ No newline at end of file
diff --git a/flang/test/HLFIR/any-elemental.fir b/flang/test/HLFIR/any-elemental.fir
deleted file mode 100644
index a7c5596..0000000
--- a/flang/test/HLFIR/any-elemental.fir
+++ /dev/null
@@ -1,190 +0,0 @@
-// RUN: fir-opt %s -opt-bufferization | FileCheck %s
-
-func.func @_QFPtest(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> !fir.logical<4> {
-  %c1 = arith.constant 1 : index
-  %c4 = arith.constant 4 : index
-  %c7 = arith.constant 7 : index
-  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
-  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.alloca !fir.logical<4> {bindc_name = "test", uniq_name = "_QFFtestEtest"}
-  %4:2 = hlfir.declare %3 {uniq_name = "_QFFtestEtest"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-  %5:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %6 = fir.load %2#0 : !fir.ref<i32>
-  %7 = fir.convert %6 : (i32) -> i64
-  %8 = fir.shape %c7 : (index) -> !fir.shape<1>
-  %9 = hlfir.designate %1#0 (%7, %c1:%c7:%c1)  shape %8 : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
-  %10 = fir.load %5#0 : !fir.ref<i32>
-  %11 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<7x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %14 = hlfir.designate %9 (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
-    %15 = fir.load %14 : !fir.ref<i32>
-    %16 = arith.cmpi sge, %15, %10 : i32
-    %17 = fir.convert %16 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %17 : !fir.logical<4>
-  }
-  %12 = hlfir.any %11 : (!hlfir.expr<7x!fir.logical<4>>) -> !fir.logical<4>
-  hlfir.assign %12 to %4#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-  hlfir.destroy %11 : !hlfir.expr<7x!fir.logical<4>>
-  %13 = fir.load %4#1 : !fir.ref<!fir.logical<4>>
-  return %13 : !fir.logical<4>
-}
-// CHECK-LABEL:  func.func @_QFPtest(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> !fir.logical<4> {
-// CHECK-NEXT:     %false = arith.constant false
-// CHECK-NEXT:     %c1 = arith.constant 1 : index
-// CHECK-NEXT:     %c4 = arith.constant 4 : index
-// CHECK-NEXT:     %c7 = arith.constant 7 : index
-// CHECK-NEXT:     %[[V0:.*]] = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
-// CHECK-NEXT:     %[[V1:.*]]:2 = hlfir.declare %arg0(%[[V0]])
-// CHECK-NEXT:     %[[V2:.*]]:2 = hlfir.declare %arg1
-// CHECK-NEXT:     %[[V3:.*]] = fir.alloca !fir.logical<4>
-// CHECK-NEXT:     %[[V4:.*]]:2 = hlfir.declare %[[V3]]
-// CHECK-NEXT:     %[[V5:.*]]:2 = hlfir.declare %arg2
-// CHECK-NEXT:     %[[V6:.*]] = fir.load %[[V2]]#0 : !fir.ref<i32>
-// CHECK-NEXT:     %[[V7:.*]] = fir.convert %[[V6]] : (i32) -> i64
-// CHECK-NEXT:     %[[V8:.*]] = fir.shape %c7 : (index) -> !fir.shape<1>
-// CHECK-NEXT:     %[[V9:.*]] = hlfir.designate %[[V1]]#0 (%[[V7]], %c1:%c7:%c1)  shape %[[V8]] : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
-// CHECK-NEXT:     %[[V10:.*]] = fir.load %[[V5]]#0 : !fir.ref<i32>
-// CHECK-NEXT:     %[[V11:.*]] = fir.do_loop %arg3 = %c1 to %c7 step %c1 iter_args(%arg4 = %false) -> (i1) {
-// CHECK-NEXT:       %[[V14:.*]] = hlfir.designate %[[V9]] (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:       %[[V15:.*]] = fir.load %[[V14]] : !fir.ref<i32>
-// CHECK-NEXT:       %[[V16:.*]] = arith.cmpi sge, %[[V15]], %[[V10]] : i32
-// CHECK-NEXT:       %[[V17:.*]] = arith.ori %arg4, %[[V16]] : i1
-// CHECK-NEXT:       fir.result %[[V17]] : i1
-// CHECK-NEXT:     }
-// CHECK-NEXT:     %[[V12:.*]] = fir.convert %[[V11]] : (i1) -> !fir.logical<4>
-// CHECK-NEXT:     hlfir.assign %[[V12]] to %[[V4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-// CHECK-NEXT:     %[[V13:.*]] = fir.load %[[V4]]#1 : !fir.ref<!fir.logical<4>>
-// CHECK-NEXT:     return %[[V13]] : !fir.logical<4>
-
-
-func.func @_QFPtest_dim(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> !fir.array<4x!fir.logical<4>> {
-  %c2_i32 = arith.constant 2 : i32
-  %c1 = arith.constant 1 : index
-  %c4 = arith.constant 4 : index
-  %c7 = arith.constant 7 : index
-  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
-  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.alloca !fir.array<4x!fir.logical<4>> {bindc_name = "test", uniq_name = "_QFFtestEtest"}
-  %4 = fir.shape %c4 : (index) -> !fir.shape<1>
-  %5:2 = hlfir.declare %3(%4) {uniq_name = "_QFFtestEtest"} : (!fir.ref<!fir.array<4x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<4x!fir.logical<4>>>, !fir.ref<!fir.array<4x!fir.logical<4>>>)
-  %6:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %7 = hlfir.designate %1#0 (%c1:%c4:%c1, %c1:%c7:%c1)  shape %0 : (!fir.ref<!fir.array<4x7xi32>>, index, index, index, index, index, index, !fir.shape<2>) -> !fir.ref<!fir.array<4x7xi32>>
-  %8 = fir.load %6#0 : !fir.ref<i32>
-  %9 = hlfir.elemental %0 unordered : (!fir.shape<2>) -> !hlfir.expr<4x7x!fir.logical<4>> {
-  ^bb0(%arg3: index, %arg4: index):
-    %12 = hlfir.designate %7 (%arg3, %arg4)  : (!fir.ref<!fir.array<4x7xi32>>, index, index) -> !fir.ref<i32>
-    %13 = fir.load %12 : !fir.ref<i32>
-    %14 = arith.cmpi sge, %13, %8 : i32
-    %15 = fir.convert %14 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %15 : !fir.logical<4>
-  }
-  %10 = hlfir.any %9 dim %c2_i32 : (!hlfir.expr<4x7x!fir.logical<4>>, i32) -> !hlfir.expr<4x!fir.logical<4>>
-  hlfir.assign %10 to %5#0 : !hlfir.expr<4x!fir.logical<4>>, !fir.ref<!fir.array<4x!fir.logical<4>>>
-  hlfir.destroy %10 : !hlfir.expr<4x!fir.logical<4>>
-  hlfir.destroy %9 : !hlfir.expr<4x7x!fir.logical<4>>
-  %11 = fir.load %5#1 : !fir.ref<!fir.array<4x!fir.logical<4>>>
-  return %11 : !fir.array<4x!fir.logical<4>>
-}
-// CHECK-LABEL:  func.func @_QFPtest_dim(
-// CHECK: {{.*}} = hlfir.any {{.*}} dim %c2_i32
-
-
-func.func @_Qtest_recursive() attributes {fir.bindc_name = "test"} {
-  %c1 = arith.constant 1 : index
-  %true = arith.constant true
-  %false = arith.constant false
-  %c0_i64 = arith.constant 0 : i64
-  %c2_i32 = arith.constant 2 : i32
-  %c0 = arith.constant 0 : index
-  %c1_i32 = arith.constant 1 : i32
-  %0 = fir.address_of(@_QFEa) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-  %1:2 = hlfir.declare %0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
-  %2 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFEi"}
-  %3:2 = hlfir.declare %2 {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %4 = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFEn"}
-  %5:2 = hlfir.declare %4 {uniq_name = "_QFEn"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %6 = fir.alloca !fir.array<1x!fir.logical<4>> {bindc_name = "ra", uniq_name = "_QFEra"}
-  %7 = fir.shape %c1 : (index) -> !fir.shape<1>
-  %8:2 = hlfir.declare %6(%7) {uniq_name = "_QFEra"} : (!fir.ref<!fir.array<1x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<1x!fir.logical<4>>>, !fir.ref<!fir.array<1x!fir.logical<4>>>)
-  %9 = fir.alloca !fir.logical<4> {bindc_name = "rs", uniq_name = "_QFErs"}
-  %10:2 = hlfir.declare %9 {uniq_name = "_QFErs"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-  %11 = fir.allocmem !fir.array<?xi32>, %c1 {fir.must_be_heap = true, uniq_name = "_QFEa.alloc"}
-  %12 = fir.embox %11(%7) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
-  fir.store %12 to %1#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-  hlfir.assign %c1_i32 to %5#0 : i32, !fir.ref<i32>
-  %13 = fir.load %1#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-  %14:3 = fir.box_dims %13, %c0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
-  fir.do_loop %arg0 = %c1 to %14#1 step %c1 unordered {
-    %27:3 = fir.box_dims %13, %c0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
-    %28 = arith.subi %27#0, %c1 : index
-    %29 = arith.addi %arg0, %28 : index
-    %30 = hlfir.designate %13 (%29)  : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> !fir.ref<i32>
-    hlfir.assign %c2_i32 to %30 : i32, !fir.ref<i32>
-  }
-  %15 = fir.load %5#0 : !fir.ref<i32>
-  %16 = fir.convert %15 : (i32) -> i64
-  %17 = arith.cmpi sgt, %16, %c0_i64 : i64
-  %18 = arith.select %17, %16, %c0_i64 : i64
-  %19 = fir.convert %18 : (i64) -> index
-  %20 = fir.shape %19 : (index) -> !fir.shape<1>
-  %21 = hlfir.elemental %20 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
-  ^bb0(%arg0: index):
-    %27 = fir.load %1#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-    %28:3 = fir.box_dims %27, %c0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
-    %29 = arith.addi %28#0, %28#1 : index
-    %30 = arith.subi %29, %c1 : index
-    %31 = arith.subi %30, %28#0 : index
-    %32 = arith.addi %31, %c1 : index
-    %33 = arith.cmpi sgt, %32, %c0 : index
-    %34 = arith.select %33, %32, %c0 : index
-    %35 = fir.shape %34 : (index) -> !fir.shape<1>
-    %36 = hlfir.designate %27 (%28#0:%30:%c1)  shape %35 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
-    %37 = hlfir.elemental %35 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
-    ^bb0(%arg1: index):
-      %39 = hlfir.designate %36 (%arg1)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-      %40 = fir.load %39 : !fir.ref<i32>
-      %41 = arith.cmpi eq, %40, %c1_i32 : i32
-      %42 = fir.convert %41 : (i1) -> !fir.logical<4>
-      hlfir.yield_element %42 : !fir.logical<4>
-    }
-    %38 = hlfir.any %37 : (!hlfir.expr<?x!fir.logical<4>>) -> !fir.logical<4>
-    hlfir.destroy %37 : !hlfir.expr<?x!fir.logical<4>>
-    hlfir.yield_element %38 : !fir.logical<4>
-  }
-  %22 = hlfir.any %21 : (!hlfir.expr<?x!fir.logical<4>>) -> !fir.logical<4>
-  hlfir.assign %22 to %10#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-  hlfir.destroy %21 : !hlfir.expr<?x!fir.logical<4>>
-  %23 = fir.load %10#0 : !fir.ref<!fir.logical<4>>
-  %24 = fir.convert %23 : (!fir.logical<4>) -> i1
-  %25 = arith.xori %24, %true : i1
-  cf.cond_br %25, ^bb1, ^bb2
-^bb1:  // pred: ^bb0
-  fir.call @_FortranAStopStatement(%c2_i32, %false, %false) fastmath<contract> : (i32, i1, i1) -> ()
-  fir.unreachable
-^bb2:  // pred: ^bb0
-  return
-}
-// CHECK-LABEL: func.func @_Qtest_recursive()
-// CHECK:    %[[V20:.*]] = fir.do_loop %arg0 = %c1 to %{{.*}} step %c1 iter_args(%arg1 = %false) -> (i1) {
-// CHECK:      %[[V26:.*]] = fir.load %[[V1]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-// CHECK:      %[[V27:.*]]:3 = fir.box_dims %[[V26]], %c0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
-// CHECK:      %[[V28:.*]] = arith.addi %[[V27]]#0, %[[V27]]#1 : index
-// CHECK:      %[[V29:.*]] = arith.subi %[[V28]], %c1 : index
-// CHECK:      %[[V30:.*]] = arith.subi %[[V29]], %[[V27]]#0 : index
-// CHECK:      %[[V31:.*]] = arith.addi %[[V30]], %c1 : index
-// CHECK:      %[[V32:.*]] = arith.cmpi sgt, %[[V31]], %c0 : index
-// CHECK:      %[[V33:.*]] = arith.select %[[V32]], %[[V31]], %c0 : index
-// CHECK:      %[[V34:.*]] = fir.shape %[[V33]] : (index) -> !fir.shape<1>
-// CHECK:      %[[V35:.*]] = hlfir.designate %[[V26]] (%[[V27]]#0:%[[V29]]:%c1)  shape %[[V34]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
-// CHECK:      %[[V36:.*]] = fir.do_loop %arg2 = %c1 to %[[V33]] step %c1 iter_args(%arg3 = %false) -> (i1) {
-// CHECK:        %[[V38:.*]] = hlfir.designate %[[V35]] (%arg2)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK:        %[[V39:.*]] = fir.load %[[V38]] : !fir.ref<i32>
-// CHECK:        %[[V40:.*]] = arith.cmpi eq, %[[V39]], %c1_i32 : i32
-// CHECK:        %[[V41:.*]] = arith.ori %arg3, %[[V40]] : i1
-// CHECK:        fir.result %[[V41]] : i1
-// CHECK:      }
-// CHECK:      %[[V37:.*]] = arith.ori %arg1, %[[V36]] : i1
-// CHECK:      fir.result %[[V37]] : i1
-// CHECK:    }
diff --git a/flang/test/HLFIR/count-elemental.fir b/flang/test/HLFIR/count-elemental.fir
deleted file mode 100644
index 0df5cc3..0000000
--- a/flang/test/HLFIR/count-elemental.fir
+++ /dev/null
@@ -1,314 +0,0 @@
-// RUN: fir-opt %s -opt-bufferization | FileCheck %s
-
-func.func @_QFPtest(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
-  %c1 = arith.constant 1 : index
-  %c4 = arith.constant 4 : index
-  %c7 = arith.constant 7 : index
-  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
-  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.alloca i32 {bindc_name = "test", uniq_name = "_QFFtestEtest"}
-  %4:2 = hlfir.declare %3 {uniq_name = "_QFFtestEtest"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %5:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %6 = fir.load %2#0 : !fir.ref<i32>
-  %7 = fir.convert %6 : (i32) -> i64
-  %8 = fir.shape %c7 : (index) -> !fir.shape<1>
-  %9 = hlfir.designate %1#0 (%7, %c1:%c7:%c1)  shape %8 : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
-  %10 = fir.load %5#0 : !fir.ref<i32>
-  %11 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<7x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %14 = hlfir.designate %9 (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
-    %15 = fir.load %14 : !fir.ref<i32>
-    %16 = arith.cmpi sge, %15, %10 : i32
-    %17 = fir.convert %16 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %17 : !fir.logical<4>
-  }
-  %12 = hlfir.count %11 : (!hlfir.expr<7x!fir.logical<4>>) -> i32
-  hlfir.assign %12 to %4#0 : i32, !fir.ref<i32>
-  hlfir.destroy %11 : !hlfir.expr<7x!fir.logical<4>>
-  %13 = fir.load %4#1 : !fir.ref<i32>
-  return %13 : i32
-}
-// CHECK-LABEL:  func.func @_QFPtest(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
-// CHECK-NEXT:     %c1_i32 = arith.constant 1 : i32
-// CHECK-NEXT:     %c0_i32 = arith.constant 0 : i32
-// CHECK-NEXT:     %c1 = arith.constant 1 : index
-// CHECK-NEXT:     %c4 = arith.constant 4 : index
-// CHECK-NEXT:     %c7 = arith.constant 7 : index
-// CHECK-NEXT:     %[[V0:.*]] = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
-// CHECK-NEXT:     %[[V1:.*]]:2 = hlfir.declare %arg0(%[[V0]])
-// CHECK-NEXT:     %[[V2:.*]]:2 = hlfir.declare %arg1
-// CHECK-NEXT:     %[[V3:.*]] = fir.alloca i32
-// CHECK-NEXT:     %[[V4:.*]]:2 = hlfir.declare %[[V3]]
-// CHECK-NEXT:     %[[V5:.*]]:2 = hlfir.declare %arg2
-// CHECK-NEXT:     %[[V6:.*]] = fir.load %[[V2]]#0 : !fir.ref<i32>
-// CHECK-NEXT:     %[[V7:.*]] = fir.convert %[[V6]] : (i32) -> i64
-// CHECK-NEXT:     %[[V8:.*]] = fir.shape %c7 : (index) -> !fir.shape<1>
-// CHECK-NEXT:     %[[V9:.*]] = hlfir.designate %[[V1]]#0 (%[[V7]], %c1:%c7:%c1)  shape %[[V8]] : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
-// CHECK-NEXT:     %[[V10:.*]] = fir.load %[[V5]]#0 : !fir.ref<i32>
-// CHECK-NEXT:     %[[V11:.*]] = fir.do_loop %arg3 = %c1 to %c7 step %c1 iter_args(%arg4 = %c0_i32) -> (i32) {
-// CHECK-NEXT:       %[[V13:.*]] = hlfir.designate %[[V9]] (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:       %[[V14:.*]] = fir.load %[[V13]] : !fir.ref<i32>
-// CHECK-NEXT:       %[[V15:.*]] = arith.cmpi sge, %[[V14]], %[[V10]] : i32
-// CHECK-NEXT:       %[[V16:.*]] = arith.addi %arg4, %c1_i32 : i32
-// CHECK-NEXT:       %[[V17:.*]] = arith.select %[[V15]], %[[V16]], %arg4 : i32
-// CHECK-NEXT:       fir.result %[[V17]] : i32
-// CHECK-NEXT:     }
-// CHECK-NEXT:     hlfir.assign %[[V11]] to %[[V4]]#0 : i32, !fir.ref<i32>
-// CHECK-NEXT:     %[[V12:.*]] = fir.load %[[V4]]#1 : !fir.ref<i32>
-// CHECK-NEXT:     return %[[V12]] : i32
-
-func.func @_QFPtest_kind2(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i16 {
-  %c1 = arith.constant 1 : index
-  %c4 = arith.constant 4 : index
-  %c7 = arith.constant 7 : index
-  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
-  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.alloca i16 {bindc_name = "test", uniq_name = "_QFFtestEtest"}
-  %4:2 = hlfir.declare %3 {uniq_name = "_QFFtestEtest"} : (!fir.ref<i16>) -> (!fir.ref<i16>, !fir.ref<i16>)
-  %5:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %6 = fir.load %2#0 : !fir.ref<i32>
-  %7 = fir.convert %6 : (i32) -> i64
-  %8 = fir.shape %c7 : (index) -> !fir.shape<1>
-  %9 = hlfir.designate %1#0 (%7, %c1:%c7:%c1)  shape %8 : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
-  %10 = fir.load %5#0 : !fir.ref<i32>
-  %11 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<7x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %14 = hlfir.designate %9 (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
-    %15 = fir.load %14 : !fir.ref<i32>
-    %16 = arith.cmpi sge, %15, %10 : i32
-    %17 = fir.convert %16 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %17 : !fir.logical<4>
-  }
-  %12 = hlfir.count %11 : (!hlfir.expr<7x!fir.logical<4>>) -> i16
-  hlfir.assign %12 to %4#0 : i16, !fir.ref<i16>
-  hlfir.destroy %11 : !hlfir.expr<7x!fir.logical<4>>
-  %13 = fir.load %4#1 : !fir.ref<i16>
-  return %13 : i16
-}
-// CHECK-LABEL:  func.func @_QFPtest_kind2(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i16 {
-// CHECK-NEXT:     %c1_i16 = arith.constant 1 : i16
-// CHECK-NEXT:     %c0_i16 = arith.constant 0 : i16
-// CHECK-NEXT:     %c1 = arith.constant 1 : index
-// CHECK-NEXT:     %c4 = arith.constant 4 : index
-// CHECK-NEXT:     %c7 = arith.constant 7 : index
-// CHECK-NEXT:     %[[V0:.*]] = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
-// CHECK-NEXT:     %[[V1:.*]]:2 = hlfir.declare %arg0(%[[V0]])
-// CHECK-NEXT:     %[[V2:.*]]:2 = hlfir.declare %arg1
-// CHECK-NEXT:     %[[V3:.*]] = fir.alloca i16
-// CHECK-NEXT:     %[[V4:.*]]:2 = hlfir.declare %[[V3]]
-// CHECK-NEXT:     %[[V5:.*]]:2 = hlfir.declare %arg2
-// CHECK-NEXT:     %[[V6:.*]] = fir.load %[[V2]]#0 : !fir.ref<i32>
-// CHECK-NEXT:     %[[V7:.*]] = fir.convert %[[V6]] : (i32) -> i64
-// CHECK-NEXT:     %[[V8:.*]] = fir.shape %c7 : (index) -> !fir.shape<1>
-// CHECK-NEXT:     %[[V9:.*]] = hlfir.designate %[[V1]]#0 (%[[V7]], %c1:%c7:%c1)  shape %[[V8]] : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
-// CHECK-NEXT:     %[[V10:.*]] = fir.load %[[V5]]#0 : !fir.ref<i32>
-// CHECK-NEXT:     %[[V11:.*]] = fir.do_loop %arg3 = %c1 to %c7 step %c1 iter_args(%arg4 = %c0_i16) -> (i16) {
-// CHECK-NEXT:       %[[V13:.*]] = hlfir.designate %[[V9]] (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:       %[[V14:.*]] = fir.load %[[V13]] : !fir.ref<i32>
-// CHECK-NEXT:       %[[V15:.*]] = arith.cmpi sge, %[[V14]], %[[V10]] : i32
-// CHECK-NEXT:       %[[V16:.*]] = arith.addi %arg4, %c1_i16 : i16
-// CHECK-NEXT:       %[[V17:.*]] = arith.select %[[V15]], %[[V16]], %arg4 : i16
-// CHECK-NEXT:       fir.result %[[V17]] : i16
-// CHECK-NEXT:     }
-// CHECK-NEXT:     hlfir.assign %[[V11]] to %[[V4]]#0 : i16, !fir.ref<i16>
-// CHECK-NEXT:     %[[V12:.*]] = fir.load %[[V4]]#1 : !fir.ref<i16>
-// CHECK-NEXT:     return %[[V12]] : i16
-
-func.func @_QFPtest_dim(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> !fir.array<7xi32> {
-  %c1_i32 = arith.constant 1 : i32
-  %c1 = arith.constant 1 : index
-  %c4 = arith.constant 4 : index
-  %c7 = arith.constant 7 : index
-  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
-  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.alloca !fir.array<7xi32> {bindc_name = "test", uniq_name = "_QFFtestEtest"}
-  %4 = fir.shape %c7 : (index) -> !fir.shape<1>
-  %5:2 = hlfir.declare %3(%4) {uniq_name = "_QFFtestEtest"} : (!fir.ref<!fir.array<7xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<7xi32>>, !fir.ref<!fir.array<7xi32>>)
-  %6:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %7 = hlfir.designate %1#0 (%c1:%c4:%c1, %c1:%c7:%c1)  shape %0 : (!fir.ref<!fir.array<4x7xi32>>, index, index, index, index, index, index, !fir.shape<2>) -> !fir.ref<!fir.array<4x7xi32>>
-  %8 = fir.load %6#0 : !fir.ref<i32>
-  %9 = hlfir.elemental %0 unordered : (!fir.shape<2>) -> !hlfir.expr<4x7x!fir.logical<4>> {
-  ^bb0(%arg3: index, %arg4: index):
-    %12 = hlfir.designate %7 (%arg3, %arg4)  : (!fir.ref<!fir.array<4x7xi32>>, index, index) -> !fir.ref<i32>
-    %13 = fir.load %12 : !fir.ref<i32>
-    %14 = arith.cmpi sge, %13, %8 : i32
-    %15 = fir.convert %14 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %15 : !fir.logical<4>
-  }
-  %10 = hlfir.count %9 dim %c1_i32 : (!hlfir.expr<4x7x!fir.logical<4>>, i32) -> !hlfir.expr<7xi32>
-  hlfir.assign %10 to %5#0 : !hlfir.expr<7xi32>, !fir.ref<!fir.array<7xi32>>
-  hlfir.destroy %10 : !hlfir.expr<7xi32>
-  hlfir.destroy %9 : !hlfir.expr<4x7x!fir.logical<4>>
-  %11 = fir.load %5#1 : !fir.ref<!fir.array<7xi32>>
-  return %11 : !fir.array<7xi32>
-}
-// CHECK-LABEL:  func.func @_QFPtest_dim(
-// CHECK: %{{.*}} = hlfir.count %{{.*}} dim %c1_i32
-
-
-func.func @_QFPtest_multi(%arg0: !fir.ref<!fir.array<4x7x2xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
-  %c1 = arith.constant 1 : index
-  %c4 = arith.constant 4 : index
-  %c7 = arith.constant 7 : index
-  %c2 = arith.constant 2 : index
-  %0 = fir.shape %c4, %c7, %c2 : (index, index, index) -> !fir.shape<3>
-  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7x2xi32>>, !fir.shape<3>) -> (!fir.ref<!fir.array<4x7x2xi32>>, !fir.ref<!fir.array<4x7x2xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.alloca i32 {bindc_name = "test", uniq_name = "_QFFtestEtest"}
-  %4:2 = hlfir.declare %3 {uniq_name = "_QFFtestEtest"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %5:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %6 = hlfir.designate %1#0 (%c1:%c4:%c1, %c1:%c7:%c1, %c1:%c2:%c1)  shape %0 : (!fir.ref<!fir.array<4x7x2xi32>>, index, index, index, index, index, index, index, index, index, !fir.shape<3>) -> !fir.ref<!fir.array<4x7x2xi32>>
-  %7 = fir.load %5#0 : !fir.ref<i32>
-  %8 = hlfir.elemental %0 unordered : (!fir.shape<3>) -> !hlfir.expr<4x7x2x!fir.logical<4>> {
-  ^bb0(%arg3: index, %arg4: index, %arg5: index):
-    %11 = hlfir.designate %6 (%arg3, %arg4, %arg5)  : (!fir.ref<!fir.array<4x7x2xi32>>, index, index, index) -> !fir.ref<i32>
-    %12 = fir.load %11 : !fir.ref<i32>
-    %13 = arith.cmpi sge, %12, %7 : i32
-    %14 = fir.convert %13 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %14 : !fir.logical<4>
-  }
-  %9 = hlfir.count %8 : (!hlfir.expr<4x7x2x!fir.logical<4>>) -> i32
-  hlfir.assign %9 to %4#0 : i32, !fir.ref<i32>
-  hlfir.destroy %8 : !hlfir.expr<4x7x2x!fir.logical<4>>
-  %10 = fir.load %4#1 : !fir.ref<i32>
-  return %10 : i32
-}
-// CHECK-LABEL:  func.func @_QFPtest_multi(%arg0: !fir.ref<!fir.array<4x7x2xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
-// CHECK-NEXT:     %c1_i32 = arith.constant 1 : i32
-// CHECK-NEXT:     %c0_i32 = arith.constant 0 : i32
-// CHECK-NEXT:     %c1 = arith.constant 1 : index
-// CHECK-NEXT:     %c4 = arith.constant 4 : index
-// CHECK-NEXT:     %c7 = arith.constant 7 : index
-// CHECK-NEXT:     %c2 = arith.constant 2 : index
-// CHECK-NEXT:     %[[V0:.*]] = fir.shape %c4, %c7, %c2 : (index, index, index) -> !fir.shape<3>
-// CHECK-NEXT:     %[[V1:.*]]:2 = hlfir.declare %arg0(%[[V0]]) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7x2xi32>>, !fir.shape<3>) -> (!fir.ref<!fir.array<4x7x2xi32>>, !fir.ref<!fir.array<4x7x2xi32>>)
-// CHECK-NEXT:     %[[V2:.*]]:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-// CHECK-NEXT:     %[[V3:.*]] = fir.alloca i32 {bindc_name = "test", uniq_name = "_QFFtestEtest"}
-// CHECK-NEXT:     %[[V4:.*]]:2 = hlfir.declare %[[V3]] {uniq_name = "_QFFtestEtest"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-// CHECK-NEXT:     %[[V5:.*]]:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-// CHECK-NEXT:     %[[V6:.*]] = hlfir.designate %[[V1]]#0 (%c1:%c4:%c1, %c1:%c7:%c1, %c1:%c2:%c1)  shape %[[V0]] : (!fir.ref<!fir.array<4x7x2xi32>>, index, index, index, index, index, index, index, index, index, !fir.shape<3>) -> !fir.ref<!fir.array<4x7x2xi32>>
-// CHECK-NEXT:     %[[V7:.*]] = fir.load %[[V5]]#0 : !fir.ref<i32>
-// CHECK-NEXT:     %[[V8:.*]] = fir.do_loop %arg3 = %c1 to %c2 step %c1 iter_args(%arg4 = %c0_i32) -> (i32) {
-// CHECK-NEXT:       %[[V10:.*]] = fir.do_loop %arg5 = %c1 to %c7 step %c1 iter_args(%arg6 = %arg4) -> (i32) {
-// CHECK-NEXT:         %[[V11:.*]] = fir.do_loop %arg7 = %c1 to %c4 step %c1 iter_args(%arg8 = %arg6) -> (i32) {
-// CHECK-NEXT:           %[[V12:.*]] = hlfir.designate %[[V6]] (%arg7, %arg5, %arg3)  : (!fir.ref<!fir.array<4x7x2xi32>>, index, index, index) -> !fir.ref<i32>
-// CHECK-NEXT:           %[[V13:.*]] = fir.load %[[V12]] : !fir.ref<i32>
-// CHECK-NEXT:           %[[V14:.*]] = arith.cmpi sge, %[[V13]], %[[V7]] : i32
-// CHECK-NEXT:           %[[V15:.*]] = arith.addi %arg8, %c1_i32 : i32
-// CHECK-NEXT:           %[[V16:.*]] = arith.select %[[V14]], %[[V15]], %arg8 : i32
-// CHECK-NEXT:           fir.result %[[V16]] : i32
-// CHECK-NEXT:         }
-// CHECK-NEXT:         fir.result %[[V11]] : i32
-// CHECK-NEXT:       }
-// CHECK-NEXT:       fir.result %[[V10]] : i32
-// CHECK-NEXT:     }
-// CHECK-NEXT:     hlfir.assign %[[V8]] to %[[V4]]#0 : i32, !fir.ref<i32>
-// CHECK-NEXT:     %[[V9:.*]] = fir.load %[[V4]]#1 : !fir.ref<i32>
-// CHECK-NEXT:     return %[[V9]] : i32
-
-
-
-
-
-func.func @_QFPtest_rec_sum(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
-  %c1 = arith.constant 1 : index
-  %c4 = arith.constant 4 : index
-  %c7 = arith.constant 7 : index
-  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
-  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.alloca i32 {bindc_name = "test", uniq_name = "_QFFtestEtest"}
-  %4:2 = hlfir.declare %3 {uniq_name = "_QFFtestEtest"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %5:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %6 = fir.load %2#0 : !fir.ref<i32>
-  %7 = fir.convert %6 : (i32) -> i64
-  %8 = fir.shape %c7 : (index) -> !fir.shape<1>
-  %9 = hlfir.designate %1#0 (%7, %c1:%c7:%c1)  shape %8 : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
-  %10 = fir.load %5#0 : !fir.ref<i32>
-  %11 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<7xi32> {
-  ^bb0(%arg3: index):
-    %15 = hlfir.designate %9 (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
-    %16 = fir.load %15 : !fir.ref<i32>
-    hlfir.yield_element %16 : i32
-  }
-  %12 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<7x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %15 = hlfir.sum %11 : (!hlfir.expr<7xi32>) -> i32
-    %16 = arith.cmpi sge, %15, %10 : i32
-    %17 = fir.convert %16 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %17 : !fir.logical<4>
-  }
-  %13 = hlfir.count %12 : (!hlfir.expr<7x!fir.logical<4>>) -> i32
-  hlfir.assign %13 to %4#0 : i32, !fir.ref<i32>
-  hlfir.destroy %12 : !hlfir.expr<7x!fir.logical<4>>
-  hlfir.destroy %11 : !hlfir.expr<7xi32>
-  %14 = fir.load %4#1 : !fir.ref<i32>
-  return %14 : i32
-}
-// CHECK-LABEL:  func.func @_QFPtest_rec_sum(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
-// CHECK:    %[[V12:.*]] = fir.do_loop %arg3 = %c1 to %c7 step %c1 iter_args(%arg4 = %c0_i32) -> (i32) {
-// CHECK:      %[[V14:.*]] = hlfir.sum %[[V11]] : (!hlfir.expr<7xi32>) -> i32
-// CHECK:      %[[V15:.*]] = arith.cmpi sge, %[[V14]], %[[V10]] : i32
-// CHECK:      %[[V16:.*]] = arith.addi %arg4, %c1_i32 : i32
-// CHECK:      %[[V17:.*]] = arith.select %[[V15]], %[[V16]], %arg4 : i32
-// CHECK:      fir.result %[[V17]] : i32
-// CHECK:    }
-
-
-
-
-func.func @_QFPtest_rec_count(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
-  %c1 = arith.constant 1 : index
-  %c4 = arith.constant 4 : index
-  %c7 = arith.constant 7 : index
-  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
-  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.alloca i32 {bindc_name = "test", uniq_name = "_QFFtestEtest"}
-  %4:2 = hlfir.declare %3 {uniq_name = "_QFFtestEtest"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %5:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %6 = fir.load %2#0 : !fir.ref<i32>
-  %7 = fir.convert %6 : (i32) -> i64
-  %8 = fir.shape %c7 : (index) -> !fir.shape<1>
-  %9 = hlfir.designate %1#0 (%7, %c1:%c7:%c1)  shape %8 : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
-  %10 = fir.load %5#0 : !fir.ref<i32>
-  %11 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<7x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %15 = hlfir.designate %9 (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
-    %16 = fir.load %15 : !fir.ref<i32>
-    %17 = arith.cmpi sge, %16, %10 : i32
-    %18 = fir.convert %17 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %18 : !fir.logical<4>
-  }
-  %12 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<7x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %15 = hlfir.count %11 : (!hlfir.expr<7x!fir.logical<4>>) -> i32
-    %16 = arith.cmpi sge, %15, %10 : i32
-    %17 = fir.convert %16 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %17 : !fir.logical<4>
-  }
-  %13 = hlfir.count %12 : (!hlfir.expr<7x!fir.logical<4>>) -> i32
-  hlfir.assign %13 to %4#0 : i32, !fir.ref<i32>
-  hlfir.destroy %12 : !hlfir.expr<7x!fir.logical<4>>
-  hlfir.destroy %11 : !hlfir.expr<7x!fir.logical<4>>
-  %14 = fir.load %4#1 : !fir.ref<i32>
-  return %14 : i32
-}
-// CHECK-LABEL:  func.func @_QFPtest_rec_count(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
-// CHECK:    %[[V11:.*]] = fir.do_loop %arg3 = %c1 to %c7 step %c1 iter_args(%arg4 = %c0_i32) -> (i32) {
-// CHECK:      %[[V13:.*]] = fir.do_loop %arg5 = %c1 to %c7 step %c1 iter_args(%arg6 = %c0_i32) -> (i32) {
-// CHECK:        %[[V17:.*]] = hlfir.designate %[[V9]] (%arg5)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
-// CHECK:        %[[V18:.*]] = fir.load %[[V17]] : !fir.ref<i32>
-// CHECK:        %[[V19:.*]] = arith.cmpi sge, %[[V18]], %[[V10]] : i32
-// CHECK:        %[[V20:.*]] = arith.addi %arg6, %c1_i32 : i32
-// CHECK:        %[[V21:.*]] = arith.select %[[V19]], %[[V20]], %arg6 : i32
-// CHECK:        fir.result %[[V21]] : i32
-// CHECK:      }
-// CHECK:      %[[V14:.*]] = arith.cmpi sge, %[[V13]], %[[V10]] : i32
-// CHECK:      %[[V15:.*]] = arith.addi %arg4, %c1_i32 : i32
-// CHECK:      %[[V16:.*]] = arith.select %[[V14]], %[[V15]], %arg4 : i32
-// CHECK:      fir.result %[[V16]] : i32
-// CHECK:    }
diff --git a/flang/test/HLFIR/maxloc-elemental.fir b/flang/test/HLFIR/maxloc-elemental.fir
deleted file mode 100644
index c9210a5..0000000
--- a/flang/test/HLFIR/maxloc-elemental.fir
+++ /dev/null
@@ -1,133 +0,0 @@
-// RUN: fir-opt %s -opt-bufferization | FileCheck %s
-
-func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
-  %c0 = arith.constant 0 : index
-  %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.load %2#0 : !fir.ref<i32>
-  %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
-  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %8 = hlfir.designate %0#0 (%arg3)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-    %9 = fir.load %8 : !fir.ref<i32>
-    %10 = arith.cmpi sge, %9, %3 : i32
-    %11 = fir.convert %10 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %11 : !fir.logical<4>
-  }
-  %7 = hlfir.maxloc %0#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32>
-  hlfir.assign %7 to %1#0 : !hlfir.expr<1xi32>, !fir.box<!fir.array<?xi32>>
-  hlfir.destroy %7 : !hlfir.expr<1xi32>
-  hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
-  return
-}
-// CHECK-LABEL: func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
-// CHECK-NEXT:    %true = arith.constant true
-// CHECK-NEXT:    %c-2147483648_i32 = arith.constant -2147483648 : i32
-// CHECK-NEXT:    %c1_i32 = arith.constant 1 : i32
-// CHECK-NEXT:    %c0 = arith.constant 0 : index
-// CHECK-NEXT:    %c1 = arith.constant 1 : index
-// CHECK-NEXT:    %c0_i32 = arith.constant 0 : i32
-// CHECK-NEXT:    %[[V0:.*]] = fir.alloca i32
-// CHECK-NEXT:    %[[RES:.*]] = fir.alloca !fir.array<1xi32>
-// CHECK-NEXT:    %[[V1:.*]]:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-// CHECK-NEXT:    %[[V2:.*]]:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-// CHECK-NEXT:    %[[V3:.*]]:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-// CHECK-NEXT:    %[[V4:.*]] = fir.load %[[V3]]#0 : !fir.ref<i32>
-// CHECK-NEXT:    %[[V8:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:    fir.store %c0_i32 to %[[V8]] : !fir.ref<i32>
-// CHECK-NEXT:    fir.store %c0_i32 to %[[V0]] : !fir.ref<i32>
-// CHECK-NEXT:    %[[V9:.*]]:3 = fir.box_dims %[[V1]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-// CHECK-NEXT:    %[[V10:.*]] = arith.subi %[[V9]]#1, %c1 : index
-// CHECK-NEXT:    %[[V11:.*]] = fir.do_loop %arg3 = %c0 to %[[V10]] step %c1 iter_args(%arg4 = %c-2147483648_i32) -> (i32) {
-// CHECK-NEXT:      %[[V14:.*]] = arith.addi %arg3, %c1 : index
-// CHECK-NEXT:      %[[V15:.*]] = hlfir.designate %[[V1]]#0 (%[[V14]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:      %[[V16:.*]] = fir.load %[[V15]] : !fir.ref<i32>
-// CHECK-NEXT:      %[[V17:.*]] = arith.cmpi sge, %[[V16]], %[[V4]] : i32
-// CHECK-NEXT:      %[[V18:.*]] = fir.if %[[V17]] -> (i32) {
-// CHECK-NEXT:        %[[ISFIRST:.*]] = fir.load %[[V0]] : !fir.ref<i32>
-// CHECK-NEXT:        %[[V19:.*]] = hlfir.designate %[[V1]]#0 (%[[V14]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:        %[[V20:.*]] = fir.load %[[V19]] : !fir.ref<i32>
-// CHECK-NEXT:        %[[V21:.*]] = arith.cmpi sgt, %[[V20]], %arg4 : i32
-// CHECK-NEXT:        %[[ISFIRSTL:.*]] = fir.convert %[[ISFIRST]] : (i32) -> i1
-// CHECK-NEXT:        %[[ISFIRSTNOT:.*]] = arith.xori %[[ISFIRSTL]], %true : i1
-// CHECK-NEXT:        %[[ORCOND:.*]] = arith.ori %[[V21]], %[[ISFIRSTNOT]] : i1
-// CHECK-NEXT:        %[[V22:.*]] = fir.if %[[ORCOND]] -> (i32) {
-// CHECK-NEXT:          fir.store %c1_i32 to %[[V0]] : !fir.ref<i32>
-// CHECK-NEXT:          %[[V23:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:          %[[V24:.*]] = fir.convert %[[V14]] : (index) -> i32
-// CHECK-NEXT:          fir.store %[[V24]] to %[[V23]] : !fir.ref<i32>
-// CHECK-NEXT:          fir.result %[[V20]] : i32
-// CHECK-NEXT:        } else {
-// CHECK-NEXT:          fir.result %arg4 : i32
-// CHECK-NEXT:        }
-// CHECK-NEXT:        fir.result %[[V22]] : i32
-// CHECK-NEXT:      } else {
-// CHECK-NEXT:        fir.result %arg4 : i32
-// CHECK-NEXT:      }
-// CHECK-NEXT:      fir.result %[[V18]] : i32
-// CHECK-NEXT:    }
-// CHECK-NEXT:    hlfir.assign %[[RES]] to %[[V2]]#0 : !fir.ref<!fir.array<1xi32>>, !fir.box<!fir.array<?xi32>>
-// CHECK-NEXT:    return
-// CHECK-NEXT:  }
-
-
-
-func.func @_QPtest_float(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<f32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
-  %c0 = arith.constant 0 : index
-  %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
-  %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-  %3 = fir.load %2#0 : !fir.ref<f32>
-  %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
-  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
-  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %8 = hlfir.designate %0#0 (%arg3)  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
-    %9 = fir.load %8 : !fir.ref<f32>
-    %10 = arith.cmpf oge, %9, %3 : f32
-    %11 = fir.convert %10 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %11 : !fir.logical<4>
-  }
-  %7 = hlfir.maxloc %0#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xf32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32>
-  hlfir.assign %7 to %1#0 : !hlfir.expr<1xi32>, !fir.box<!fir.array<?xi32>>
-  hlfir.destroy %7 : !hlfir.expr<1xi32>
-  hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
-  return
-}
-// CHECK-LABEL: _QPtest_float
-// CHECK:        %cst = arith.constant 0xFF800000 : f32
-// CHECK:        %[[V11:.*]] = fir.do_loop %arg3 = %c0 to %[[V10:.*]] step %c1 iter_args(%arg4 = %cst) -> (f32) {
-// CHECK-NEXT:     %[[V14:.*]] = arith.addi %arg3, %c1 : index
-// CHECK-NEXT:     %[[V15:.*]] = hlfir.designate %[[V1:.*]]#0 (%[[V14]])  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
-// CHECK-NEXT:     %[[V16:.*]] = fir.load %[[V15]] : !fir.ref<f32>
-// CHECK-NEXT:     %[[V17:.*]] = arith.cmpf oge, %[[V16]], %[[V4:.*]] : f32
-// CHECK-NEXT:     %[[V18:.*]] = fir.if %[[V17]] -> (f32) {
-// CHECK-NEXT:       %[[ISFIRST:.*]] = fir.load %[[V0:.*]] : !fir.ref<i32>
-// CHECK-NEXT:       %[[V19:.*]] = hlfir.designate %[[V1]]#0 (%[[V14]]) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
-// CHECK-NEXT:       %[[V20:.*]] = fir.load %[[V19]] : !fir.ref<f32>
-// CHECK-NEXT:       %[[NEW_MIN:.*]] = arith.cmpf ogt, %[[V20]], %arg4 fastmath<contract> : f32
-// CHECK-NEXT:       %[[CONDRED:.*]] = arith.cmpf une, %arg4, %arg4 fastmath<contract> : f32
-// CHECK-NEXT:       %[[CONDELEM:.*]] = arith.cmpf oeq, %[[V20]], %[[V20]] fastmath<contract> : f32
-// CHECK-NEXT:       %[[ANDCOND:.*]] = arith.andi %[[CONDRED]], %[[CONDELEM]] : i1
-// CHECK-NEXT:       %[[NEW_MIN2:.*]] = arith.ori %[[NEW_MIN]], %[[ANDCOND]] : i1
-// CHECK-NEXT:       %[[ISFIRSTL:.*]] = fir.convert %[[ISFIRST]] : (i32) -> i1
-// CHECK-NEXT:       %[[ISFIRSTNOT:.*]] = arith.xori %[[ISFIRSTL]], %true : i1
-// CHECK-NEXT:       %[[ORCOND:.*]] = arith.ori %[[NEW_MIN2]], %[[ISFIRSTNOT]] : i1
-// CHECK-NEXT:       %[[V22:.*]] = fir.if %[[ORCOND]] -> (f32) {
-// CHECK-NEXT:         fir.store %c1_i32 to %[[V0]] : !fir.ref<i32>
-// CHECK-NEXT:         %[[V23:.*]] = hlfir.designate %{{.}} (%c1) : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:         %[[V24:.*]] = fir.convert %[[V14]] : (index) -> i32
-// CHECK-NEXT:         fir.store %[[V24]] to %[[V23]] : !fir.ref<i32>
-// CHECK-NEXT:         fir.result %[[V20]] : f32
-// CHECK-NEXT:       } else {
-// CHECK-NEXT:         fir.result %arg4 : f32
-// CHECK-NEXT:       }
-// CHECK-NEXT:       fir.result %[[V22]] : f32
-// CHECK-NEXT:     } else {
-// CHECK-NEXT:       fir.result %arg4 : f32
-// CHECK-NEXT:     }
-// CHECK-NEXT:     fir.result %[[V18]] : f32
-// CHECK-NEXT:   }
-
diff --git a/flang/test/HLFIR/maxval-elemental.fir b/flang/test/HLFIR/maxval-elemental.fir
deleted file mode 100644
index a21b485..0000000
--- a/flang/test/HLFIR/maxval-elemental.fir
+++ /dev/null
@@ -1,117 +0,0 @@
-// Test maxval inlining for both elemental and designate
-// RUN: fir-opt %s -opt-bufferization | FileCheck %s
-
-// subroutine test(array)
-//   integer :: array(:), x
-//   x = maxval(abs(array))
-// end subroutine test
-
-func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}) {
-  %c31_i32 = arith.constant 31 : i32
-  %c0 = arith.constant 0 : index
-  %0 = fir.dummy_scope : !fir.dscope
-  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %2 = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFtestEx"}
-  %3:2 = hlfir.declare %2 {uniq_name = "_QFtestEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %4:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
-  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
-  ^bb0(%arg1: index):
-    %8 = hlfir.designate %1#0 (%arg1)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-    %9 = fir.load %8 : !fir.ref<i32>
-    %10 = arith.shrsi %9, %c31_i32 : i32
-    %11 = arith.xori %9, %10 : i32
-    %12 = arith.subi %11, %10 : i32
-    hlfir.yield_element %12 : i32
-  }
-  %7 = hlfir.maxval %6 {fastmath = #arith.fastmath<contract>} : (!hlfir.expr<?xi32>) -> i32
-  hlfir.assign %7 to %3#0 : i32, !fir.ref<i32>
-  hlfir.destroy %6 : !hlfir.expr<?xi32>
-  return
-}
-
-// CHECK-LABEL: func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}) {
-// CHECK-NEXT:    %c1 = arith.constant 1 : index
-// CHECK-NEXT:    %c-2147483648_i32 = arith.constant -2147483648 : i32
-// CHECK-NEXT:    %c31_i32 = arith.constant 31 : i32
-// CHECK-NEXT:    %c0 = arith.constant 0 : index
-// CHECK-NEXT:    %[[V0:.*]] = fir.dummy_scope : !fir.dscope
-// CHECK-NEXT:    %[[V1:.*]]:2 = hlfir.declare %arg0 dummy_scope %[[V0]] {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-// CHECK-NEXT:    %[[V2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFtestEx"}
-// CHECK-NEXT:    %[[V3:.*]]:2 = hlfir.declare %[[V2]] {uniq_name = "_QFtestEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-// CHECK-NEXT:    %[[V4:.*]]:3 = fir.box_dims %[[V1]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-// CHECK-NEXT:    %[[V5:.*]] = fir.do_loop %arg1 = %c1 to %[[V4]]#1 step %c1 iter_args(%arg2 = %c-2147483648_i32) -> (i32) {
-// CHECK-NEXT:      %[[V6:.*]] = hlfir.designate %[[V1]]#0 (%arg1)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:      %[[V7:.*]] = fir.load %[[V6]] : !fir.ref<i32>
-// CHECK-NEXT:      %[[V8:.*]] = arith.shrsi %[[V7]], %c31_i32 : i32
-// CHECK-NEXT:      %[[V9:.*]] = arith.xori %[[V7]], %[[V8]] : i32
-// CHECK-NEXT:      %[[V10:.*]] = arith.subi %[[V9]], %[[V8]] : i32
-// CHECK-NEXT:      %[[V11:.*]] = arith.cmpi sgt, %[[V10]], %arg2 : i32
-// CHECK-NEXT:      %[[V12:.*]] = arith.select %[[V11]], %[[V10]], %arg2 : i32
-// CHECK-NEXT:      fir.result %[[V12]] : i32
-// CHECK-NEXT:    }
-// CHECK-NEXT:    hlfir.assign %[[V5]] to %[[V3]]#0 : i32, !fir.ref<i32>
-// CHECK-NEXT:    return
-// CHECK-NEXT:  }
-
-// subroutine test(array)
-//   real :: array(:), x
-//   x = maxval(array(3:6))
-// end subroutine test
-
-func.func @_QPtest_float(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "array"}) {
-  %c4 = arith.constant 4 : index
-  %c1 = arith.constant 1 : index
-  %c6 = arith.constant 6 : index
-  %c3 = arith.constant 3 : index
-  %0 = fir.dummy_scope : !fir.dscope
-  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
-  %2 = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFtestEx"}
-  %3:2 = hlfir.declare %2 {uniq_name = "_QFtestEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-  %4 = fir.shape %c4 : (index) -> !fir.shape<1>
-  %5 = hlfir.designate %1#0 (%c3:%c6:%c1)  shape %4 : (!fir.box<!fir.array<?xf32>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<4xf32>>
-  %6 = hlfir.maxval %5 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<4xf32>>) -> f32
-  hlfir.assign %6 to %3#0 : f32, !fir.ref<f32>
-  return
-}
-
-// CHECK-LABEL: _QPtest_float
-// CHECK:       %cst = arith.constant 0xFF800000 : f32
-// CHECK:       %[[V4:.*]] = fir.shape %c4 : (index) -> !fir.shape<1>
-// CHECK-NEXT:  %[[V5:.*]] = hlfir.designate %{{.*}} (%c3:%c6:%c1)  shape %[[V4]] : (!fir.box<!fir.array<?xf32>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<4xf32>>
-// CHECK-NEXT:  %[[V6:.*]] = fir.do_loop %arg1 = %c1 to %c4 step %c1 iter_args(%arg2 = %cst) -> (f32) {
-// CHECK-NEXT:      %[[V7:.*]] = hlfir.designate %[[V5]] (%arg1)  : (!fir.box<!fir.array<4xf32>>, index) -> !fir.ref<f32>
-// CHECK-NEXT:      %[[V8:.*]] = fir.load %[[V7]] : !fir.ref<f32>
-// CHECK-NEXT:      %[[V9:.*]] = arith.cmpf ogt, %[[V8]], %arg2 fastmath<contract> : f32
-// CHECK-NEXT:      %[[V10:.*]] = arith.cmpf une, %arg2, %arg2 fastmath<contract> : f32
-// CHECK-NEXT:      %[[V11:.*]] = arith.cmpf oeq, %[[V8]], %[[V8]] fastmath<contract> : f32
-// CHECK-NEXT:      %[[V12:.*]] = arith.andi %[[V10]], %[[V11]] : i1
-// CHECK-NEXT:      %[[V13:.*]] = arith.ori %[[V9]], %[[V12]] : i1
-// CHECK-NEXT:      %[[V14:.*]] = arith.select %[[V13]], %[[V8]], %arg2 : f32
-// CHECK-NEXT:      fir.result %[[V14]] : f32
-// CHECK-NEXT:    }
-// CHECK-NEXT:    hlfir.assign %[[V6]] to %3#0 : f32, !fir.ref<f32>
-// CHECK-NEXT:    return
-// CHECK-NEXT:  }
-
-// Verify that lower bounds of designator are applied in the indexing inside
-// the generated loop (hlfir.designate takes indices relative to the base lower
-// bounds).
-func.func @component_lower_bounds(%arg0: !fir.ref<!fir.type<sometype{i:!fir.array<10xi32>}>>) -> i32 {
-  %c10 = arith.constant 10 : index
-  %c101 = arith.constant 101 : index
-  %4 = fir.shape_shift %c101, %c10 : (index, index) -> !fir.shapeshift<1>
-  %5 = hlfir.designate %arg0{"i"}   shape %4 : (!fir.ref<!fir.type<sometype{i:!fir.array<10xi32>}>>, !fir.shapeshift<1>) -> !fir.box<!fir.array<10xi32>>
-  %6 = hlfir.maxval %5 : (!fir.box<!fir.array<10xi32>>) -> i32
-  return %6 : i32
-}
-// CHECK-LABEL:   func.func @component_lower_bounds(
-// CHECK:  %[[VAL_1:.*]] = arith.constant 100 : index
-// CHECK:  %[[VAL_2:.*]] = arith.constant 1 : index
-// CHECK:  %[[VAL_4:.*]] = arith.constant 10 : index
-// CHECK:  %[[VAL_5:.*]] = arith.constant 101 : index
-// CHECK:  %[[VAL_6:.*]] = fir.shape_shift %[[VAL_5]], %[[VAL_4]] : (index, index) -> !fir.shapeshift<1>
-// CHECK:  %[[VAL_7:.*]] = hlfir.designate %{{.*}}{"i"}   shape %[[VAL_6]] : (!fir.ref<!fir.type<sometype{i:!fir.array<10xi32>}>>, !fir.shapeshift<1>) -> !fir.box<!fir.array<10xi32>>
-// CHECK:  %[[VAL_8:.*]] = fir.do_loop %[[VAL_9:.*]] = %[[VAL_2]] to %[[VAL_4]] {{.*}}
-// CHECK:    %[[VAL_11:.*]] = arith.addi %[[VAL_9]], %[[VAL_1]] : index
-// CHECK:    hlfir.designate %[[VAL_7]] (%[[VAL_11]])  : (!fir.box<!fir.array<10xi32>>, index) -> !fir.ref<i32>
diff --git a/flang/test/HLFIR/minloc-elemental.fir b/flang/test/HLFIR/minloc-elemental.fir
deleted file mode 100644
index 9453a33..0000000
--- a/flang/test/HLFIR/minloc-elemental.fir
+++ /dev/null
@@ -1,397 +0,0 @@
-// RUN: fir-opt %s -opt-bufferization | FileCheck %s
-
-func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
-  %c0 = arith.constant 0 : index
-  %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.load %2#0 : !fir.ref<i32>
-  %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
-  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %8 = hlfir.designate %0#0 (%arg3)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-    %9 = fir.load %8 : !fir.ref<i32>
-    %10 = arith.cmpi sge, %9, %3 : i32
-    %11 = fir.convert %10 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %11 : !fir.logical<4>
-  }
-  %7 = hlfir.minloc %0#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32>
-  hlfir.assign %7 to %1#0 : !hlfir.expr<1xi32>, !fir.box<!fir.array<?xi32>>
-  hlfir.destroy %7 : !hlfir.expr<1xi32>
-  hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
-  return
-}
-// CHECK-LABEL: func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
-// CHECK-NEXT:    %true = arith.constant true
-// CHECK-NEXT:    %c2147483647_i32 = arith.constant 2147483647 : i32
-// CHECK-NEXT:    %c1_i32 = arith.constant 1 : i32
-// CHECK-NEXT:    %c0 = arith.constant 0 : index
-// CHECK-NEXT:    %c1 = arith.constant 1 : index
-// CHECK-NEXT:    %c0_i32 = arith.constant 0 : i32
-// CHECK-NEXT:    %[[V0:.*]] = fir.alloca i32
-// CHECK-NEXT:    %[[RES:.*]] = fir.alloca !fir.array<1xi32>
-// CHECK-NEXT:    %[[V1:.*]]:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-// CHECK-NEXT:    %[[V2:.*]]:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-// CHECK-NEXT:    %[[V3:.*]]:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-// CHECK-NEXT:    %[[V4:.*]] = fir.load %[[V3]]#0 : !fir.ref<i32>
-// CHECK-NEXT:    %[[V8:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:    fir.store %c0_i32 to %[[V8]] : !fir.ref<i32>
-// CHECK-NEXT:    fir.store %c0_i32 to %[[V0]] : !fir.ref<i32>
-// CHECK-NEXT:    %[[V9:.*]]:3 = fir.box_dims %[[V1]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-// CHECK-NEXT:    %[[V10:.*]] = arith.subi %[[V9]]#1, %c1 : index
-// CHECK-NEXT:    %[[V11:.*]] = fir.do_loop %arg3 = %c0 to %[[V10]] step %c1 iter_args(%arg4 = %c2147483647_i32) -> (i32) {
-// CHECK-NEXT:      %[[V14:.*]] = arith.addi %arg3, %c1 : index
-// CHECK-NEXT:      %[[V15:.*]] = hlfir.designate %[[V1]]#0 (%[[V14]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:      %[[V16:.*]] = fir.load %[[V15]] : !fir.ref<i32>
-// CHECK-NEXT:      %[[V17:.*]] = arith.cmpi sge, %[[V16]], %[[V4]] : i32
-// CHECK-NEXT:      %[[V18:.*]] = fir.if %[[V17]] -> (i32) {
-// CHECK-NEXT:        %[[ISFIRST:.*]] = fir.load %[[V0]] : !fir.ref<i32>
-// CHECK-NEXT:        %[[V19:.*]] = hlfir.designate %[[V1]]#0 (%[[V14]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:        %[[V20:.*]] = fir.load %[[V19]] : !fir.ref<i32>
-// CHECK-NEXT:        %[[V21:.*]] = arith.cmpi slt, %[[V20]], %arg4 : i32
-// CHECK-NEXT:        %[[ISFIRSTL:.*]] = fir.convert %[[ISFIRST]] : (i32) -> i1
-// CHECK-NEXT:        %[[ISFIRSTNOT:.*]] = arith.xori %[[ISFIRSTL]], %true : i1
-// CHECK-NEXT:        %[[ORCOND:.*]] = arith.ori %[[V21]], %[[ISFIRSTNOT]] : i1
-// CHECK-NEXT:        %[[V22:.*]] = fir.if %[[ORCOND]] -> (i32) {
-// CHECK-NEXT:          fir.store %c1_i32 to %[[V0]] : !fir.ref<i32>
-// CHECK-NEXT:          %[[V23:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:          %[[V24:.*]] = fir.convert %[[V14]] : (index) -> i32
-// CHECK-NEXT:          fir.store %[[V24]] to %[[V23]] : !fir.ref<i32>
-// CHECK-NEXT:          fir.result %[[V20]] : i32
-// CHECK-NEXT:        } else {
-// CHECK-NEXT:          fir.result %arg4 : i32
-// CHECK-NEXT:        }
-// CHECK-NEXT:        fir.result %[[V22]] : i32
-// CHECK-NEXT:      } else {
-// CHECK-NEXT:        fir.result %arg4 : i32
-// CHECK-NEXT:      }
-// CHECK-NEXT:      fir.result %[[V18]] : i32
-// CHECK-NEXT:    }
-// CHECK-NEXT:    hlfir.assign %[[RES]] to %[[V2]]#0 : !fir.ref<!fir.array<1xi32>>, !fir.box<!fir.array<?xi32>>
-// CHECK-NEXT:    return
-// CHECK-NEXT:  }
-
-
-func.func @_QPtest_kind2(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi16>> {fir.bindc_name = "m"}) {
-  %c0 = arith.constant 0 : index
-  %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi16>>) -> (!fir.box<!fir.array<?xi16>>, !fir.box<!fir.array<?xi16>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.load %2#0 : !fir.ref<i32>
-  %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
-  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %8 = hlfir.designate %0#0 (%arg3)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-    %9 = fir.load %8 : !fir.ref<i32>
-    %10 = arith.cmpi sge, %9, %3 : i32
-    %11 = fir.convert %10 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %11 : !fir.logical<4>
-  }
-  %7 = hlfir.minloc %0#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi16>
-  hlfir.assign %7 to %1#0 : !hlfir.expr<1xi16>, !fir.box<!fir.array<?xi16>>
-  hlfir.destroy %7 : !hlfir.expr<1xi16>
-  hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
-  return
-}
-// CHECK-LABEL:  func.func @_QPtest_kind2(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi16>> {fir.bindc_name = "m"}) {
-// CHECK-NEXT:    %true = arith.constant true
-// CHECK-NEXT:    %c2147483647_i32 = arith.constant 2147483647 : i32
-// CHECK-NEXT:    %c1_i16 = arith.constant 1 : i16
-// CHECK-NEXT:    %c0 = arith.constant 0 : index
-// CHECK-NEXT:    %c1 = arith.constant 1 : index
-// CHECK-NEXT:    %c0_i16 = arith.constant 0 : i16
-// CHECK-NEXT:    %[[V0:.*]] = fir.alloca i16
-// CHECK-NEXT:    %[[RES:.*]] = fir.alloca !fir.array<1xi16>
-// CHECK-NEXT:    %[[V1:.*]]:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-// CHECK-NEXT:    %[[V2:.*]]:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi16>>) -> (!fir.box<!fir.array<?xi16>>, !fir.box<!fir.array<?xi16>>)
-// CHECK-NEXT:    %[[V3:.*]]:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-// CHECK-NEXT:    %[[V4:.*]] = fir.load %[[V3]]#0 : !fir.ref<i32>
-// CHECK-NEXT:    %[[V8:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
-// CHECK-NEXT:    fir.store %c0_i16 to %[[V8]] : !fir.ref<i16>
-// CHECK-NEXT:    fir.store %c0_i16 to %[[V0]] : !fir.ref<i16>
-// CHECK-NEXT:    %[[V9:.*]]:3 = fir.box_dims %[[V1]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-// CHECK-NEXT:    %[[V10:.*]] = arith.subi %[[V9]]#1, %c1 : index
-// CHECK-NEXT:    %[[V11:.*]] = fir.do_loop %arg3 = %c0 to %[[V10]] step %c1 iter_args(%arg4 = %c2147483647_i32) -> (i32) {
-// CHECK-NEXT:      %[[V14:.*]] = arith.addi %arg3, %c1 : index
-// CHECK-NEXT:      %[[V15:.*]] = hlfir.designate %[[V1]]#0 (%[[V14]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:      %[[V16:.*]] = fir.load %[[V15]] : !fir.ref<i32>
-// CHECK-NEXT:      %[[V17:.*]] = arith.cmpi sge, %[[V16]], %[[V4]] : i32
-// CHECK-NEXT:      %[[V18:.*]] = fir.if %[[V17]] -> (i32) {
-// CHECK-NEXT:        %[[ISFIRST:.*]] = fir.load %[[V0]] : !fir.ref<i16>
-// CHECK-NEXT:        %[[V19:.*]] = hlfir.designate %[[V1]]#0 (%[[V14]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:        %[[V20:.*]] = fir.load %[[V19]] : !fir.ref<i32>
-// CHECK-NEXT:        %[[V21:.*]] = arith.cmpi slt, %[[V20]], %arg4 : i32
-// CHECK-NEXT:        %[[ISFIRSTL:.*]] = fir.convert %[[ISFIRST]] : (i16) -> i1
-// CHECK-NEXT:        %[[ISFIRSTNOT:.*]] = arith.xori %[[ISFIRSTL]], %true : i1
-// CHECK-NEXT:        %[[ORCOND:.*]] = arith.ori %[[V21]], %[[ISFIRSTNOT]] : i1
-// CHECK-NEXT:        %[[V22:.*]] = fir.if %[[ORCOND]] -> (i32) {
-// CHECK-NEXT:          fir.store %c1_i16 to %[[V0]] : !fir.ref<i16>
-// CHECK-NEXT:          %[[V23:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
-// CHECK-NEXT:          %[[V24:.*]] = fir.convert %[[V14]] : (index) -> i16
-// CHECK-NEXT:          fir.store %[[V24]] to %[[V23]] : !fir.ref<i16>
-// CHECK-NEXT:          fir.result %[[V20]] : i32
-// CHECK-NEXT:        } else {
-// CHECK-NEXT:          fir.result %arg4 : i32
-// CHECK-NEXT:        }
-// CHECK-NEXT:        fir.result %[[V22]] : i32
-// CHECK-NEXT:      } else {
-// CHECK-NEXT:        fir.result %arg4 : i32
-// CHECK-NEXT:      }
-// CHECK-NEXT:      fir.result %[[V18]] : i32
-// CHECK-NEXT:    }
-// CHECK-NEXT:    hlfir.assign %[[RES]] to %[[V2]]#0 : !fir.ref<!fir.array<1xi16>>, !fir.box<!fir.array<?xi16>>
-// CHECK-NEXT:    return
-
-
-func.func @_QPtest_kind2_convert(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
-  %c1 = arith.constant 1 : index
-  %c0 = arith.constant 0 : index
-  %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.load %2#0 : !fir.ref<i32>
-  %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
-  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %10 = hlfir.designate %0#0 (%arg3)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-    %11 = fir.load %10 : !fir.ref<i32>
-    %12 = arith.cmpi sge, %11, %3 : i32
-    %13 = fir.convert %12 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %13 : !fir.logical<4>
-  }
-  %7 = hlfir.minloc %0#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi16>
-  %8 = fir.shape %c1 : (index) -> !fir.shape<1>
-  %9 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
-  ^bb0(%arg3: index):
-    %10 = hlfir.apply %7, %arg3 : (!hlfir.expr<1xi16>, index) -> i16
-    %11 = fir.convert %10 : (i16) -> i32
-    hlfir.yield_element %11 : i32
-  }
-  hlfir.assign %9 to %1#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
-  hlfir.destroy %9 : !hlfir.expr<?xi32>
-  hlfir.destroy %7 : !hlfir.expr<1xi16>
-  hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
-  return
-}
-// CHECK-LABEL:   func.func @_QPtest_kind2_convert(
-// CHECK-SAME:                                     %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"},
-// CHECK-SAME:                                     %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "val"},
-// CHECK-SAME:                                     %[[VAL_2:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
-// CHECK:           %[[VAL_3:.*]] = arith.constant false
-// CHECK:           %[[VAL_4:.*]] = arith.constant true
-// CHECK:           %[[VAL_5:.*]] = arith.constant 2147483647 : i32
-// CHECK:           %[[VAL_6:.*]] = arith.constant 1 : i16
-// CHECK:           %[[VAL_7:.*]] = arith.constant 0 : index
-// CHECK:           %[[VAL_8:.*]] = arith.constant 0 : i16
-// CHECK:           %[[VAL_9:.*]] = arith.constant 1 : index
-// CHECK:           %[[VAL_10:.*]] = fir.alloca i16
-// CHECK:           %[[VAL_11:.*]] = fir.alloca !fir.array<1xi16>
-// CHECK:           %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-// CHECK:           %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-// CHECK:           %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-// CHECK:           %[[VAL_15:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
-// CHECK:           %[[VAL_16:.*]] = hlfir.designate %[[VAL_11]] (%[[VAL_9]])  : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
-// CHECK:           fir.store %[[VAL_8]] to %[[VAL_16]] : !fir.ref<i16>
-// CHECK:           fir.store %[[VAL_8]] to %[[VAL_10]] : !fir.ref<i16>
-// CHECK:           %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_12]]#0, %[[VAL_7]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-// CHECK:           %[[VAL_18:.*]] = arith.subi %[[VAL_17]]#1, %[[VAL_9]] : index
-// CHECK:           %[[VAL_19:.*]] = fir.do_loop %[[VAL_20:.*]] = %[[VAL_7]] to %[[VAL_18]] step %[[VAL_9]] iter_args(%[[VAL_21:.*]] = %[[VAL_5]]) -> (i32) {
-// CHECK:             %[[VAL_22:.*]] = arith.addi %[[VAL_20]], %[[VAL_9]] : index
-// CHECK:             %[[VAL_23:.*]] = hlfir.designate %[[VAL_12]]#0 (%[[VAL_22]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK:             %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref<i32>
-// CHECK:             %[[VAL_25:.*]] = arith.cmpi sge, %[[VAL_24]], %[[VAL_15]] : i32
-// CHECK:             %[[VAL_26:.*]] = fir.if %[[VAL_25]] -> (i32) {
-// CHECK:               %[[VAL_27:.*]] = fir.load %[[VAL_10]] : !fir.ref<i16>
-// CHECK:               %[[VAL_28:.*]] = hlfir.designate %[[VAL_12]]#0 (%[[VAL_22]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK:               %[[VAL_29:.*]] = fir.load %[[VAL_28]] : !fir.ref<i32>
-// CHECK:               %[[VAL_30:.*]] = arith.cmpi slt, %[[VAL_29]], %[[VAL_21]] : i32
-// CHECK:               %[[VAL_31:.*]] = fir.convert %[[VAL_27]] : (i16) -> i1
-// CHECK:               %[[VAL_32:.*]] = arith.xori %[[VAL_31]], %[[VAL_4]] : i1
-// CHECK:               %[[VAL_33:.*]] = arith.ori %[[VAL_30]], %[[VAL_32]] : i1
-// CHECK:               %[[VAL_34:.*]] = fir.if %[[VAL_33]] -> (i32) {
-// CHECK:                 fir.store %[[VAL_6]] to %[[VAL_10]] : !fir.ref<i16>
-// CHECK:                 %[[VAL_35:.*]] = hlfir.designate %[[VAL_11]] (%[[VAL_9]])  : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
-// CHECK:                 %[[VAL_36:.*]] = fir.convert %[[VAL_22]] : (index) -> i16
-// CHECK:                 fir.store %[[VAL_36]] to %[[VAL_35]] : !fir.ref<i16>
-// CHECK:                 fir.result %[[VAL_29]] : i32
-// CHECK:               } else {
-// CHECK:                 fir.result %[[VAL_21]] : i32
-// CHECK:               }
-// CHECK:               fir.result %[[VAL_34]] : i32
-// CHECK:             } else {
-// CHECK:               fir.result %[[VAL_21]] : i32
-// CHECK:             }
-// CHECK:             fir.result %[[VAL_26]] : i32
-// CHECK:           }
-// CHECK:           %[[VAL_37:.*]] = hlfir.as_expr %[[VAL_11]] move %[[VAL_3]] : (!fir.ref<!fir.array<1xi16>>, i1) -> !hlfir.expr<1xi16>
-// CHECK:           fir.do_loop %[[VAL_38:.*]] = %[[VAL_9]] to %[[VAL_9]] step %[[VAL_9]] unordered {
-// CHECK:             %[[VAL_39:.*]] = hlfir.apply %[[VAL_37]], %[[VAL_38]] : (!hlfir.expr<1xi16>, index) -> i16
-// CHECK:             %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (i16) -> i32
-// CHECK:             %[[VAL_41:.*]] = hlfir.designate %[[VAL_13]]#0 (%[[VAL_38]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK:             hlfir.assign %[[VAL_40]] to %[[VAL_41]] : i32, !fir.ref<i32>
-// CHECK:           }
-// CHECK:           return
-// CHECK:         }
-
-
-func.func @_QPtest_float(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<f32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
-  %c0 = arith.constant 0 : index
-  %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
-  %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-  %3 = fir.load %2#0 : !fir.ref<f32>
-  %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
-  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
-  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %8 = hlfir.designate %0#0 (%arg3)  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
-    %9 = fir.load %8 : !fir.ref<f32>
-    %10 = arith.cmpf oge, %9, %3 : f32
-    %11 = fir.convert %10 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %11 : !fir.logical<4>
-  }
-  %7 = hlfir.minloc %0#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xf32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32>
-  hlfir.assign %7 to %1#0 : !hlfir.expr<1xi32>, !fir.box<!fir.array<?xi32>>
-  hlfir.destroy %7 : !hlfir.expr<1xi32>
-  hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
-  return
-}
-// CHECK-LABEL: _QPtest_float
-// CHECK:        %cst = arith.constant 0x7F800000 : f32
-// CHECK:        %[[V11:.*]] = fir.do_loop %arg3 = %c0 to %[[V10:.*]] step %c1 iter_args(%arg4 = %cst) -> (f32) {
-// CHECK-NEXT:     %[[V14:.*]] = arith.addi %arg3, %c1 : index
-// CHECK-NEXT:     %[[V15:.*]] = hlfir.designate %[[V1:.*]]#0 (%[[V14]])  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
-// CHECK-NEXT:     %[[V16:.*]] = fir.load %[[V15]] : !fir.ref<f32>
-// CHECK-NEXT:     %[[V17:.*]] = arith.cmpf oge, %[[V16]], %[[V4:.*]] : f32
-// CHECK-NEXT:     %[[V18:.*]] = fir.if %[[V17]] -> (f32) {
-// CHECK-NEXT:       %[[ISFIRST:.*]] = fir.load %[[V0:.*]] : !fir.ref<i32>
-// CHECK-NEXT:       %[[V19:.*]] = hlfir.designate %[[V1]]#0 (%[[V14]]) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
-// CHECK-NEXT:       %[[V20:.*]] = fir.load %[[V19]] : !fir.ref<f32>
-// CHECK-NEXT:       %[[NEW_MIN:.*]] = arith.cmpf olt, %[[V20]], %arg4 fastmath<contract> : f32
-// CHECK-NEXT:       %[[CONDRED:.*]] = arith.cmpf une, %arg4, %arg4 fastmath<contract> : f32
-// CHECK-NEXT:       %[[CONDELEM:.*]] = arith.cmpf oeq, %[[V20]], %[[V20]] fastmath<contract> : f32
-// CHECK-NEXT:       %[[ANDCOND:.*]] = arith.andi %[[CONDRED]], %[[CONDELEM]] : i1
-// CHECK-NEXT:       %[[NEW_MIN2:.*]] = arith.ori %[[NEW_MIN]], %[[ANDCOND]] : i1
-// CHECK-NEXT:       %[[ISFIRSTL:.*]] = fir.convert %[[ISFIRST]] : (i32) -> i1
-// CHECK-NEXT:       %[[ISFIRSTNOT:.*]] = arith.xori %[[ISFIRSTL]], %true : i1
-// CHECK-NEXT:       %[[ORCOND:.*]] = arith.ori %[[NEW_MIN2]], %[[ISFIRSTNOT]] : i1
-// CHECK-NEXT:       %[[V22:.*]] = fir.if %[[ORCOND]] -> (f32) {
-// CHECK-NEXT:         fir.store %c1_i32 to %[[V0]] : !fir.ref<i32>
-// CHECK-NEXT:         %[[V23:.*]] = hlfir.designate %{{.}} (%c1) : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:         %[[V24:.*]] = fir.convert %[[V14]] : (index) -> i32
-// CHECK-NEXT:         fir.store %[[V24]] to %[[V23]] : !fir.ref<i32>
-// CHECK-NEXT:         fir.result %[[V20]] : f32
-// CHECK-NEXT:       } else {
-// CHECK-NEXT:         fir.result %arg4 : f32
-// CHECK-NEXT:       }
-// CHECK-NEXT:       fir.result %[[V22]] : f32
-// CHECK-NEXT:     } else {
-// CHECK-NEXT:       fir.result %arg4 : f32
-// CHECK-NEXT:     }
-// CHECK-NEXT:     fir.result %[[V18]] : f32
-// CHECK-NEXT:   }
-
-
-func.func @_QPtest_assignshape(%arg0: !fir.ref<!fir.array<3x3xf32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<f32> {fir.bindc_name = "val"}, %arg2: !fir.ref<!fir.array<3xi32>> {fir.bindc_name = "m"}) {
-  %c2 = arith.constant 2 : index
-  %c1 = arith.constant 1 : index
-  %c3 = arith.constant 3 : index
-  %0 = fir.shape %c3, %c3 : (index, index) -> !fir.shape<2>
-  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFtestEarray"} : (!fir.ref<!fir.array<3x3xf32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<3x3xf32>>, !fir.ref<!fir.array<3x3xf32>>)
-  %2 = fir.shape %c3 : (index) -> !fir.shape<1>
-  %3:2 = hlfir.declare %arg2(%2) {uniq_name = "_QFtestEm"} : (!fir.ref<!fir.array<3xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3xi32>>, !fir.ref<!fir.array<3xi32>>)
-  %4:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-  %5 = fir.load %4#0 : !fir.ref<f32>
-  %6 = hlfir.elemental %0 unordered : (!fir.shape<2>) -> !hlfir.expr<3x3x!fir.logical<4>> {
-  ^bb0(%arg3: index, %arg4: index):
-    %10 = hlfir.designate %1#0 (%arg3, %arg4)  : (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
-    %11 = fir.load %10 : !fir.ref<f32>
-    %12 = arith.cmpf oge, %11, %5 : f32
-    %13 = fir.convert %12 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %13 : !fir.logical<4>
-  }
-  %7 = hlfir.minloc %1#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.ref<!fir.array<3x3xf32>>, !hlfir.expr<3x3x!fir.logical<4>>) -> !hlfir.expr<2xi32>
-  %8 = fir.shape %c2 : (index) -> !fir.shape<1>
-  %9 = hlfir.designate %3#0 (%c1:%c2:%c1)  shape %8 : (!fir.ref<!fir.array<3xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<2xi32>>
-  hlfir.assign %7 to %9 : !hlfir.expr<2xi32>, !fir.ref<!fir.array<2xi32>>
-  hlfir.destroy %7 : !hlfir.expr<2xi32>
-  hlfir.destroy %6 : !hlfir.expr<3x3x!fir.logical<4>>
-  return
-}
-// Not supported as the input is not a box
-// CHECK-LABEL: _QPtest_assignshape
-// CHECK: hlfir.minloc
-
-
-func.func @_QFPtest_character(%arg0: !fir.box<!fir.array<?x!fir.char<1>>> {fir.bindc_name = "b"}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "c"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
-  %c0 = arith.constant 0 : index
-  %c1 = arith.constant 1 : index
-  %0:2 = hlfir.declare %arg0 typeparams %c1 {uniq_name = "_QFFtestEb"} : (!fir.box<!fir.array<?x!fir.char<1>>>, index) -> (!fir.box<!fir.array<?x!fir.char<1>>>, !fir.box<!fir.array<?x!fir.char<1>>>)
-  %1:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestEc"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %2 = fir.alloca !fir.array<1xi32> {bindc_name = "m", uniq_name = "_QFFtestEm"}
-  %3 = fir.shape %c1 : (index) -> !fir.shape<1>
-  %4:2 = hlfir.declare %2(%3) {uniq_name = "_QFFtestEm"} : (!fir.ref<!fir.array<1xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<1xi32>>, !fir.ref<!fir.array<1xi32>>)
-  %5 = fir.alloca i32 {bindc_name = "test", uniq_name = "_QFFtestEtest"}
-  %6:2 = hlfir.declare %5 {uniq_name = "_QFFtestEtest"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %7:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %8 = fir.load %7#0 : !fir.ref<i32>
-  %9:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-  %10 = fir.shape %9#1 : (index) -> !fir.shape<1>
-  %11 = hlfir.elemental %10 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %16 = hlfir.designate %1#0 (%arg3)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-    %17 = fir.load %16 : !fir.ref<i32>
-    %18 = arith.cmpi eq, %17, %8 : i32
-    %19 = fir.convert %18 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %19 : !fir.logical<4>
-  }
-  %12 = hlfir.minloc %0#0 mask %11 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?x!fir.char<1>>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32>
-  hlfir.assign %12 to %4#0 : !hlfir.expr<1xi32>, !fir.ref<!fir.array<1xi32>>
-  hlfir.destroy %12 : !hlfir.expr<1xi32>
-  hlfir.destroy %11 : !hlfir.expr<?x!fir.logical<4>>
-  %13 = hlfir.designate %4#0 (%c1)  : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
-  %14 = fir.load %13 : !fir.ref<i32>
-  hlfir.assign %14 to %6#0 : i32, !fir.ref<i32>
-  %15 = fir.load %6#1 : !fir.ref<i32>
-  return %15 : i32
-}
-// Characters are not supported at the moment
-// CHECK-LABEL: _QFPtest_character
-// CHECK: hlfir.minloc
-
-
-func.func @_QPtest_parts(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "x"}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "mask"}) -> f32 {
-  %c1 = arith.constant 1 : index
-  %c5 = arith.constant 5 : index
-  %c0 = arith.constant 0 : index
-  %c5_i32 = arith.constant 5 : i32
-  %0:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEmask"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %1 = fir.alloca f32 {bindc_name = "test", uniq_name = "_QFtestEtest"}
-  %2:2 = hlfir.declare %1 {uniq_name = "_QFtestEtest"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-  %3:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEx"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
-  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
-  ^bb0(%arg2: index):
-    %11 = hlfir.designate %0#0 (%arg2)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-    %12 = fir.load %11 : !fir.ref<i32>
-    %13 = arith.cmpi sge, %12, %c5_i32 : i32
-    %14 = fir.convert %13 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %14 : !fir.logical<4>
-  }
-  %7 = hlfir.minloc %3#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32>
-  %8 = fir.shape %c1 : (index) -> !fir.shape<1>
-  %9 = hlfir.designate %3#0 (%c5:%c5:%c1)  shape %8 : (!fir.box<!fir.array<?xi32>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<1xi32>>
-  hlfir.assign %7 to %9 : !hlfir.expr<1xi32>, !fir.box<!fir.array<1xi32>>
-  hlfir.destroy %7 : !hlfir.expr<1xi32>
-  hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
-  %10 = fir.load %2#1 : !fir.ref<f32>
-  return %10 : f32
-}
-// Characters are not supported at the moment
-// CHECK-LABEL: _QPtest_parts
-// CHECK: fir.do_loop %{{.*}} = %c0 to %{{.*}} step %c1 iter_args(%{{.*}} = %c2147483647_i32) -> (i32) {
-
diff --git a/flang/test/HLFIR/minval-elemental.fir b/flang/test/HLFIR/minval-elemental.fir
deleted file mode 100644
index 64cd540..0000000
--- a/flang/test/HLFIR/minval-elemental.fir
+++ /dev/null
@@ -1,95 +0,0 @@
-// Test maxval inlining for both elemental and designate
-// RUN: fir-opt %s -opt-bufferization | FileCheck %s
-
-// subroutine test(array)
-//   integer :: array(:), x
-//   x = minval(abs(array))
-// end subroutine test
-
-func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}) {
-  %c31_i32 = arith.constant 31 : i32
-  %c0 = arith.constant 0 : index
-  %0 = fir.dummy_scope : !fir.dscope
-  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %2 = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFtestEx"}
-  %3:2 = hlfir.declare %2 {uniq_name = "_QFtestEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %4:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
-  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
-  ^bb0(%arg1: index):
-    %8 = hlfir.designate %1#0 (%arg1)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-    %9 = fir.load %8 : !fir.ref<i32>
-    %10 = arith.shrsi %9, %c31_i32 : i32
-    %11 = arith.xori %9, %10 : i32
-    %12 = arith.subi %11, %10 : i32
-    hlfir.yield_element %12 : i32
-  }
-  %7 = hlfir.minval %6 {fastmath = #arith.fastmath<contract>} : (!hlfir.expr<?xi32>) -> i32
-  hlfir.assign %7 to %3#0 : i32, !fir.ref<i32>
-  hlfir.destroy %6 : !hlfir.expr<?xi32>
-  return
-}
-
-// CHECK-LABEL: func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}) {
-// CHECK-NEXT:    %c1 = arith.constant 1 : index
-// CHECK-NEXT:    %c2147483647_i32 = arith.constant 2147483647 : i32
-// CHECK-NEXT:    %c31_i32 = arith.constant 31 : i32
-// CHECK-NEXT:    %c0 = arith.constant 0 : index
-// CHECK-NEXT:    %[[V0:.*]] = fir.dummy_scope : !fir.dscope
-// CHECK-NEXT:    %[[V1:.*]]:2 = hlfir.declare %arg0 dummy_scope %[[V0]] {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-// CHECK-NEXT:    %[[V2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFtestEx"}
-// CHECK-NEXT:    %[[V3:.*]]:2 = hlfir.declare %[[V2]] {uniq_name = "_QFtestEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-// CHECK-NEXT:    %[[V4:.*]]:3 = fir.box_dims %[[V1]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-// CHECK-NEXT:    %[[V5:.*]] = fir.do_loop %arg1 = %c1 to %[[V4]]#1 step %c1 iter_args(%arg2 = %c2147483647_i32) -> (i32) {
-// CHECK-NEXT:      %[[V6:.*]] = hlfir.designate %[[V1]]#0 (%arg1)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:      %[[V7:.*]] = fir.load %[[V6]] : !fir.ref<i32>
-// CHECK-NEXT:      %[[V8:.*]] = arith.shrsi %[[V7]], %c31_i32 : i32
-// CHECK-NEXT:      %[[V9:.*]] = arith.xori %[[V7]], %[[V8]] : i32
-// CHECK-NEXT:      %[[V10:.*]] = arith.subi %[[V9]], %[[V8]] : i32
-// CHECK-NEXT:      %[[V11:.*]] = arith.cmpi slt, %[[V10]], %arg2 : i32
-// CHECK-NEXT:      %[[V12:.*]] = arith.select %[[V11]], %[[V10]], %arg2 : i32
-// CHECK-NEXT:      fir.result %[[V12]] : i32
-// CHECK-NEXT:    }
-// CHECK-NEXT:    hlfir.assign %[[V5]] to %[[V3]]#0 : i32, !fir.ref<i32>
-// CHECK-NEXT:    return
-// CHECK-NEXT:  }
-
-// subroutine test(array)
-//   real :: array(:), x
-//   x = minval(array(3:6))
-// end subroutine test
-
-func.func @_QPtest_float(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "array"}) {
-  %c4 = arith.constant 4 : index
-  %c1 = arith.constant 1 : index
-  %c6 = arith.constant 6 : index
-  %c3 = arith.constant 3 : index
-  %0 = fir.dummy_scope : !fir.dscope
-  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
-  %2 = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFtestEx"}
-  %3:2 = hlfir.declare %2 {uniq_name = "_QFtestEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-  %4 = fir.shape %c4 : (index) -> !fir.shape<1>
-  %5 = hlfir.designate %1#0 (%c3:%c6:%c1)  shape %4 : (!fir.box<!fir.array<?xf32>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<4xf32>>
-  %6 = hlfir.minval %5 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<4xf32>>) -> f32
-  hlfir.assign %6 to %3#0 : f32, !fir.ref<f32>
-  return
-}
-
-// CHECK-LABEL: _QPtest_float
-// CHECK:       %cst = arith.constant 0x7F800000 : f32
-// CHECK:       %[[V4:.*]] = fir.shape %c4 : (index) -> !fir.shape<1>
-// CHECK-NEXT:  %[[V5:.*]] = hlfir.designate %{{.*}} (%c3:%c6:%c1)  shape %[[V4]] : (!fir.box<!fir.array<?xf32>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<4xf32>>
-// CHECK-NEXT:  %[[V6:.*]] = fir.do_loop %arg1 = %c1 to %c4 step %c1 iter_args(%arg2 = %cst) -> (f32) {
-// CHECK-NEXT:      %[[V7:.*]] = hlfir.designate %[[V5]] (%arg1)  : (!fir.box<!fir.array<4xf32>>, index) -> !fir.ref<f32>
-// CHECK-NEXT:      %[[V8:.*]] = fir.load %[[V7]] : !fir.ref<f32>
-// CHECK-NEXT:      %[[V9:.*]] = arith.cmpf olt, %[[V8]], %arg2 fastmath<contract> : f32
-// CHECK-NEXT:      %[[V10:.*]] = arith.cmpf une, %arg2, %arg2 fastmath<contract> : f32
-// CHECK-NEXT:      %[[V11:.*]] = arith.cmpf oeq, %[[V8]], %[[V8]] fastmath<contract> : f32
-// CHECK-NEXT:      %[[V12:.*]] = arith.andi %[[V10]], %[[V11]] : i1
-// CHECK-NEXT:      %[[V13:.*]] = arith.ori %[[V9]], %[[V12]] : i1
-// CHECK-NEXT:      %[[V14:.*]] = arith.select %[[V13]], %[[V8]], %arg2 : f32
-// CHECK-NEXT:      fir.result %[[V14]] : f32
-// CHECK-NEXT:    }
-// CHECK-NEXT:    hlfir.assign %[[V6]] to %3#0 : f32, !fir.ref<f32>
-// CHECK-NEXT:    return
-// CHECK-NEXT:  }
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-all.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-all.fir
new file mode 100644
index 0000000..fb0f398
--- /dev/null
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-all.fir
@@ -0,0 +1,123 @@
+// RUN: fir-opt %s --simplify-hlfir-intrinsics | FileCheck %s
+
+func.func @test_total_expr(%arg0: !hlfir.expr<?x?x!fir.logical<4>>) -> !fir.logical<4> {
+  %0 = hlfir.all %arg0 : (!hlfir.expr<?x?x!fir.logical<4>>) -> !fir.logical<4>
+  return %0 : !fir.logical<4>
+}
+// CHECK-LABEL:   func.func @test_total_expr(
+// CHECK-SAME:                               %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x!fir.logical<4>>) -> !fir.logical<4> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant true
+// CHECK:           %[[VAL_3:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?x!fir.logical<4>>) -> !fir.shape<2>
+// CHECK:           %[[VAL_4:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 1 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_6:.*]] = fir.do_loop %[[VAL_7:.*]] = %[[VAL_1]] to %[[VAL_5]] step %[[VAL_1]] unordered iter_args(%[[VAL_8:.*]] = %[[VAL_2]]) -> (i1) {
+// CHECK:             %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_1]] to %[[VAL_4]] step %[[VAL_1]] unordered iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (i1) {
+// CHECK:               %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]], %[[VAL_7]] : (!hlfir.expr<?x?x!fir.logical<4>>, index, index) -> !fir.logical<4>
+// CHECK:               %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (!fir.logical<4>) -> i1
+// CHECK:               %[[VAL_14:.*]] = arith.andi %[[VAL_13]], %[[VAL_11]] : i1
+// CHECK:               fir.result %[[VAL_14]] : i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_9]] : i1
+// CHECK:           }
+// CHECK:           %[[VAL_15:.*]] = fir.convert %[[VAL_6]] : (i1) -> !fir.logical<4>
+// CHECK:           return %[[VAL_15]] : !fir.logical<4>
+// CHECK:         }
+
+func.func @test_partial_expr(%arg0: !hlfir.expr<?x?x?x!fir.logical<1>>) -> !hlfir.expr<?x?x!fir.logical<1>> {
+  %dim = arith.constant 2 : i32
+  %0 = hlfir.all %arg0 dim %dim : (!hlfir.expr<?x?x?x!fir.logical<1>>, i32) -> !hlfir.expr<?x?x!fir.logical<1>>
+  return %0 : !hlfir.expr<?x?x!fir.logical<1>>
+}
+// CHECK-LABEL:   func.func @test_partial_expr(
+// CHECK-SAME:                                 %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x?x!fir.logical<1>>) -> !hlfir.expr<?x?x!fir.logical<1>> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant true
+// CHECK:           %[[VAL_3:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?x?x!fir.logical<1>>) -> !fir.shape<3>
+// CHECK:           %[[VAL_4:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 0 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 1 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_6:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 2 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_7:.*]] = fir.shape %[[VAL_4]], %[[VAL_6]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_8:.*]] = hlfir.elemental %[[VAL_7]] unordered : (!fir.shape<2>) -> !hlfir.expr<?x?x!fir.logical<1>> {
+// CHECK:           ^bb0(%[[VAL_9:.*]]: index, %[[VAL_10:.*]]: index):
+// CHECK:             %[[VAL_11:.*]] = fir.do_loop %[[VAL_12:.*]] = %[[VAL_1]] to %[[VAL_5]] step %[[VAL_1]] unordered iter_args(%[[VAL_13:.*]] = %[[VAL_2]]) -> (i1) {
+// CHECK:               %[[VAL_14:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_9]], %[[VAL_12]], %[[VAL_10]] : (!hlfir.expr<?x?x?x!fir.logical<1>>, index, index, index) -> !fir.logical<1>
+// CHECK:               %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (!fir.logical<1>) -> i1
+// CHECK:               %[[VAL_16:.*]] = arith.andi %[[VAL_15]], %[[VAL_13]] : i1
+// CHECK:               fir.result %[[VAL_16]] : i1
+// CHECK:             }
+// CHECK:             %[[VAL_17:.*]] = fir.convert %[[VAL_11]] : (i1) -> !fir.logical<1>
+// CHECK:             hlfir.yield_element %[[VAL_17]] : !fir.logical<1>
+// CHECK:           }
+// CHECK:           return %[[VAL_8]] : !hlfir.expr<?x?x!fir.logical<1>>
+// CHECK:         }
+
+func.func @test_total_var(%arg0: !fir.box<!fir.array<?x?x!fir.logical<4>>>) -> !fir.logical<4> {
+  %0 = hlfir.all %arg0 : (!fir.box<!fir.array<?x?x!fir.logical<4>>>) -> !fir.logical<4>
+  return %0 : !fir.logical<4>
+}
+// CHECK-LABEL:   func.func @test_total_var(
+// CHECK-SAME:                              %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?x!fir.logical<4>>>) -> !fir.logical<4> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant true
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_6:.*]] = fir.do_loop %[[VAL_7:.*]] = %[[VAL_2]] to %[[VAL_5]]#1 step %[[VAL_2]] unordered iter_args(%[[VAL_8:.*]] = %[[VAL_1]]) -> (i1) {
+// CHECK:             %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_2]] to %[[VAL_4]]#1 step %[[VAL_2]] unordered iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (i1) {
+// CHECK:               %[[VAL_12:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_13:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_14:.*]] = arith.subi %[[VAL_12]]#0, %[[VAL_2]] : index
+// CHECK:               %[[VAL_15:.*]] = arith.addi %[[VAL_10]], %[[VAL_14]] : index
+// CHECK:               %[[VAL_16:.*]] = arith.subi %[[VAL_13]]#0, %[[VAL_2]] : index
+// CHECK:               %[[VAL_17:.*]] = arith.addi %[[VAL_7]], %[[VAL_16]] : index
+// CHECK:               %[[VAL_18:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_15]], %[[VAL_17]])  : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index, index) -> !fir.ref<!fir.logical<4>>
+// CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_18]] : !fir.ref<!fir.logical<4>>
+// CHECK:               %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (!fir.logical<4>) -> i1
+// CHECK:               %[[VAL_21:.*]] = arith.andi %[[VAL_20]], %[[VAL_11]] : i1
+// CHECK:               fir.result %[[VAL_21]] : i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_9]] : i1
+// CHECK:           }
+// CHECK:           %[[VAL_22:.*]] = fir.convert %[[VAL_6]] : (i1) -> !fir.logical<4>
+// CHECK:           return %[[VAL_22]] : !fir.logical<4>
+// CHECK:         }
+
+func.func @test_partial_var(%arg0: !fir.box<!fir.array<?x?x?x!fir.logical<2>>>) -> !hlfir.expr<?x?x!fir.logical<2>> {
+  %dim = arith.constant 2 : i32
+  %0 = hlfir.all %arg0 dim %dim : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, i32) -> !hlfir.expr<?x?x!fir.logical<2>>
+  return %0 : !hlfir.expr<?x?x!fir.logical<2>>
+}
+// CHECK-LABEL:   func.func @test_partial_var(
+// CHECK-SAME:                                %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?x?x!fir.logical<2>>>) -> !hlfir.expr<?x?x!fir.logical<2>> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant true
+// CHECK:           %[[VAL_2:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_7:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_8:.*]] = fir.shape %[[VAL_5]]#1, %[[VAL_7]]#1 : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_9:.*]] = hlfir.elemental %[[VAL_8]] unordered : (!fir.shape<2>) -> !hlfir.expr<?x?x!fir.logical<2>> {
+// CHECK:           ^bb0(%[[VAL_10:.*]]: index, %[[VAL_11:.*]]: index):
+// CHECK:             %[[VAL_12:.*]] = fir.do_loop %[[VAL_13:.*]] = %[[VAL_3]] to %[[VAL_6]]#1 step %[[VAL_3]] unordered iter_args(%[[VAL_14:.*]] = %[[VAL_1]]) -> (i1) {
+// CHECK:               %[[VAL_15:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_18:.*]] = arith.subi %[[VAL_15]]#0, %[[VAL_3]] : index
+// CHECK:               %[[VAL_19:.*]] = arith.addi %[[VAL_10]], %[[VAL_18]] : index
+// CHECK:               %[[VAL_20:.*]] = arith.subi %[[VAL_16]]#0, %[[VAL_3]] : index
+// CHECK:               %[[VAL_21:.*]] = arith.addi %[[VAL_13]], %[[VAL_20]] : index
+// CHECK:               %[[VAL_22:.*]] = arith.subi %[[VAL_17]]#0, %[[VAL_3]] : index
+// CHECK:               %[[VAL_23:.*]] = arith.addi %[[VAL_11]], %[[VAL_22]] : index
+// CHECK:               %[[VAL_24:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_19]], %[[VAL_21]], %[[VAL_23]])  : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index, index, index) -> !fir.ref<!fir.logical<2>>
+// CHECK:               %[[VAL_25:.*]] = fir.load %[[VAL_24]] : !fir.ref<!fir.logical<2>>
+// CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (!fir.logical<2>) -> i1
+// CHECK:               %[[VAL_27:.*]] = arith.andi %[[VAL_26]], %[[VAL_14]] : i1
+// CHECK:               fir.result %[[VAL_27]] : i1
+// CHECK:             }
+// CHECK:             %[[VAL_28:.*]] = fir.convert %[[VAL_12]] : (i1) -> !fir.logical<2>
+// CHECK:             hlfir.yield_element %[[VAL_28]] : !fir.logical<2>
+// CHECK:           }
+// CHECK:           return %[[VAL_9]] : !hlfir.expr<?x?x!fir.logical<2>>
+// CHECK:         }
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-any.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-any.fir
new file mode 100644
index 0000000..5bd76f3
--- /dev/null
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-any.fir
@@ -0,0 +1,123 @@
+// RUN: fir-opt %s --simplify-hlfir-intrinsics | FileCheck %s
+
+func.func @test_total_expr(%arg0: !hlfir.expr<?x?x!fir.logical<4>>) -> !fir.logical<4> {
+  %0 = hlfir.any %arg0 : (!hlfir.expr<?x?x!fir.logical<4>>) -> !fir.logical<4>
+  return %0 : !fir.logical<4>
+}
+// CHECK-LABEL:   func.func @test_total_expr(
+// CHECK-SAME:                               %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x!fir.logical<4>>) -> !fir.logical<4> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?x!fir.logical<4>>) -> !fir.shape<2>
+// CHECK:           %[[VAL_4:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 1 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_6:.*]] = fir.do_loop %[[VAL_7:.*]] = %[[VAL_1]] to %[[VAL_5]] step %[[VAL_1]] unordered iter_args(%[[VAL_8:.*]] = %[[VAL_2]]) -> (i1) {
+// CHECK:             %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_1]] to %[[VAL_4]] step %[[VAL_1]] unordered iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (i1) {
+// CHECK:               %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]], %[[VAL_7]] : (!hlfir.expr<?x?x!fir.logical<4>>, index, index) -> !fir.logical<4>
+// CHECK:               %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (!fir.logical<4>) -> i1
+// CHECK:               %[[VAL_14:.*]] = arith.ori %[[VAL_13]], %[[VAL_11]] : i1
+// CHECK:               fir.result %[[VAL_14]] : i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_9]] : i1
+// CHECK:           }
+// CHECK:           %[[VAL_15:.*]] = fir.convert %[[VAL_6]] : (i1) -> !fir.logical<4>
+// CHECK:           return %[[VAL_15]] : !fir.logical<4>
+// CHECK:         }
+
+func.func @test_partial_expr(%arg0: !hlfir.expr<?x?x?x!fir.logical<1>>) -> !hlfir.expr<?x?x!fir.logical<1>> {
+  %dim = arith.constant 2 : i32
+  %0 = hlfir.any %arg0 dim %dim : (!hlfir.expr<?x?x?x!fir.logical<1>>, i32) -> !hlfir.expr<?x?x!fir.logical<1>>
+  return %0 : !hlfir.expr<?x?x!fir.logical<1>>
+}
+// CHECK-LABEL:   func.func @test_partial_expr(
+// CHECK-SAME:                                 %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x?x!fir.logical<1>>) -> !hlfir.expr<?x?x!fir.logical<1>> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?x?x!fir.logical<1>>) -> !fir.shape<3>
+// CHECK:           %[[VAL_4:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 0 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 1 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_6:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 2 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_7:.*]] = fir.shape %[[VAL_4]], %[[VAL_6]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_8:.*]] = hlfir.elemental %[[VAL_7]] unordered : (!fir.shape<2>) -> !hlfir.expr<?x?x!fir.logical<1>> {
+// CHECK:           ^bb0(%[[VAL_9:.*]]: index, %[[VAL_10:.*]]: index):
+// CHECK:             %[[VAL_11:.*]] = fir.do_loop %[[VAL_12:.*]] = %[[VAL_1]] to %[[VAL_5]] step %[[VAL_1]] unordered iter_args(%[[VAL_13:.*]] = %[[VAL_2]]) -> (i1) {
+// CHECK:               %[[VAL_14:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_9]], %[[VAL_12]], %[[VAL_10]] : (!hlfir.expr<?x?x?x!fir.logical<1>>, index, index, index) -> !fir.logical<1>
+// CHECK:               %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (!fir.logical<1>) -> i1
+// CHECK:               %[[VAL_16:.*]] = arith.ori %[[VAL_15]], %[[VAL_13]] : i1
+// CHECK:               fir.result %[[VAL_16]] : i1
+// CHECK:             }
+// CHECK:             %[[VAL_17:.*]] = fir.convert %[[VAL_11]] : (i1) -> !fir.logical<1>
+// CHECK:             hlfir.yield_element %[[VAL_17]] : !fir.logical<1>
+// CHECK:           }
+// CHECK:           return %[[VAL_8]] : !hlfir.expr<?x?x!fir.logical<1>>
+// CHECK:         }
+
+func.func @test_total_var(%arg0: !fir.box<!fir.array<?x?x!fir.logical<4>>>) -> !fir.logical<4> {
+  %0 = hlfir.any %arg0 : (!fir.box<!fir.array<?x?x!fir.logical<4>>>) -> !fir.logical<4>
+  return %0 : !fir.logical<4>
+}
+// CHECK-LABEL:   func.func @test_total_var(
+// CHECK-SAME:                              %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?x!fir.logical<4>>>) -> !fir.logical<4> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant false
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_6:.*]] = fir.do_loop %[[VAL_7:.*]] = %[[VAL_2]] to %[[VAL_5]]#1 step %[[VAL_2]] unordered iter_args(%[[VAL_8:.*]] = %[[VAL_1]]) -> (i1) {
+// CHECK:             %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_2]] to %[[VAL_4]]#1 step %[[VAL_2]] unordered iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (i1) {
+// CHECK:               %[[VAL_12:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_13:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_14:.*]] = arith.subi %[[VAL_12]]#0, %[[VAL_2]] : index
+// CHECK:               %[[VAL_15:.*]] = arith.addi %[[VAL_10]], %[[VAL_14]] : index
+// CHECK:               %[[VAL_16:.*]] = arith.subi %[[VAL_13]]#0, %[[VAL_2]] : index
+// CHECK:               %[[VAL_17:.*]] = arith.addi %[[VAL_7]], %[[VAL_16]] : index
+// CHECK:               %[[VAL_18:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_15]], %[[VAL_17]])  : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index, index) -> !fir.ref<!fir.logical<4>>
+// CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_18]] : !fir.ref<!fir.logical<4>>
+// CHECK:               %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (!fir.logical<4>) -> i1
+// CHECK:               %[[VAL_21:.*]] = arith.ori %[[VAL_20]], %[[VAL_11]] : i1
+// CHECK:               fir.result %[[VAL_21]] : i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_9]] : i1
+// CHECK:           }
+// CHECK:           %[[VAL_22:.*]] = fir.convert %[[VAL_6]] : (i1) -> !fir.logical<4>
+// CHECK:           return %[[VAL_22]] : !fir.logical<4>
+// CHECK:         }
+
+func.func @test_partial_var(%arg0: !fir.box<!fir.array<?x?x?x!fir.logical<2>>>) -> !hlfir.expr<?x?x!fir.logical<2>> {
+  %dim = arith.constant 2 : i32
+  %0 = hlfir.any %arg0 dim %dim : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, i32) -> !hlfir.expr<?x?x!fir.logical<2>>
+  return %0 : !hlfir.expr<?x?x!fir.logical<2>>
+}
+// CHECK-LABEL:   func.func @test_partial_var(
+// CHECK-SAME:                                %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?x?x!fir.logical<2>>>) -> !hlfir.expr<?x?x!fir.logical<2>> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant false
+// CHECK:           %[[VAL_2:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_7:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_8:.*]] = fir.shape %[[VAL_5]]#1, %[[VAL_7]]#1 : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_9:.*]] = hlfir.elemental %[[VAL_8]] unordered : (!fir.shape<2>) -> !hlfir.expr<?x?x!fir.logical<2>> {
+// CHECK:           ^bb0(%[[VAL_10:.*]]: index, %[[VAL_11:.*]]: index):
+// CHECK:             %[[VAL_12:.*]] = fir.do_loop %[[VAL_13:.*]] = %[[VAL_3]] to %[[VAL_6]]#1 step %[[VAL_3]] unordered iter_args(%[[VAL_14:.*]] = %[[VAL_1]]) -> (i1) {
+// CHECK:               %[[VAL_15:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_18:.*]] = arith.subi %[[VAL_15]]#0, %[[VAL_3]] : index
+// CHECK:               %[[VAL_19:.*]] = arith.addi %[[VAL_10]], %[[VAL_18]] : index
+// CHECK:               %[[VAL_20:.*]] = arith.subi %[[VAL_16]]#0, %[[VAL_3]] : index
+// CHECK:               %[[VAL_21:.*]] = arith.addi %[[VAL_13]], %[[VAL_20]] : index
+// CHECK:               %[[VAL_22:.*]] = arith.subi %[[VAL_17]]#0, %[[VAL_3]] : index
+// CHECK:               %[[VAL_23:.*]] = arith.addi %[[VAL_11]], %[[VAL_22]] : index
+// CHECK:               %[[VAL_24:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_19]], %[[VAL_21]], %[[VAL_23]])  : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index, index, index) -> !fir.ref<!fir.logical<2>>
+// CHECK:               %[[VAL_25:.*]] = fir.load %[[VAL_24]] : !fir.ref<!fir.logical<2>>
+// CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (!fir.logical<2>) -> i1
+// CHECK:               %[[VAL_27:.*]] = arith.ori %[[VAL_26]], %[[VAL_14]] : i1
+// CHECK:               fir.result %[[VAL_27]] : i1
+// CHECK:             }
+// CHECK:             %[[VAL_28:.*]] = fir.convert %[[VAL_12]] : (i1) -> !fir.logical<2>
+// CHECK:             hlfir.yield_element %[[VAL_28]] : !fir.logical<2>
+// CHECK:           }
+// CHECK:           return %[[VAL_9]] : !hlfir.expr<?x?x!fir.logical<2>>
+// CHECK:         }
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-count.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-count.fir
new file mode 100644
index 0000000..44594c6
--- /dev/null
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-count.fir
@@ -0,0 +1,127 @@
+// RUN: fir-opt %s --simplify-hlfir-intrinsics | FileCheck %s
+// Whole-array hlfir.count (no dim operand) over a rank-2 !hlfir.expr; the expected output is a two-level fir.do_loop nest that reads elements with hlfir.apply.
+func.func @test_total_expr(%arg0: !hlfir.expr<?x?x!fir.logical<4>>) -> i32 {
+  %0 = hlfir.count %arg0 : (!hlfir.expr<?x?x!fir.logical<4>>) -> i32
+  return %0 : i32
+}
+// CHECK-LABEL:   func.func @test_total_expr(
+// CHECK-SAME:                               %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x!fir.logical<4>>) -> i32 {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : i32
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?x!fir.logical<4>>) -> !fir.shape<2>
+// CHECK:           %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_6:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 1 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_7:.*]] = fir.do_loop %[[VAL_8:.*]] = %[[VAL_2]] to %[[VAL_6]] step %[[VAL_2]] unordered iter_args(%[[VAL_9:.*]] = %[[VAL_3]]) -> (i32) {
+// CHECK:             %[[VAL_10:.*]] = fir.do_loop %[[VAL_11:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] unordered iter_args(%[[VAL_12:.*]] = %[[VAL_9]]) -> (i32) {
+// CHECK:               %[[VAL_13:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_11]], %[[VAL_8]] : (!hlfir.expr<?x?x!fir.logical<4>>, index, index) -> !fir.logical<4>
+// CHECK:               %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (!fir.logical<4>) -> i1
+// CHECK:               %[[VAL_15:.*]] = arith.addi %[[VAL_12]], %[[VAL_1]] : i32
+// CHECK:               %[[VAL_16:.*]] = arith.select %[[VAL_14]], %[[VAL_15]], %[[VAL_12]] : i32
+// CHECK:               fir.result %[[VAL_16]] : i32
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_10]] : i32
+// CHECK:           }
+// CHECK:           return %[[VAL_7]] : i32
+// CHECK:         }
+// hlfir.count along dim 2 of a rank-3 !hlfir.expr; the expected output is an hlfir.elemental over the first and third extents wrapping one fir.do_loop over the second.
+func.func @test_partial_expr(%arg0: !hlfir.expr<?x?x?x!fir.logical<1>>) -> !hlfir.expr<?x?xi16> {
+  %dim = arith.constant 2 : i32
+  %0 = hlfir.count %arg0 dim %dim : (!hlfir.expr<?x?x?x!fir.logical<1>>, i32) -> !hlfir.expr<?x?xi16>
+  return %0 : !hlfir.expr<?x?xi16>
+}
+// CHECK-LABEL:   func.func @test_partial_expr(
+// CHECK-SAME:                                 %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x?x!fir.logical<1>>) -> !hlfir.expr<?x?xi16> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : i16
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : i16
+// CHECK:           %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?x?x!fir.logical<1>>) -> !fir.shape<3>
+// CHECK:           %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_6:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 1 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_7:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 2 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_8:.*]] = fir.shape %[[VAL_5]], %[[VAL_7]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_9:.*]] = hlfir.elemental %[[VAL_8]] unordered : (!fir.shape<2>) -> !hlfir.expr<?x?xi16> {
+// CHECK:           ^bb0(%[[VAL_10:.*]]: index, %[[VAL_11:.*]]: index):
+// CHECK:             %[[VAL_12:.*]] = fir.do_loop %[[VAL_13:.*]] = %[[VAL_2]] to %[[VAL_6]] step %[[VAL_2]] unordered iter_args(%[[VAL_14:.*]] = %[[VAL_3]]) -> (i16) {
+// CHECK:               %[[VAL_15:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]], %[[VAL_13]], %[[VAL_11]] : (!hlfir.expr<?x?x?x!fir.logical<1>>, index, index, index) -> !fir.logical<1>
+// CHECK:               %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (!fir.logical<1>) -> i1
+// CHECK:               %[[VAL_17:.*]] = arith.addi %[[VAL_14]], %[[VAL_1]] : i16
+// CHECK:               %[[VAL_18:.*]] = arith.select %[[VAL_16]], %[[VAL_17]], %[[VAL_14]] : i16
+// CHECK:               fir.result %[[VAL_18]] : i16
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_12]] : i16
+// CHECK:           }
+// CHECK:           return %[[VAL_9]] : !hlfir.expr<?x?xi16>
+// CHECK:         }
+// Whole-array hlfir.count (no dim operand) over a rank-2 !fir.box; the expected output is a two-level fir.do_loop nest that reads elements with hlfir.designate and fir.load.
+func.func @test_total_var(%arg0: !fir.box<!fir.array<?x?x!fir.logical<4>>>) -> i32 {
+  %0 = hlfir.count %arg0 : (!fir.box<!fir.array<?x?x!fir.logical<4>>>) -> i32
+  return %0 : i32
+}
+// CHECK-LABEL:   func.func @test_total_var(
+// CHECK-SAME:                              %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?x!fir.logical<4>>>) -> i32 {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : i32
+// CHECK:           %[[VAL_2:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_7:.*]] = fir.do_loop %[[VAL_8:.*]] = %[[VAL_3]] to %[[VAL_6]]#1 step %[[VAL_3]] unordered iter_args(%[[VAL_9:.*]] = %[[VAL_2]]) -> (i32) {
+// CHECK:             %[[VAL_10:.*]] = fir.do_loop %[[VAL_11:.*]] = %[[VAL_3]] to %[[VAL_5]]#1 step %[[VAL_3]] unordered iter_args(%[[VAL_12:.*]] = %[[VAL_9]]) -> (i32) {
+// CHECK:               %[[VAL_13:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_14:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_15:.*]] = arith.subi %[[VAL_13]]#0, %[[VAL_3]] : index
+// CHECK:               %[[VAL_16:.*]] = arith.addi %[[VAL_11]], %[[VAL_15]] : index
+// CHECK:               %[[VAL_17:.*]] = arith.subi %[[VAL_14]]#0, %[[VAL_3]] : index
+// CHECK:               %[[VAL_18:.*]] = arith.addi %[[VAL_8]], %[[VAL_17]] : index
+// CHECK:               %[[VAL_19:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_16]], %[[VAL_18]])  : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index, index) -> !fir.ref<!fir.logical<4>>
+// CHECK:               %[[VAL_20:.*]] = fir.load %[[VAL_19]] : !fir.ref<!fir.logical<4>>
+// CHECK:               %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (!fir.logical<4>) -> i1
+// CHECK:               %[[VAL_22:.*]] = arith.addi %[[VAL_12]], %[[VAL_1]] : i32
+// CHECK:               %[[VAL_23:.*]] = arith.select %[[VAL_21]], %[[VAL_22]], %[[VAL_12]] : i32
+// CHECK:               fir.result %[[VAL_23]] : i32
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_10]] : i32
+// CHECK:           }
+// CHECK:           return %[[VAL_7]] : i32
+// CHECK:         }
+// hlfir.count along dim 2 of a rank-3 !fir.box with i64 result elements; the expected output is an hlfir.elemental wrapping one fir.do_loop over the second extent.
+func.func @test_partial_var(%arg0: !fir.box<!fir.array<?x?x?x!fir.logical<2>>>) -> !hlfir.expr<?x?xi64> {
+  %dim = arith.constant 2 : i32
+  %0 = hlfir.count %arg0 dim %dim : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, i32) -> !hlfir.expr<?x?xi64>
+  return %0 : !hlfir.expr<?x?xi64>
+}
+// CHECK-LABEL:   func.func @test_partial_var(
+// CHECK-SAME:                                %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?x?x!fir.logical<2>>>) -> !hlfir.expr<?x?xi64> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : i64
+// CHECK:           %[[VAL_2:.*]] = arith.constant 0 : i64
+// CHECK:           %[[VAL_3:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_5:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_7:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_8:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_9:.*]] = fir.shape %[[VAL_6]]#1, %[[VAL_8]]#1 : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_10:.*]] = hlfir.elemental %[[VAL_9]] unordered : (!fir.shape<2>) -> !hlfir.expr<?x?xi64> {
+// CHECK:           ^bb0(%[[VAL_11:.*]]: index, %[[VAL_12:.*]]: index):
+// CHECK:             %[[VAL_13:.*]] = fir.do_loop %[[VAL_14:.*]] = %[[VAL_4]] to %[[VAL_7]]#1 step %[[VAL_4]] unordered iter_args(%[[VAL_15:.*]] = %[[VAL_2]]) -> (i64) {
+// CHECK:               %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_18:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_19:.*]] = arith.subi %[[VAL_16]]#0, %[[VAL_4]] : index
+// CHECK:               %[[VAL_20:.*]] = arith.addi %[[VAL_11]], %[[VAL_19]] : index
+// CHECK:               %[[VAL_21:.*]] = arith.subi %[[VAL_17]]#0, %[[VAL_4]] : index
+// CHECK:               %[[VAL_22:.*]] = arith.addi %[[VAL_14]], %[[VAL_21]] : index
+// CHECK:               %[[VAL_23:.*]] = arith.subi %[[VAL_18]]#0, %[[VAL_4]] : index
+// CHECK:               %[[VAL_24:.*]] = arith.addi %[[VAL_12]], %[[VAL_23]] : index
+// CHECK:               %[[VAL_25:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_20]], %[[VAL_22]], %[[VAL_24]])  : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index, index, index) -> !fir.ref<!fir.logical<2>>
+// CHECK:               %[[VAL_26:.*]] = fir.load %[[VAL_25]] : !fir.ref<!fir.logical<2>>
+// CHECK:               %[[VAL_27:.*]] = fir.convert %[[VAL_26]] : (!fir.logical<2>) -> i1
+// CHECK:               %[[VAL_28:.*]] = arith.addi %[[VAL_15]], %[[VAL_1]] : i64
+// CHECK:               %[[VAL_29:.*]] = arith.select %[[VAL_27]], %[[VAL_28]], %[[VAL_15]] : i64
+// CHECK:               fir.result %[[VAL_29]] : i64
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_13]] : i64
+// CHECK:           }
+// CHECK:           return %[[VAL_10]] : !hlfir.expr<?x?xi64>
+// CHECK:         }
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-maxloc.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-maxloc.fir
new file mode 100644
index 0000000..4e9f5d0
--- /dev/null
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-maxloc.fir
@@ -0,0 +1,312 @@
+// RUN: fir-opt %s --simplify-hlfir-intrinsics | FileCheck %s
+// Masked hlfir.maxloc without dim on a rank-1 i32 !hlfir.expr; the expected output is one fir.do_loop with three iter_args whose first result is stored into a 1-element fir.alloca array.
+func.func @test_1d_total_expr(%input: !hlfir.expr<?xi32>, %mask: !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32> {
+  %0 = hlfir.maxloc %input mask %mask {fastmath = #arith.fastmath<contract>} : (!hlfir.expr<?xi32>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32>
+  return %0 : !hlfir.expr<1xi32>
+}
+// CHECK-LABEL:   func.func @test_1d_total_expr(
+// CHECK-SAME:                                  %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?xi32>,
+// CHECK-SAME:                                  %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32> {
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant true
+// CHECK:           %[[VAL_5:.*]] = arith.constant -2147483648 : i32
+// CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_7:.*]] = fir.alloca !fir.array<1xi32>
+// CHECK:           %[[VAL_8:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?xi32>) -> !fir.shape<1>
+// CHECK:           %[[VAL_9:.*]] = hlfir.get_extent %[[VAL_8]] {dim = 0 : index} : (!fir.shape<1>) -> index
+// CHECK:           %[[VAL_10:.*]]:3 = fir.do_loop %[[VAL_11:.*]] = %[[VAL_3]] to %[[VAL_9]] step %[[VAL_3]] unordered iter_args(%[[VAL_12:.*]] = %[[VAL_6]], %[[VAL_13:.*]] = %[[VAL_5]], %[[VAL_14:.*]] = %[[VAL_4]]) -> (i32, i32, i1) {
+// CHECK:             %[[VAL_15:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_11]] : (!hlfir.expr<?x!fir.logical<4>>, index) -> !fir.logical<4>
+// CHECK:             %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (!fir.logical<4>) -> i1
+// CHECK:             %[[VAL_17:.*]]:3 = fir.if %[[VAL_16]] -> (i32, i32, i1) {
+// CHECK:               %[[VAL_18:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_11]] : (!hlfir.expr<?xi32>, index) -> i32
+// CHECK:               %[[VAL_19:.*]] = arith.cmpi sgt, %[[VAL_18]], %[[VAL_13]] : i32
+// CHECK:               %[[VAL_20:.*]] = arith.ori %[[VAL_19]], %[[VAL_14]] : i1
+// CHECK:               %[[VAL_21:.*]] = fir.convert %[[VAL_11]] : (index) -> i32
+// CHECK:               %[[VAL_22:.*]] = arith.select %[[VAL_20]], %[[VAL_21]], %[[VAL_12]] : i32
+// CHECK:               %[[VAL_23:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_13]] : i32
+// CHECK:               fir.result %[[VAL_22]], %[[VAL_23]], %[[VAL_2]] : i32, i32, i1
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_12]], %[[VAL_13]], %[[VAL_14]] : i32, i32, i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_24:.*]]#0, %[[VAL_24]]#1, %[[VAL_24]]#2 : i32, i32, i1
+// CHECK:           }
+// CHECK:           %[[VAL_28:.*]] = hlfir.designate %[[VAL_7]] (%[[VAL_3]])  : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_10]]#0 to %[[VAL_28]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_29:.*]] = hlfir.as_expr %[[VAL_7]] move %[[VAL_2]] : (!fir.ref<!fir.array<1xi32>>, i1) -> !hlfir.expr<1xi32>
+// CHECK:           return %[[VAL_29]] : !hlfir.expr<1xi32>
+// CHECK:         }
+// Masked hlfir.maxloc with dim = 1 on a rank-1 f32 !hlfir.expr; the expected output is one fir.do_loop whose first result is returned directly as the scalar i32.
+func.func @test_1d_dim_expr(%input: !hlfir.expr<?xf32>, %mask: !hlfir.expr<?x!fir.logical<4>>) -> i32 {
+  %dim = arith.constant 1 : i16
+  %0 = hlfir.maxloc %input dim %dim mask %mask {fastmath = #arith.fastmath<contract>} : (!hlfir.expr<?xf32>, i16, !hlfir.expr<?x!fir.logical<4>>) -> i32
+  return %0 : i32
+}
+// CHECK-LABEL:   func.func @test_1d_dim_expr(
+// CHECK-SAME:                                %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?xf32>,
+// CHECK-SAME:                                %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x!fir.logical<4>>) -> i32 {
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant true
+// CHECK:           %[[VAL_5:.*]] = arith.constant -3.40282347E+38 : f32
+// CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_7:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?xf32>) -> !fir.shape<1>
+// CHECK:           %[[VAL_8:.*]] = hlfir.get_extent %[[VAL_7]] {dim = 0 : index} : (!fir.shape<1>) -> index
+// CHECK:           %[[VAL_9:.*]]:3 = fir.do_loop %[[VAL_10:.*]] = %[[VAL_3]] to %[[VAL_8]] step %[[VAL_3]] iter_args(%[[VAL_11:.*]] = %[[VAL_6]], %[[VAL_12:.*]] = %[[VAL_5]], %[[VAL_13:.*]] = %[[VAL_4]]) -> (i32, f32, i1) {
+// CHECK:             %[[VAL_14:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_10]] : (!hlfir.expr<?x!fir.logical<4>>, index) -> !fir.logical<4>
+// CHECK:             %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (!fir.logical<4>) -> i1
+// CHECK:             %[[VAL_16:.*]]:3 = fir.if %[[VAL_15]] -> (i32, f32, i1) {
+// CHECK:               %[[VAL_17:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]] : (!hlfir.expr<?xf32>, index) -> f32
+// CHECK:               %[[VAL_18:.*]] = arith.cmpf ogt, %[[VAL_17]], %[[VAL_12]] fastmath<contract> : f32
+// CHECK:               %[[VAL_19:.*]] = arith.cmpf une, %[[VAL_12]], %[[VAL_12]] fastmath<contract> : f32
+// CHECK:               %[[VAL_20:.*]] = arith.cmpf oeq, %[[VAL_17]], %[[VAL_17]] fastmath<contract> : f32
+// CHECK:               %[[VAL_21:.*]] = arith.andi %[[VAL_19]], %[[VAL_20]] : i1
+// CHECK:               %[[VAL_22:.*]] = arith.ori %[[VAL_18]], %[[VAL_21]] : i1
+// CHECK:               %[[VAL_23:.*]] = arith.ori %[[VAL_22]], %[[VAL_13]] : i1
+// CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_10]] : (index) -> i32
+// CHECK:               %[[VAL_25:.*]] = arith.select %[[VAL_23]], %[[VAL_24]], %[[VAL_11]] : i32
+// CHECK:               %[[VAL_26:.*]] = arith.select %[[VAL_23]], %[[VAL_17]], %[[VAL_12]] : f32
+// CHECK:               fir.result %[[VAL_25]], %[[VAL_26]], %[[VAL_2]] : i32, f32, i1
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_11]], %[[VAL_12]], %[[VAL_13]] : i32, f32, i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_27:.*]]#0, %[[VAL_27]]#1, %[[VAL_27]]#2 : i32, f32, i1
+// CHECK:           }
+// CHECK:           return %[[VAL_9]]#0 : i32
+// CHECK:         }
+// Masked hlfir.maxloc without dim on a rank-1 f32 !fir.box input with an i16 result; the expected output loads input elements with hlfir.designate and fir.load inside the mask-guarded fir.if.
+func.func @test_1d_total_var(%input: !fir.box<!fir.array<?xf32>>, %mask: !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi16> {
+  %0 = hlfir.maxloc %input mask %mask {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xf32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi16>
+  return %0 : !hlfir.expr<1xi16>
+}
+// CHECK-LABEL:   func.func @test_1d_total_var(
+// CHECK-SAME:                                 %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?xf32>>,
+// CHECK-SAME:                                 %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi16> {
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant true
+// CHECK:           %[[VAL_5:.*]] = arith.constant -3.40282347E+38 : f32
+// CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i16
+// CHECK:           %[[VAL_7:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_8:.*]] = fir.alloca !fir.array<1xi16>
+// CHECK:           %[[VAL_9:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_10:.*]]:3 = fir.do_loop %[[VAL_11:.*]] = %[[VAL_3]] to %[[VAL_9]]#1 step %[[VAL_3]] iter_args(%[[VAL_12:.*]] = %[[VAL_6]], %[[VAL_13:.*]] = %[[VAL_5]], %[[VAL_14:.*]] = %[[VAL_4]]) -> (i16, f32, i1) {
+// CHECK:             %[[VAL_15:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_11]] : (!hlfir.expr<?x!fir.logical<4>>, index) -> !fir.logical<4>
+// CHECK:             %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (!fir.logical<4>) -> i1
+// CHECK:             %[[VAL_17:.*]]:3 = fir.if %[[VAL_16]] -> (i16, f32, i1) {
+// CHECK:               %[[VAL_18:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_19:.*]] = arith.subi %[[VAL_18]]#0, %[[VAL_3]] : index
+// CHECK:               %[[VAL_20:.*]] = arith.addi %[[VAL_11]], %[[VAL_19]] : index
+// CHECK:               %[[VAL_21:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_20]])  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<f32>
+// CHECK:               %[[VAL_23:.*]] = arith.cmpf ogt, %[[VAL_22]], %[[VAL_13]] fastmath<contract> : f32
+// CHECK:               %[[VAL_24:.*]] = arith.cmpf une, %[[VAL_13]], %[[VAL_13]] fastmath<contract> : f32
+// CHECK:               %[[VAL_25:.*]] = arith.cmpf oeq, %[[VAL_22]], %[[VAL_22]] fastmath<contract> : f32
+// CHECK:               %[[VAL_26:.*]] = arith.andi %[[VAL_24]], %[[VAL_25]] : i1
+// CHECK:               %[[VAL_27:.*]] = arith.ori %[[VAL_23]], %[[VAL_26]] : i1
+// CHECK:               %[[VAL_28:.*]] = arith.ori %[[VAL_27]], %[[VAL_14]] : i1
+// CHECK:               %[[VAL_29:.*]] = fir.convert %[[VAL_11]] : (index) -> i16
+// CHECK:               %[[VAL_30:.*]] = arith.select %[[VAL_28]], %[[VAL_29]], %[[VAL_12]] : i16
+// CHECK:               %[[VAL_31:.*]] = arith.select %[[VAL_28]], %[[VAL_22]], %[[VAL_13]] : f32
+// CHECK:               fir.result %[[VAL_30]], %[[VAL_31]], %[[VAL_2]] : i16, f32, i1
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_12]], %[[VAL_13]], %[[VAL_14]] : i16, f32, i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_32:.*]]#0, %[[VAL_32]]#1, %[[VAL_32]]#2 : i16, f32, i1
+// CHECK:           }
+// CHECK:           %[[VAL_41:.*]] = hlfir.designate %[[VAL_8]] (%[[VAL_3]])  : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
+// CHECK:           hlfir.assign %[[VAL_10]]#0 to %[[VAL_41]] : i16, !fir.ref<i16>
+// CHECK:           %[[VAL_42:.*]] = hlfir.as_expr %[[VAL_8]] move %[[VAL_2]] : (!fir.ref<!fir.array<1xi16>>, i1) -> !hlfir.expr<1xi16>
+// CHECK:           return %[[VAL_42]] : !hlfir.expr<1xi16>
+// CHECK:         }
+// Masked hlfir.maxloc with dim = 1 on a rank-1 f64 !fir.box input; the expected output is one fir.do_loop whose first result is returned directly as the scalar i64.
+func.func @test_1d_dim_var(%input: !fir.box<!fir.array<?xf64>>, %mask: !hlfir.expr<?x!fir.logical<4>>) -> i64 {
+  %dim = arith.constant 1 : i32
+  %0 = hlfir.maxloc %input dim %dim mask %mask {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xf64>>, i32, !hlfir.expr<?x!fir.logical<4>>) -> i64
+  return %0 : i64
+}
+// CHECK-LABEL:   func.func @test_1d_dim_var(
+// CHECK-SAME:                               %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?xf64>>,
+// CHECK-SAME:                               %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x!fir.logical<4>>) -> i64 {
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant true
+// CHECK:           %[[VAL_5:.*]] = arith.constant -1.7976931348623157E+308 : f64
+// CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i64
+// CHECK:           %[[VAL_7:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_8:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?xf64>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_9:.*]]:3 = fir.do_loop %[[VAL_10:.*]] = %[[VAL_3]] to %[[VAL_8]]#1 step %[[VAL_3]] iter_args(%[[VAL_11:.*]] = %[[VAL_6]], %[[VAL_12:.*]] = %[[VAL_5]], %[[VAL_13:.*]] = %[[VAL_4]]) -> (i64, f64, i1) {
+// CHECK:             %[[VAL_14:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_10]] : (!hlfir.expr<?x!fir.logical<4>>, index) -> !fir.logical<4>
+// CHECK:             %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (!fir.logical<4>) -> i1
+// CHECK:             %[[VAL_16:.*]]:3 = fir.if %[[VAL_15]] -> (i64, f64, i1) {
+// CHECK:               %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?xf64>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_18:.*]] = arith.subi %[[VAL_17]]#0, %[[VAL_3]] : index
+// CHECK:               %[[VAL_19:.*]] = arith.addi %[[VAL_10]], %[[VAL_18]] : index
+// CHECK:               %[[VAL_20:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_19]])  : (!fir.box<!fir.array<?xf64>>, index) -> !fir.ref<f64>
+// CHECK:               %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<f64>
+// CHECK:               %[[VAL_22:.*]] = arith.cmpf ogt, %[[VAL_21]], %[[VAL_12]] fastmath<contract> : f64
+// CHECK:               %[[VAL_23:.*]] = arith.cmpf une, %[[VAL_12]], %[[VAL_12]] fastmath<contract> : f64
+// CHECK:               %[[VAL_24:.*]] = arith.cmpf oeq, %[[VAL_21]], %[[VAL_21]] fastmath<contract> : f64
+// CHECK:               %[[VAL_25:.*]] = arith.andi %[[VAL_23]], %[[VAL_24]] : i1
+// CHECK:               %[[VAL_26:.*]] = arith.ori %[[VAL_22]], %[[VAL_25]] : i1
+// CHECK:               %[[VAL_27:.*]] = arith.ori %[[VAL_26]], %[[VAL_13]] : i1
+// CHECK:               %[[VAL_28:.*]] = fir.convert %[[VAL_10]] : (index) -> i64
+// CHECK:               %[[VAL_29:.*]] = arith.select %[[VAL_27]], %[[VAL_28]], %[[VAL_11]] : i64
+// CHECK:               %[[VAL_30:.*]] = arith.select %[[VAL_27]], %[[VAL_21]], %[[VAL_12]] : f64
+// CHECK:               fir.result %[[VAL_29]], %[[VAL_30]], %[[VAL_2]] : i64, f64, i1
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_11]], %[[VAL_12]], %[[VAL_13]] : i64, f64, i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_31:.*]]#0, %[[VAL_31]]#1, %[[VAL_31]]#2 : i64, f64, i1
+// CHECK:           }
+// CHECK:           return %[[VAL_9]]#0 : i64
+// CHECK:         }
+// Masked hlfir.maxloc without dim on a rank-3 f32 !hlfir.expr (reassoc fastmath); the expected output is a three-level fir.do_loop nest with five iter_args and three stores into a 3-element fir.alloca array.
+func.func @test_total_expr(%input: !hlfir.expr<?x?x?xf32>, %mask: !hlfir.expr<?x?x?x!fir.logical<4>>) -> !hlfir.expr<3xi32> {
+  %0 = hlfir.maxloc %input mask %mask {fastmath = #arith.fastmath<reassoc>} : (!hlfir.expr<?x?x?xf32>, !hlfir.expr<?x?x?x!fir.logical<4>>) -> !hlfir.expr<3xi32>
+  return %0 : !hlfir.expr<3xi32>
+}
+// CHECK-LABEL:   func.func @test_total_expr(
+// CHECK-SAME:                               %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x?xf32>,
+// CHECK-SAME:                               %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x?x!fir.logical<4>>) -> !hlfir.expr<3xi32> {
+// CHECK:           %[[VAL_2:.*]] = arith.constant 3 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant false
+// CHECK:           %[[VAL_5:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_6:.*]] = arith.constant true
+// CHECK:           %[[VAL_7:.*]] = arith.constant -3.40282347E+38 : f32
+// CHECK:           %[[VAL_8:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_9:.*]] = fir.alloca !fir.array<3xi32>
+// CHECK:           %[[VAL_10:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?x?xf32>) -> !fir.shape<3>
+// CHECK:           %[[VAL_11:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 0 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_12:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 1 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_13:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 2 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_14:.*]]:5 = fir.do_loop %[[VAL_15:.*]] = %[[VAL_5]] to %[[VAL_13]] step %[[VAL_5]] unordered iter_args(%[[VAL_16:.*]] = %[[VAL_8]], %[[VAL_17:.*]] = %[[VAL_8]], %[[VAL_18:.*]] = %[[VAL_8]], %[[VAL_19:.*]] = %[[VAL_7]], %[[VAL_20:.*]] = %[[VAL_6]]) -> (i32, i32, i32, f32, i1) {
+// CHECK:             %[[VAL_21:.*]]:5 = fir.do_loop %[[VAL_22:.*]] = %[[VAL_5]] to %[[VAL_12]] step %[[VAL_5]] unordered iter_args(%[[VAL_23:.*]] = %[[VAL_16]], %[[VAL_24:.*]] = %[[VAL_17]], %[[VAL_25:.*]] = %[[VAL_18]], %[[VAL_26:.*]] = %[[VAL_19]], %[[VAL_27:.*]] = %[[VAL_20]]) -> (i32, i32, i32, f32, i1) {
+// CHECK:               %[[VAL_28:.*]]:5 = fir.do_loop %[[VAL_29:.*]] = %[[VAL_5]] to %[[VAL_11]] step %[[VAL_5]] unordered iter_args(%[[VAL_30:.*]] = %[[VAL_23]], %[[VAL_31:.*]] = %[[VAL_24]], %[[VAL_32:.*]] = %[[VAL_25]], %[[VAL_33:.*]] = %[[VAL_26]], %[[VAL_34:.*]] = %[[VAL_27]]) -> (i32, i32, i32, f32, i1) {
+// CHECK:                 %[[VAL_35:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_29]], %[[VAL_22]], %[[VAL_15]] : (!hlfir.expr<?x?x?x!fir.logical<4>>, index, index, index) -> !fir.logical<4>
+// CHECK:                 %[[VAL_36:.*]] = fir.convert %[[VAL_35]] : (!fir.logical<4>) -> i1
+// CHECK:                 %[[VAL_37:.*]]:5 = fir.if %[[VAL_36]] -> (i32, i32, i32, f32, i1) {
+// CHECK:                   %[[VAL_38:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_29]], %[[VAL_22]], %[[VAL_15]] : (!hlfir.expr<?x?x?xf32>, index, index, index) -> f32
+// CHECK:                   %[[VAL_39:.*]] = arith.cmpf ogt, %[[VAL_38]], %[[VAL_33]] fastmath<reassoc> : f32
+// CHECK:                   %[[VAL_40:.*]] = arith.cmpf une, %[[VAL_33]], %[[VAL_33]] fastmath<reassoc> : f32
+// CHECK:                   %[[VAL_41:.*]] = arith.cmpf oeq, %[[VAL_38]], %[[VAL_38]] fastmath<reassoc> : f32
+// CHECK:                   %[[VAL_42:.*]] = arith.andi %[[VAL_40]], %[[VAL_41]] : i1
+// CHECK:                   %[[VAL_43:.*]] = arith.ori %[[VAL_39]], %[[VAL_42]] : i1
+// CHECK:                   %[[VAL_44:.*]] = arith.ori %[[VAL_43]], %[[VAL_34]] : i1
+// CHECK:                   %[[VAL_45:.*]] = fir.convert %[[VAL_29]] : (index) -> i32
+// CHECK:                   %[[VAL_46:.*]] = arith.select %[[VAL_44]], %[[VAL_45]], %[[VAL_30]] : i32
+// CHECK:                   %[[VAL_47:.*]] = fir.convert %[[VAL_22]] : (index) -> i32
+// CHECK:                   %[[VAL_48:.*]] = arith.select %[[VAL_44]], %[[VAL_47]], %[[VAL_31]] : i32
+// CHECK:                   %[[VAL_49:.*]] = fir.convert %[[VAL_15]] : (index) -> i32
+// CHECK:                   %[[VAL_50:.*]] = arith.select %[[VAL_44]], %[[VAL_49]], %[[VAL_32]] : i32
+// CHECK:                   %[[VAL_51:.*]] = arith.select %[[VAL_44]], %[[VAL_38]], %[[VAL_33]] : f32
+// CHECK:                   fir.result %[[VAL_46]], %[[VAL_48]], %[[VAL_50]], %[[VAL_51]], %[[VAL_4]] : i32, i32, i32, f32, i1
+// CHECK:                 } else {
+// CHECK:                   fir.result %[[VAL_30]], %[[VAL_31]], %[[VAL_32]], %[[VAL_33]], %[[VAL_34]] : i32, i32, i32, f32, i1
+// CHECK:                 }
+// CHECK:                 fir.result %[[VAL_52:.*]]#0, %[[VAL_52]]#1, %[[VAL_52]]#2, %[[VAL_52]]#3, %[[VAL_52]]#4 : i32, i32, i32, f32, i1
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_53:.*]]#0, %[[VAL_53]]#1, %[[VAL_53]]#2, %[[VAL_53]]#3, %[[VAL_53]]#4 : i32, i32, i32, f32, i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_54:.*]]#0, %[[VAL_54]]#1, %[[VAL_54]]#2, %[[VAL_54]]#3, %[[VAL_54]]#4 : i32, i32, i32, f32, i1
+// CHECK:           }
+// CHECK:           %[[VAL_58:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_5]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_14]]#0 to %[[VAL_58]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_61:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_3]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_14]]#1 to %[[VAL_61]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_64:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_2]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_14]]#2 to %[[VAL_64]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_65:.*]] = hlfir.as_expr %[[VAL_9]] move %[[VAL_4]] : (!fir.ref<!fir.array<3xi32>>, i1) -> !hlfir.expr<3xi32>
+// CHECK:           return %[[VAL_65]] : !hlfir.expr<3xi32>
+// CHECK:         }
+// Masked hlfir.maxloc along dim 2 with rank-3 !fir.box input and mask operands (reassoc fastmath), producing a rank-2 !hlfir.expr of i32.
+func.func @test_partial_var(%input: !fir.box<!fir.array<?x?x?xf32>>, %mask: !fir.box<!fir.array<?x?x?x!fir.logical<4>>>) -> !hlfir.expr<?x?xi32> {
+  %dim = arith.constant 2 : i32
+  %0 = hlfir.maxloc %input dim %dim mask %mask {fastmath = #arith.fastmath<reassoc>} : (!fir.box<!fir.array<?x?x?xf32>>, i32, !fir.box<!fir.array<?x?x?x!fir.logical<4>>>) -> !hlfir.expr<?x?xi32>
+  return %0 : !hlfir.expr<?x?xi32>
+}
+// CHECK-LABEL:   func.func @test_partial_var(
+// CHECK-SAME:                                %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?x?xf32>>,
+// CHECK-SAME:                                %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?x?x!fir.logical<4>>>) -> !hlfir.expr<?x?xi32> {
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = arith.constant true
+// CHECK:           %[[VAL_4:.*]] = arith.constant -3.40282347E+38 : f32
+// CHECK:           %[[VAL_5:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_6:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_7:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_8:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_9:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_8]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_10:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_11:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_12:.*]] = fir.shape %[[VAL_9]]#1, %[[VAL_11]]#1 : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_13:.*]] = fir.is_present %[[VAL_1]] : (!fir.box<!fir.array<?x?x?x!fir.logical<4>>>) -> i1
+// CHECK:           %[[VAL_14:.*]] = hlfir.elemental %[[VAL_12]] unordered : (!fir.shape<2>) -> !hlfir.expr<?x?xi32> {
+// CHECK:           ^bb0(%[[VAL_15:.*]]: index, %[[VAL_16:.*]]: index):
+// CHECK:             %[[VAL_17:.*]]:3 = fir.do_loop %[[VAL_18:.*]] = %[[VAL_7]] to %[[VAL_10]]#1 step %[[VAL_7]] unordered iter_args(%[[VAL_19:.*]] = %[[VAL_5]], %[[VAL_20:.*]] = %[[VAL_4]], %[[VAL_21:.*]] = %[[VAL_3]]) -> (i32, f32, i1) {
+// CHECK:               %[[VAL_22:.*]] = fir.if %[[VAL_13]] -> (!fir.logical<4>) {
+// CHECK:                 %[[VAL_23:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_8]] : (!fir.box<!fir.array<?x?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_24:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_7]] : (!fir.box<!fir.array<?x?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_25:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_6]] : (!fir.box<!fir.array<?x?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_26:.*]] = arith.subi %[[VAL_23]]#0, %[[VAL_7]] : index
+// CHECK:                 %[[VAL_27:.*]] = arith.addi %[[VAL_15]], %[[VAL_26]] : index
+// CHECK:                 %[[VAL_28:.*]] = arith.subi %[[VAL_24]]#0, %[[VAL_7]] : index
+// CHECK:                 %[[VAL_29:.*]] = arith.addi %[[VAL_18]], %[[VAL_28]] : index
+// CHECK:                 %[[VAL_30:.*]] = arith.subi %[[VAL_25]]#0, %[[VAL_7]] : index
+// CHECK:                 %[[VAL_31:.*]] = arith.addi %[[VAL_16]], %[[VAL_30]] : index
+// CHECK:                 %[[VAL_32:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_27]], %[[VAL_29]], %[[VAL_31]])  : (!fir.box<!fir.array<?x?x?x!fir.logical<4>>>, index, index, index) -> !fir.ref<!fir.logical<4>>
+// CHECK:                 %[[VAL_33:.*]] = fir.load %[[VAL_32]] : !fir.ref<!fir.logical<4>>
+// CHECK:                 fir.result %[[VAL_33]] : !fir.logical<4>
+// CHECK:               } else {
+// CHECK:                 %[[VAL_34:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4>
+// CHECK:                 fir.result %[[VAL_34]] : !fir.logical<4>
+// CHECK:               }
+// CHECK:               %[[VAL_35:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+// CHECK:               %[[VAL_36:.*]]:3 = fir.if %[[VAL_35]] -> (i32, f32, i1) {
+// CHECK:                 %[[VAL_37:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_8]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_38:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_39:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_40:.*]] = arith.subi %[[VAL_37]]#0, %[[VAL_7]] : index
+// CHECK:                 %[[VAL_41:.*]] = arith.addi %[[VAL_15]], %[[VAL_40]] : index
+// CHECK:                 %[[VAL_42:.*]] = arith.subi %[[VAL_38]]#0, %[[VAL_7]] : index
+// CHECK:                 %[[VAL_43:.*]] = arith.addi %[[VAL_18]], %[[VAL_42]] : index
+// CHECK:                 %[[VAL_44:.*]] = arith.subi %[[VAL_39]]#0, %[[VAL_7]] : index
+// CHECK:                 %[[VAL_45:.*]] = arith.addi %[[VAL_16]], %[[VAL_44]] : index
+// CHECK:                 %[[VAL_46:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_41]], %[[VAL_43]], %[[VAL_45]])  : (!fir.box<!fir.array<?x?x?xf32>>, index, index, index) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<f32>
+// CHECK:                 %[[VAL_48:.*]] = arith.cmpf ogt, %[[VAL_47]], %[[VAL_20]] fastmath<reassoc> : f32
+// CHECK:                 %[[VAL_49:.*]] = arith.cmpf une, %[[VAL_20]], %[[VAL_20]] fastmath<reassoc> : f32
+// CHECK:                 %[[VAL_50:.*]] = arith.cmpf oeq, %[[VAL_47]], %[[VAL_47]] fastmath<reassoc> : f32
+// CHECK:                 %[[VAL_51:.*]] = arith.andi %[[VAL_49]], %[[VAL_50]] : i1
+// CHECK:                 %[[VAL_52:.*]] = arith.ori %[[VAL_48]], %[[VAL_51]] : i1
+// CHECK:                 %[[VAL_53:.*]] = arith.ori %[[VAL_52]], %[[VAL_21]] : i1
+// CHECK:                 %[[VAL_54:.*]] = fir.convert %[[VAL_18]] : (index) -> i32
+// CHECK:                 %[[VAL_55:.*]] = arith.select %[[VAL_53]], %[[VAL_54]], %[[VAL_19]] : i32
+// CHECK:                 %[[VAL_56:.*]] = arith.select %[[VAL_53]], %[[VAL_47]], %[[VAL_20]] : f32
+// CHECK:                 fir.result %[[VAL_55]], %[[VAL_56]], %[[VAL_2]] : i32, f32, i1
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_19]], %[[VAL_20]], %[[VAL_21]] : i32, f32, i1
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_57:.*]]#0, %[[VAL_57]]#1, %[[VAL_57]]#2 : i32, f32, i1
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_17]]#0 : i32
+// CHECK:           }
+// CHECK:           return %[[VAL_14]] : !hlfir.expr<?x?xi32>
+// CHECK:         }
+
+// hlfir.maxloc on CHARACTER data must be left as is: character comparisons are not supported yet.
+func.func @test_character(%chars: !fir.box<!fir.array<?x!fir.char<1>>>) -> !hlfir.expr<1xi32> {
+  %res = hlfir.maxloc %chars : (!fir.box<!fir.array<?x!fir.char<1>>>) -> !hlfir.expr<1xi32>
+  return %res : !hlfir.expr<1xi32>
+}
+// CHECK-LABEL:   func.func @test_character(
+// CHECK:           hlfir.maxloc
+
+// hlfir.maxloc with a BACK argument must be left as is: BACK is not supported yet.
+func.func @test_back(%vals: !hlfir.expr<?xi32>) -> !hlfir.expr<1xi32> {
+  %true = arith.constant true
+  %res = hlfir.maxloc %vals back %true : (!hlfir.expr<?xi32>, i1) -> !hlfir.expr<1xi32>
+  return %res : !hlfir.expr<1xi32>
+}
+// CHECK-LABEL:   func.func @test_back(
+// CHECK:           hlfir.maxloc
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-maxval.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-maxval.fir
new file mode 100644
index 0000000..8f414e5
--- /dev/null
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-maxval.fir
@@ -0,0 +1,186 @@
+// RUN: fir-opt %s --simplify-hlfir-intrinsics | FileCheck %s
+
+func.func @test_total_expr(%array: !hlfir.expr<?x?xf32>, %mask_expr: !hlfir.expr<?x?x!fir.logical<4>>) -> f32 {
+  %res = hlfir.maxval %array mask %mask_expr {fastmath = #arith.fastmath<contract>} : (!hlfir.expr<?x?xf32>, !hlfir.expr<?x?x!fir.logical<4>>) -> f32
+  return %res : f32
+}
+// CHECK-LABEL:   func.func @test_total_expr(
+// CHECK-SAME:                               %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?xf32>,
+// CHECK-SAME:                               %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x!fir.logical<4>>) -> f32 {
+// CHECK:           %[[FALSE:.*]] = arith.constant false
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[TRUE:.*]] = arith.constant true
+// CHECK:           %[[VAL_3:.*]] = arith.constant -3.40282347E+38 : f32
+// CHECK:           %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?xf32>) -> !fir.shape<2>
+// CHECK:           %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_6:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 1 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_7:.*]]:2 = fir.do_loop %[[VAL_8:.*]] = %[[VAL_2]] to %[[VAL_6]] step %[[VAL_2]] iter_args(%[[VAL_9:.*]] = %[[VAL_3]], %[[FIRST1:.*]] = %[[TRUE]]) -> (f32, i1) {
+// CHECK:             %[[VAL_10:.*]]:2 = fir.do_loop %[[VAL_11:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] iter_args(%[[VAL_12:.*]] = %[[VAL_9]], %[[FIRST2:.*]] = %[[FIRST1]]) -> (f32, i1) {
+// CHECK:               %[[VAL_13:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_11]], %[[VAL_8]] : (!hlfir.expr<?x?x!fir.logical<4>>, index, index) -> !fir.logical<4>
+// CHECK:               %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (!fir.logical<4>) -> i1
+// CHECK:               %[[VAL_15:.*]]:2 = fir.if %[[VAL_14]] -> (f32, i1) {
+// CHECK:                 %[[VAL_16:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_11]], %[[VAL_8]] : (!hlfir.expr<?x?xf32>, index, index) -> f32
+// CHECK:                 %[[VAL_17:.*]] = arith.cmpf ogt, %[[VAL_16]], %[[VAL_12]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_18:.*]] = arith.cmpf une, %[[VAL_12]], %[[VAL_12]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_19:.*]] = arith.cmpf oeq, %[[VAL_16]], %[[VAL_16]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_20:.*]] = arith.andi %[[VAL_18]], %[[VAL_19]] : i1
+// CHECK:                 %[[VAL_21:.*]] = arith.ori %[[VAL_17]], %[[VAL_20]] : i1
+// CHECK:                 %[[IS_FIRST:.*]] = arith.ori %[[VAL_21]], %[[FIRST2]] : i1
+// CHECK:                 %[[VAL_22:.*]] = arith.select %[[IS_FIRST]], %[[VAL_16]], %[[VAL_12]] : f32
+// CHECK:                 fir.result %[[VAL_22]], %[[FALSE]] : f32, i1
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_12]], %[[FIRST2]] : f32, i1
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_15]]#0, %[[VAL_15]]#1 : f32, i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_10]]#0, %[[VAL_10]]#1 : f32, i1
+// CHECK:           }
+// CHECK:           return %[[VAL_7]]#0 : f32
+// CHECK:         }
+
+func.func @test_partial_expr(%array: !hlfir.expr<?x?xf64>, %mask_expr: !hlfir.expr<?x?x!fir.logical<4>>) -> !hlfir.expr<?xf64> {
+  %c1_i32 = arith.constant 1 : i32
+  %res = hlfir.maxval %array dim %c1_i32 mask %mask_expr {fastmath = #arith.fastmath<reassoc>} : (!hlfir.expr<?x?xf64>, i32, !hlfir.expr<?x?x!fir.logical<4>>) -> !hlfir.expr<?xf64>
+  return %res : !hlfir.expr<?xf64>
+}
+// CHECK-LABEL:   func.func @test_partial_expr(
+// CHECK-SAME:                                 %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?xf64>,
+// CHECK-SAME:                                 %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x!fir.logical<4>>) -> !hlfir.expr<?xf64> {
+// CHECK:           %[[FALSE:.*]] = arith.constant false
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[TRUE:.*]] = arith.constant true
+// CHECK:           %[[VAL_3:.*]] = arith.constant -1.7976931348623157E+308 : f64
+// CHECK:           %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?xf64>) -> !fir.shape<2>
+// CHECK:           %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_6:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 1 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_7:.*]] = fir.shape %[[VAL_6]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_8:.*]] = hlfir.elemental %[[VAL_7]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xf64> {
+// CHECK:           ^bb0(%[[VAL_9:.*]]: index):
+// CHECK:             %[[VAL_10:.*]]:2 = fir.do_loop %[[VAL_11:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] unordered iter_args(%[[VAL_12:.*]] = %[[VAL_3]], %[[FIRST:.*]] = %[[TRUE]]) -> (f64, i1) {
+// CHECK:               %[[VAL_13:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_11]], %[[VAL_9]] : (!hlfir.expr<?x?x!fir.logical<4>>, index, index) -> !fir.logical<4>
+// CHECK:               %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (!fir.logical<4>) -> i1
+// CHECK:               %[[VAL_15:.*]]:2 = fir.if %[[VAL_14]] -> (f64, i1) {
+// CHECK:                 %[[VAL_16:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_11]], %[[VAL_9]] : (!hlfir.expr<?x?xf64>, index, index) -> f64
+// CHECK:                 %[[VAL_17:.*]] = arith.cmpf ogt, %[[VAL_16]], %[[VAL_12]] fastmath<reassoc> : f64
+// CHECK:                 %[[VAL_18:.*]] = arith.cmpf une, %[[VAL_12]], %[[VAL_12]] fastmath<reassoc> : f64
+// CHECK:                 %[[VAL_19:.*]] = arith.cmpf oeq, %[[VAL_16]], %[[VAL_16]] fastmath<reassoc> : f64
+// CHECK:                 %[[VAL_20:.*]] = arith.andi %[[VAL_18]], %[[VAL_19]] : i1
+// CHECK:                 %[[VAL_21:.*]] = arith.ori %[[VAL_17]], %[[VAL_20]] : i1
+// CHECK:                 %[[IS_FIRST:.*]] = arith.ori %[[VAL_21]], %[[FIRST]] : i1
+// CHECK:                 %[[VAL_22:.*]] = arith.select %[[IS_FIRST]], %[[VAL_16]], %[[VAL_12]] : f64
+// CHECK:                 fir.result %[[VAL_22]], %[[FALSE]] : f64, i1
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_12]], %[[FIRST]] : f64, i1
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_15]]#0, %[[VAL_15]]#1 : f64, i1
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_10]]#0 : f64
+// CHECK:           }
+// CHECK:           return %[[VAL_8]] : !hlfir.expr<?xf64>
+// CHECK:         }
+
+func.func @test_total_var(%input: !fir.box<!fir.array<?x?xf16>>, %mask: !fir.ref<!fir.array<2x2x!fir.logical<1>>>) -> f16 {
+  %0 = hlfir.maxval %input mask %mask {fastmath = #arith.fastmath<reassoc>} : (!fir.box<!fir.array<?x?xf16>>, !fir.ref<!fir.array<2x2x!fir.logical<1>>>) -> f16
+  return %0 : f16
+}
+// CHECK-LABEL:   func.func @test_total_var(
+// CHECK-SAME:                              %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?xf16>>,
+// CHECK-SAME:                              %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.ref<!fir.array<2x2x!fir.logical<1>>>) -> f16 {
+// CHECK:           %[[VAL_2:.*]] = arith.constant -6.550400e+04 : f16
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_7:.*]]:2 = fir.do_loop %[[VAL_8:.*]] = %[[VAL_3]] to %[[VAL_6]]#1 step %[[VAL_3]] unordered iter_args(%[[VAL_9:.*]] = %[[VAL_2]], %[[FIRST1:.*]] = %true) -> (f16, i1) {
+// CHECK:             %[[VAL_10:.*]]:2 = fir.do_loop %[[VAL_11:.*]] = %[[VAL_3]] to %[[VAL_5]]#1 step %[[VAL_3]] unordered iter_args(%[[VAL_12:.*]] = %[[VAL_9]], %[[FIRST2:.*]] = %[[FIRST1]]) -> (f16, i1) {
+// CHECK:               %[[VAL_13:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_11]], %[[VAL_8]])  : (!fir.ref<!fir.array<2x2x!fir.logical<1>>>, index, index) -> !fir.ref<!fir.logical<1>>
+// CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_13]] : !fir.ref<!fir.logical<1>>
+// CHECK:               %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (!fir.logical<1>) -> i1
+// CHECK:               %[[VAL_16:.*]]:2 = fir.if %[[VAL_15]] -> (f16, i1) {
+// CHECK:                 %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_18:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_19:.*]] = arith.subi %[[VAL_17]]#0, %[[VAL_3]] : index
+// CHECK:                 %[[VAL_20:.*]] = arith.addi %[[VAL_11]], %[[VAL_19]] : index
+// CHECK:                 %[[VAL_21:.*]] = arith.subi %[[VAL_18]]#0, %[[VAL_3]] : index
+// CHECK:                 %[[VAL_22:.*]] = arith.addi %[[VAL_8]], %[[VAL_21]] : index
+// CHECK:                 %[[VAL_23:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_20]], %[[VAL_22]])  : (!fir.box<!fir.array<?x?xf16>>, index, index) -> !fir.ref<f16>
+// CHECK:                 %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref<f16>
+// CHECK:                 %[[VAL_25:.*]] = arith.cmpf ogt, %[[VAL_24]], %[[VAL_12]] fastmath<reassoc> : f16
+// CHECK:                 %[[VAL_26:.*]] = arith.cmpf une, %[[VAL_12]], %[[VAL_12]] fastmath<reassoc> : f16
+// CHECK:                 %[[VAL_27:.*]] = arith.cmpf oeq, %[[VAL_24]], %[[VAL_24]] fastmath<reassoc> : f16
+// CHECK:                 %[[VAL_28:.*]] = arith.andi %[[VAL_26]], %[[VAL_27]] : i1
+// CHECK:                 %[[VAL_29:.*]] = arith.ori %[[VAL_25]], %[[VAL_28]] : i1
+// CHECK:                 %[[IS_FIRST:.*]] = arith.ori %[[VAL_29]], %[[FIRST2]] : i1
+// CHECK:                 %[[VAL_30:.*]] = arith.select %[[IS_FIRST]], %[[VAL_24]], %[[VAL_12]] : f16
+// CHECK:                 fir.result %[[VAL_30]], %false : f16, i1
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_12]], %[[FIRST2]] : f16, i1
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_16]]#0, %[[VAL_16]]#1 : f16, i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_10]]#0, %[[VAL_10]]#1 : f16, i1
+// CHECK:           }
+// CHECK:           return %[[VAL_7]]#0 : f16
+// CHECK:         }
+
+func.func @test_partial_var(%input: !fir.box<!fir.array<?x?xf16>>, %mask: !fir.box<!fir.array<2x2x!fir.logical<1>>>) -> !hlfir.expr<?xf16> {
+  %dim = arith.constant 2 : i32
+  %0 = hlfir.maxval %input dim %dim mask %mask {fastmath = #arith.fastmath<reassoc>} : (!fir.box<!fir.array<?x?xf16>>, i32, !fir.box<!fir.array<2x2x!fir.logical<1>>>) -> !hlfir.expr<?xf16>
+  return %0 : !hlfir.expr<?xf16>
+}
+// CHECK-LABEL:   func.func @test_partial_var(
+// CHECK-SAME:                                %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?xf16>>,
+// CHECK-SAME:                                %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<2x2x!fir.logical<1>>>) -> !hlfir.expr<?xf16> {
+// CHECK:           %[[FALSE:.*]] = arith.constant false
+// CHECK:           %[[VAL_2:.*]] = arith.constant true
+// CHECK:           %[[VAL_3:.*]] = arith.constant -6.550400e+04 : f16
+// CHECK:           %[[VAL_4:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_5:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_7:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_8:.*]] = fir.shape %[[VAL_6]]#1 : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_9:.*]] = fir.is_present %[[VAL_1]] : (!fir.box<!fir.array<2x2x!fir.logical<1>>>) -> i1
+// CHECK:           %[[VAL_10:.*]] = hlfir.elemental %[[VAL_8]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xf16> {
+// CHECK:           ^bb0(%[[VAL_11:.*]]: index):
+// CHECK:             %[[VAL_12:.*]]:2 = fir.do_loop %[[VAL_13:.*]] = %[[VAL_4]] to %[[VAL_7]]#1 step %[[VAL_4]] unordered iter_args(%[[VAL_14:.*]] = %[[VAL_3]], %[[FIRST:.*]] = %[[VAL_2]]) -> (f16, i1) {
+// CHECK:               %[[VAL_15:.*]] = fir.if %[[VAL_9]] -> (!fir.logical<1>) {
+// CHECK:                 %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_5]] : (!fir.box<!fir.array<2x2x!fir.logical<1>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_4]] : (!fir.box<!fir.array<2x2x!fir.logical<1>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_18:.*]] = arith.subi %[[VAL_16]]#0, %[[VAL_4]] : index
+// CHECK:                 %[[VAL_19:.*]] = arith.addi %[[VAL_11]], %[[VAL_18]] : index
+// CHECK:                 %[[VAL_20:.*]] = arith.subi %[[VAL_17]]#0, %[[VAL_4]] : index
+// CHECK:                 %[[VAL_21:.*]] = arith.addi %[[VAL_13]], %[[VAL_20]] : index
+// CHECK:                 %[[VAL_22:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_19]], %[[VAL_21]])  : (!fir.box<!fir.array<2x2x!fir.logical<1>>>, index, index) -> !fir.ref<!fir.logical<1>>
+// CHECK:                 %[[VAL_23:.*]] = fir.load %[[VAL_22]] : !fir.ref<!fir.logical<1>>
+// CHECK:                 fir.result %[[VAL_23]] : !fir.logical<1>
+// CHECK:               } else {
+// CHECK:                 %[[VAL_24:.*]] = fir.convert %[[VAL_2]] : (i1) -> !fir.logical<1>
+// CHECK:                 fir.result %[[VAL_24]] : !fir.logical<1>
+// CHECK:               }
+// CHECK:               %[[VAL_25:.*]] = fir.convert %[[VAL_15]] : (!fir.logical<1>) -> i1
+// CHECK:               %[[VAL_26:.*]]:2 = fir.if %[[VAL_25]] -> (f16, i1) {
+// CHECK:                 %[[VAL_27:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_28:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_29:.*]] = arith.subi %[[VAL_27]]#0, %[[VAL_4]] : index
+// CHECK:                 %[[VAL_30:.*]] = arith.addi %[[VAL_11]], %[[VAL_29]] : index
+// CHECK:                 %[[VAL_31:.*]] = arith.subi %[[VAL_28]]#0, %[[VAL_4]] : index
+// CHECK:                 %[[VAL_32:.*]] = arith.addi %[[VAL_13]], %[[VAL_31]] : index
+// CHECK:                 %[[VAL_33:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_30]], %[[VAL_32]])  : (!fir.box<!fir.array<?x?xf16>>, index, index) -> !fir.ref<f16>
+// CHECK:                 %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<f16>
+// CHECK:                 %[[VAL_35:.*]] = arith.cmpf ogt, %[[VAL_34]], %[[VAL_14]] fastmath<reassoc> : f16
+// CHECK:                 %[[VAL_36:.*]] = arith.cmpf une, %[[VAL_14]], %[[VAL_14]] fastmath<reassoc> : f16
+// CHECK:                 %[[VAL_37:.*]] = arith.cmpf oeq, %[[VAL_34]], %[[VAL_34]] fastmath<reassoc> : f16
+// CHECK:                 %[[VAL_38:.*]] = arith.andi %[[VAL_36]], %[[VAL_37]] : i1
+// CHECK:                 %[[VAL_39:.*]] = arith.ori %[[VAL_35]], %[[VAL_38]] : i1
+// CHECK:                 %[[IS_FIRST:.*]] = arith.ori %[[VAL_39]], %[[FIRST]] : i1
+// CHECK:                 %[[VAL_40:.*]] = arith.select %[[IS_FIRST]], %[[VAL_34]], %[[VAL_14]] : f16
+// CHECK:                 fir.result %[[VAL_40]], %[[FALSE]] : f16, i1
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_14]], %[[FIRST]] : f16, i1
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_26]]#0, %[[VAL_26]]#1 : f16, i1
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_12]]#0 : f16
+// CHECK:           }
+// CHECK:           return %[[VAL_10]] : !hlfir.expr<?xf16>
+// CHECK:         }
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-minloc.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-minloc.fir
new file mode 100644
index 0000000..0c17fd6
--- /dev/null
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-minloc.fir
@@ -0,0 +1,312 @@
+// RUN: fir-opt %s --simplify-hlfir-intrinsics | FileCheck %s
+
+func.func @test_1d_total_expr(%array: !hlfir.expr<?xi32>, %mask_expr: !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32> {
+  %res = hlfir.minloc %array mask %mask_expr {fastmath = #arith.fastmath<contract>} : (!hlfir.expr<?xi32>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32>
+  return %res : !hlfir.expr<1xi32>
+}
+// CHECK-LABEL:   func.func @test_1d_total_expr(
+// CHECK-SAME:                                  %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?xi32>,
+// CHECK-SAME:                                  %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32> {
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant true
+// CHECK:           %[[VAL_5:.*]] = arith.constant 2147483647 : i32
+// CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_7:.*]] = fir.alloca !fir.array<1xi32>
+// CHECK:           %[[VAL_8:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?xi32>) -> !fir.shape<1>
+// CHECK:           %[[VAL_9:.*]] = hlfir.get_extent %[[VAL_8]] {dim = 0 : index} : (!fir.shape<1>) -> index
+// CHECK:           %[[VAL_10:.*]]:3 = fir.do_loop %[[VAL_11:.*]] = %[[VAL_3]] to %[[VAL_9]] step %[[VAL_3]] unordered iter_args(%[[VAL_12:.*]] = %[[VAL_6]], %[[VAL_13:.*]] = %[[VAL_5]], %[[VAL_14:.*]] = %[[VAL_4]]) -> (i32, i32, i1) {
+// CHECK:             %[[VAL_15:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_11]] : (!hlfir.expr<?x!fir.logical<4>>, index) -> !fir.logical<4>
+// CHECK:             %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (!fir.logical<4>) -> i1
+// CHECK:             %[[VAL_17:.*]]:3 = fir.if %[[VAL_16]] -> (i32, i32, i1) {
+// CHECK:               %[[VAL_18:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_11]] : (!hlfir.expr<?xi32>, index) -> i32
+// CHECK:               %[[VAL_19:.*]] = arith.cmpi slt, %[[VAL_18]], %[[VAL_13]] : i32
+// CHECK:               %[[VAL_20:.*]] = arith.ori %[[VAL_19]], %[[VAL_14]] : i1
+// CHECK:               %[[VAL_21:.*]] = fir.convert %[[VAL_11]] : (index) -> i32
+// CHECK:               %[[VAL_22:.*]] = arith.select %[[VAL_20]], %[[VAL_21]], %[[VAL_12]] : i32
+// CHECK:               %[[VAL_23:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_13]] : i32
+// CHECK:               fir.result %[[VAL_22]], %[[VAL_23]], %[[VAL_2]] : i32, i32, i1
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_12]], %[[VAL_13]], %[[VAL_14]] : i32, i32, i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_24:.*]]#0, %[[VAL_24]]#1, %[[VAL_24]]#2 : i32, i32, i1
+// CHECK:           }
+// CHECK:           %[[VAL_28:.*]] = hlfir.designate %[[VAL_7]] (%[[VAL_3]])  : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_10]]#0 to %[[VAL_28]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_29:.*]] = hlfir.as_expr %[[VAL_7]] move %[[VAL_2]] : (!fir.ref<!fir.array<1xi32>>, i1) -> !hlfir.expr<1xi32>
+// CHECK:           return %[[VAL_29]] : !hlfir.expr<1xi32>
+// CHECK:         }
+
+func.func @test_1d_dim_expr(%array: !hlfir.expr<?xf32>, %mask_expr: !hlfir.expr<?x!fir.logical<4>>) -> i32 {
+  %c1_i16 = arith.constant 1 : i16
+  %res = hlfir.minloc %array dim %c1_i16 mask %mask_expr {fastmath = #arith.fastmath<contract>} : (!hlfir.expr<?xf32>, i16, !hlfir.expr<?x!fir.logical<4>>) -> i32
+  return %res : i32
+}
+// CHECK-LABEL:   func.func @test_1d_dim_expr(
+// CHECK-SAME:                                %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?xf32>,
+// CHECK-SAME:                                %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x!fir.logical<4>>) -> i32 {
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant true
+// CHECK:           %[[VAL_5:.*]] = arith.constant 3.40282347E+38 : f32
+// CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_7:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?xf32>) -> !fir.shape<1>
+// CHECK:           %[[VAL_8:.*]] = hlfir.get_extent %[[VAL_7]] {dim = 0 : index} : (!fir.shape<1>) -> index
+// CHECK:           %[[VAL_9:.*]]:3 = fir.do_loop %[[VAL_10:.*]] = %[[VAL_3]] to %[[VAL_8]] step %[[VAL_3]] iter_args(%[[VAL_11:.*]] = %[[VAL_6]], %[[VAL_12:.*]] = %[[VAL_5]], %[[VAL_13:.*]] = %[[VAL_4]]) -> (i32, f32, i1) {
+// CHECK:             %[[VAL_14:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_10]] : (!hlfir.expr<?x!fir.logical<4>>, index) -> !fir.logical<4>
+// CHECK:             %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (!fir.logical<4>) -> i1
+// CHECK:             %[[VAL_16:.*]]:3 = fir.if %[[VAL_15]] -> (i32, f32, i1) {
+// CHECK:               %[[VAL_17:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]] : (!hlfir.expr<?xf32>, index) -> f32
+// CHECK:               %[[VAL_18:.*]] = arith.cmpf olt, %[[VAL_17]], %[[VAL_12]] fastmath<contract> : f32
+// CHECK:               %[[VAL_19:.*]] = arith.cmpf une, %[[VAL_12]], %[[VAL_12]] fastmath<contract> : f32
+// CHECK:               %[[VAL_20:.*]] = arith.cmpf oeq, %[[VAL_17]], %[[VAL_17]] fastmath<contract> : f32
+// CHECK:               %[[VAL_21:.*]] = arith.andi %[[VAL_19]], %[[VAL_20]] : i1
+// CHECK:               %[[VAL_22:.*]] = arith.ori %[[VAL_18]], %[[VAL_21]] : i1
+// CHECK:               %[[VAL_23:.*]] = arith.ori %[[VAL_22]], %[[VAL_13]] : i1
+// CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_10]] : (index) -> i32
+// CHECK:               %[[VAL_25:.*]] = arith.select %[[VAL_23]], %[[VAL_24]], %[[VAL_11]] : i32
+// CHECK:               %[[VAL_26:.*]] = arith.select %[[VAL_23]], %[[VAL_17]], %[[VAL_12]] : f32
+// CHECK:               fir.result %[[VAL_25]], %[[VAL_26]], %[[VAL_2]] : i32, f32, i1
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_11]], %[[VAL_12]], %[[VAL_13]] : i32, f32, i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_27:.*]]#0, %[[VAL_27]]#1, %[[VAL_27]]#2 : i32, f32, i1
+// CHECK:           }
+// CHECK:           return %[[VAL_9]]#0 : i32
+// CHECK:         }
+
+func.func @test_1d_total_var(%array: !fir.box<!fir.array<?xf32>>, %mask_expr: !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi16> {
+  %res = hlfir.minloc %array mask %mask_expr {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xf32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi16>
+  return %res : !hlfir.expr<1xi16>
+}
+// CHECK-LABEL:   func.func @test_1d_total_var(
+// CHECK-SAME:                                 %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?xf32>>,
+// CHECK-SAME:                                 %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi16> {
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant true
+// CHECK:           %[[VAL_5:.*]] = arith.constant 3.40282347E+38 : f32
+// CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i16
+// CHECK:           %[[VAL_7:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_8:.*]] = fir.alloca !fir.array<1xi16>
+// CHECK:           %[[VAL_9:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_10:.*]]:3 = fir.do_loop %[[VAL_11:.*]] = %[[VAL_3]] to %[[VAL_9]]#1 step %[[VAL_3]] iter_args(%[[VAL_12:.*]] = %[[VAL_6]], %[[VAL_13:.*]] = %[[VAL_5]], %[[VAL_14:.*]] = %[[VAL_4]]) -> (i16, f32, i1) {
+// CHECK:             %[[VAL_15:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_11]] : (!hlfir.expr<?x!fir.logical<4>>, index) -> !fir.logical<4>
+// CHECK:             %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (!fir.logical<4>) -> i1
+// CHECK:             %[[VAL_17:.*]]:3 = fir.if %[[VAL_16]] -> (i16, f32, i1) {
+// CHECK:               %[[VAL_18:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_19:.*]] = arith.subi %[[VAL_18]]#0, %[[VAL_3]] : index
+// CHECK:               %[[VAL_20:.*]] = arith.addi %[[VAL_11]], %[[VAL_19]] : index
+// CHECK:               %[[VAL_21:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_20]])  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<f32>
+// CHECK:               %[[VAL_23:.*]] = arith.cmpf olt, %[[VAL_22]], %[[VAL_13]] fastmath<contract> : f32
+// CHECK:               %[[VAL_24:.*]] = arith.cmpf une, %[[VAL_13]], %[[VAL_13]] fastmath<contract> : f32
+// CHECK:               %[[VAL_25:.*]] = arith.cmpf oeq, %[[VAL_22]], %[[VAL_22]] fastmath<contract> : f32
+// CHECK:               %[[VAL_26:.*]] = arith.andi %[[VAL_24]], %[[VAL_25]] : i1
+// CHECK:               %[[VAL_27:.*]] = arith.ori %[[VAL_23]], %[[VAL_26]] : i1
+// CHECK:               %[[VAL_28:.*]] = arith.ori %[[VAL_27]], %[[VAL_14]] : i1
+// CHECK:               %[[VAL_29:.*]] = fir.convert %[[VAL_11]] : (index) -> i16
+// CHECK:               %[[VAL_30:.*]] = arith.select %[[VAL_28]], %[[VAL_29]], %[[VAL_12]] : i16
+// CHECK:               %[[VAL_31:.*]] = arith.select %[[VAL_28]], %[[VAL_22]], %[[VAL_13]] : f32
+// CHECK:               fir.result %[[VAL_30]], %[[VAL_31]], %[[VAL_2]] : i16, f32, i1
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_12]], %[[VAL_13]], %[[VAL_14]] : i16, f32, i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_32:.*]]#0, %[[VAL_32]]#1, %[[VAL_32]]#2 : i16, f32, i1
+// CHECK:           }
+// CHECK:           %[[VAL_41:.*]] = hlfir.designate %[[VAL_8]] (%[[VAL_3]])  : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
+// CHECK:           hlfir.assign %[[VAL_10]]#0 to %[[VAL_41]] : i16, !fir.ref<i16>
+// CHECK:           %[[VAL_42:.*]] = hlfir.as_expr %[[VAL_8]] move %[[VAL_2]] : (!fir.ref<!fir.array<1xi16>>, i1) -> !hlfir.expr<1xi16>
+// CHECK:           return %[[VAL_42]] : !hlfir.expr<1xi16>
+// CHECK:         }
+
+// MINLOC of a rank-1 boxed f64 array with a constant DIM of 1 and an array
+// MASK passed as an hlfir.expr; the result is a scalar i64. Only
+// fastmath<contract> is set, and the reduction loop expected below is not
+// marked unordered.
+func.func @test_1d_dim_var(%input: !fir.box<!fir.array<?xf64>>, %mask: !hlfir.expr<?x!fir.logical<4>>) -> i64 {
+  %dim = arith.constant 1 : i32
+  %0 = hlfir.minloc %input dim %dim mask %mask {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xf64>>, i32, !hlfir.expr<?x!fir.logical<4>>) -> i64
+  return %0 : i64
+}
+// CHECK-LABEL:   func.func @test_1d_dim_var(
+// CHECK-SAME:                               %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?xf64>>,
+// CHECK-SAME:                               %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x!fir.logical<4>>) -> i64 {
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant true
+// CHECK:           %[[VAL_5:.*]] = arith.constant 1.7976931348623157E+308 : f64
+// CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i64
+// CHECK:           %[[VAL_7:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_8:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?xf64>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_9:.*]]:3 = fir.do_loop %[[VAL_10:.*]] = %[[VAL_3]] to %[[VAL_8]]#1 step %[[VAL_3]] iter_args(%[[VAL_11:.*]] = %[[VAL_6]], %[[VAL_12:.*]] = %[[VAL_5]], %[[VAL_13:.*]] = %[[VAL_4]]) -> (i64, f64, i1) {
+// CHECK:             %[[VAL_14:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_10]] : (!hlfir.expr<?x!fir.logical<4>>, index) -> !fir.logical<4>
+// CHECK:             %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (!fir.logical<4>) -> i1
+// CHECK:             %[[VAL_16:.*]]:3 = fir.if %[[VAL_15]] -> (i64, f64, i1) {
+// CHECK:               %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?xf64>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_18:.*]] = arith.subi %[[VAL_17]]#0, %[[VAL_3]] : index
+// CHECK:               %[[VAL_19:.*]] = arith.addi %[[VAL_10]], %[[VAL_18]] : index
+// CHECK:               %[[VAL_20:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_19]])  : (!fir.box<!fir.array<?xf64>>, index) -> !fir.ref<f64>
+// CHECK:               %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<f64>
+// CHECK:               %[[VAL_22:.*]] = arith.cmpf olt, %[[VAL_21]], %[[VAL_12]] fastmath<contract> : f64
+// CHECK:               %[[VAL_23:.*]] = arith.cmpf une, %[[VAL_12]], %[[VAL_12]] fastmath<contract> : f64
+// CHECK:               %[[VAL_24:.*]] = arith.cmpf oeq, %[[VAL_21]], %[[VAL_21]] fastmath<contract> : f64
+// CHECK:               %[[VAL_25:.*]] = arith.andi %[[VAL_23]], %[[VAL_24]] : i1
+// CHECK:               %[[VAL_26:.*]] = arith.ori %[[VAL_22]], %[[VAL_25]] : i1
+// CHECK:               %[[VAL_27:.*]] = arith.ori %[[VAL_26]], %[[VAL_13]] : i1
+// CHECK:               %[[VAL_28:.*]] = fir.convert %[[VAL_10]] : (index) -> i64
+// CHECK:               %[[VAL_29:.*]] = arith.select %[[VAL_27]], %[[VAL_28]], %[[VAL_11]] : i64
+// CHECK:               %[[VAL_30:.*]] = arith.select %[[VAL_27]], %[[VAL_21]], %[[VAL_12]] : f64
+// CHECK:               fir.result %[[VAL_29]], %[[VAL_30]], %[[VAL_2]] : i64, f64, i1
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_11]], %[[VAL_12]], %[[VAL_13]] : i64, f64, i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_31:.*]]#0, %[[VAL_31]]#1, %[[VAL_31]]#2 : i64, f64, i1
+// CHECK:           }
+// CHECK:           return %[[VAL_9]]#0 : i64
+// CHECK:         }
+
+// Total MINLOC (no DIM) of a rank-3 f32 hlfir.expr with an array MASK expr;
+// the result is a 3-element i32 vector of coordinates. With fastmath<reassoc>
+// the three nested loops expected below are marked unordered and carry one
+// i32 per dimension, the running f32 minimum and an i1 flag.
+func.func @test_total_expr(%input: !hlfir.expr<?x?x?xf32>, %mask: !hlfir.expr<?x?x?x!fir.logical<4>>) -> !hlfir.expr<3xi32> {
+  %0 = hlfir.minloc %input mask %mask {fastmath = #arith.fastmath<reassoc>} : (!hlfir.expr<?x?x?xf32>, !hlfir.expr<?x?x?x!fir.logical<4>>) -> !hlfir.expr<3xi32>
+  return %0 : !hlfir.expr<3xi32>
+}
+// CHECK-LABEL:   func.func @test_total_expr(
+// CHECK-SAME:                               %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x?xf32>,
+// CHECK-SAME:                               %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x?x!fir.logical<4>>) -> !hlfir.expr<3xi32> {
+// CHECK:           %[[VAL_2:.*]] = arith.constant 3 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant false
+// CHECK:           %[[VAL_5:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_6:.*]] = arith.constant true
+// CHECK:           %[[VAL_7:.*]] = arith.constant 3.40282347E+38 : f32
+// CHECK:           %[[VAL_8:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_9:.*]] = fir.alloca !fir.array<3xi32>
+// CHECK:           %[[VAL_10:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?x?xf32>) -> !fir.shape<3>
+// CHECK:           %[[VAL_11:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 0 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_12:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 1 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_13:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 2 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_14:.*]]:5 = fir.do_loop %[[VAL_15:.*]] = %[[VAL_5]] to %[[VAL_13]] step %[[VAL_5]] unordered iter_args(%[[VAL_16:.*]] = %[[VAL_8]], %[[VAL_17:.*]] = %[[VAL_8]], %[[VAL_18:.*]] = %[[VAL_8]], %[[VAL_19:.*]] = %[[VAL_7]], %[[VAL_20:.*]] = %[[VAL_6]]) -> (i32, i32, i32, f32, i1) {
+// CHECK:             %[[VAL_21:.*]]:5 = fir.do_loop %[[VAL_22:.*]] = %[[VAL_5]] to %[[VAL_12]] step %[[VAL_5]] unordered iter_args(%[[VAL_23:.*]] = %[[VAL_16]], %[[VAL_24:.*]] = %[[VAL_17]], %[[VAL_25:.*]] = %[[VAL_18]], %[[VAL_26:.*]] = %[[VAL_19]], %[[VAL_27:.*]] = %[[VAL_20]]) -> (i32, i32, i32, f32, i1) {
+// CHECK:               %[[VAL_28:.*]]:5 = fir.do_loop %[[VAL_29:.*]] = %[[VAL_5]] to %[[VAL_11]] step %[[VAL_5]] unordered iter_args(%[[VAL_30:.*]] = %[[VAL_23]], %[[VAL_31:.*]] = %[[VAL_24]], %[[VAL_32:.*]] = %[[VAL_25]], %[[VAL_33:.*]] = %[[VAL_26]], %[[VAL_34:.*]] = %[[VAL_27]]) -> (i32, i32, i32, f32, i1) {
+// CHECK:                 %[[VAL_35:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_29]], %[[VAL_22]], %[[VAL_15]] : (!hlfir.expr<?x?x?x!fir.logical<4>>, index, index, index) -> !fir.logical<4>
+// CHECK:                 %[[VAL_36:.*]] = fir.convert %[[VAL_35]] : (!fir.logical<4>) -> i1
+// CHECK:                 %[[VAL_37:.*]]:5 = fir.if %[[VAL_36]] -> (i32, i32, i32, f32, i1) {
+// CHECK:                   %[[VAL_38:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_29]], %[[VAL_22]], %[[VAL_15]] : (!hlfir.expr<?x?x?xf32>, index, index, index) -> f32
+// CHECK:                   %[[VAL_39:.*]] = arith.cmpf olt, %[[VAL_38]], %[[VAL_33]] fastmath<reassoc> : f32
+// CHECK:                   %[[VAL_40:.*]] = arith.cmpf une, %[[VAL_33]], %[[VAL_33]] fastmath<reassoc> : f32
+// CHECK:                   %[[VAL_41:.*]] = arith.cmpf oeq, %[[VAL_38]], %[[VAL_38]] fastmath<reassoc> : f32
+// CHECK:                   %[[VAL_42:.*]] = arith.andi %[[VAL_40]], %[[VAL_41]] : i1
+// CHECK:                   %[[VAL_43:.*]] = arith.ori %[[VAL_39]], %[[VAL_42]] : i1
+// CHECK:                   %[[VAL_44:.*]] = arith.ori %[[VAL_43]], %[[VAL_34]] : i1
+// CHECK:                   %[[VAL_45:.*]] = fir.convert %[[VAL_29]] : (index) -> i32
+// CHECK:                   %[[VAL_46:.*]] = arith.select %[[VAL_44]], %[[VAL_45]], %[[VAL_30]] : i32
+// CHECK:                   %[[VAL_47:.*]] = fir.convert %[[VAL_22]] : (index) -> i32
+// CHECK:                   %[[VAL_48:.*]] = arith.select %[[VAL_44]], %[[VAL_47]], %[[VAL_31]] : i32
+// CHECK:                   %[[VAL_49:.*]] = fir.convert %[[VAL_15]] : (index) -> i32
+// CHECK:                   %[[VAL_50:.*]] = arith.select %[[VAL_44]], %[[VAL_49]], %[[VAL_32]] : i32
+// CHECK:                   %[[VAL_51:.*]] = arith.select %[[VAL_44]], %[[VAL_38]], %[[VAL_33]] : f32
+// CHECK:                   fir.result %[[VAL_46]], %[[VAL_48]], %[[VAL_50]], %[[VAL_51]], %[[VAL_4]] : i32, i32, i32, f32, i1
+// CHECK:                 } else {
+// CHECK:                   fir.result %[[VAL_30]], %[[VAL_31]], %[[VAL_32]], %[[VAL_33]], %[[VAL_34]] : i32, i32, i32, f32, i1
+// CHECK:                 }
+// CHECK:                 fir.result %[[VAL_52:.*]]#0, %[[VAL_52]]#1, %[[VAL_52]]#2, %[[VAL_52]]#3, %[[VAL_52]]#4 : i32, i32, i32, f32, i1
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_53:.*]]#0, %[[VAL_53]]#1, %[[VAL_53]]#2, %[[VAL_53]]#3, %[[VAL_53]]#4 : i32, i32, i32, f32, i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_54:.*]]#0, %[[VAL_54]]#1, %[[VAL_54]]#2, %[[VAL_54]]#3, %[[VAL_54]]#4 : i32, i32, i32, f32, i1
+// CHECK:           }
+// CHECK:           %[[VAL_58:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_5]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_14]]#0 to %[[VAL_58]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_61:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_3]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_14]]#1 to %[[VAL_61]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_64:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_2]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_14]]#2 to %[[VAL_64]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_65:.*]] = hlfir.as_expr %[[VAL_9]] move %[[VAL_4]] : (!fir.ref<!fir.array<3xi32>>, i1) -> !hlfir.expr<3xi32>
+// CHECK:           return %[[VAL_65]] : !hlfir.expr<3xi32>
+// CHECK:         }
+
+// Partial MINLOC along a constant DIM of 2 of a rank-3 boxed f32 array with a
+// boxed array MASK; the result is a rank-2 i32 expr. The expectations below
+// include a fir.is_present test on the mask box and an hlfir.elemental whose
+// body holds the reduction loop over the second dimension.
+func.func @test_partial_var(%input: !fir.box<!fir.array<?x?x?xf32>>, %mask: !fir.box<!fir.array<?x?x?x!fir.logical<4>>>) -> !hlfir.expr<?x?xi32> {
+  %dim = arith.constant 2 : i32
+  %0 = hlfir.minloc %input dim %dim mask %mask {fastmath = #arith.fastmath<reassoc>} : (!fir.box<!fir.array<?x?x?xf32>>, i32, !fir.box<!fir.array<?x?x?x!fir.logical<4>>>) -> !hlfir.expr<?x?xi32>
+  return %0 : !hlfir.expr<?x?xi32>
+}
+// CHECK-LABEL:   func.func @test_partial_var(
+// CHECK-SAME:                                %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?x?xf32>>,
+// CHECK-SAME:                                %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?x?x!fir.logical<4>>>) -> !hlfir.expr<?x?xi32> {
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = arith.constant true
+// CHECK:           %[[VAL_4:.*]] = arith.constant 3.40282347E+38 : f32
+// CHECK:           %[[VAL_5:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_6:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_7:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_8:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_9:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_8]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_10:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_11:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_12:.*]] = fir.shape %[[VAL_9]]#1, %[[VAL_11]]#1 : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_13:.*]] = fir.is_present %[[VAL_1]] : (!fir.box<!fir.array<?x?x?x!fir.logical<4>>>) -> i1
+// CHECK:           %[[VAL_14:.*]] = hlfir.elemental %[[VAL_12]] unordered : (!fir.shape<2>) -> !hlfir.expr<?x?xi32> {
+// CHECK:           ^bb0(%[[VAL_15:.*]]: index, %[[VAL_16:.*]]: index):
+// CHECK:             %[[VAL_17:.*]]:3 = fir.do_loop %[[VAL_18:.*]] = %[[VAL_7]] to %[[VAL_10]]#1 step %[[VAL_7]] unordered iter_args(%[[VAL_19:.*]] = %[[VAL_5]], %[[VAL_20:.*]] = %[[VAL_4]], %[[VAL_21:.*]] = %[[VAL_3]]) -> (i32, f32, i1) {
+// CHECK:               %[[VAL_22:.*]] = fir.if %[[VAL_13]] -> (!fir.logical<4>) {
+// CHECK:                 %[[VAL_23:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_8]] : (!fir.box<!fir.array<?x?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_24:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_7]] : (!fir.box<!fir.array<?x?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_25:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_6]] : (!fir.box<!fir.array<?x?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_26:.*]] = arith.subi %[[VAL_23]]#0, %[[VAL_7]] : index
+// CHECK:                 %[[VAL_27:.*]] = arith.addi %[[VAL_15]], %[[VAL_26]] : index
+// CHECK:                 %[[VAL_28:.*]] = arith.subi %[[VAL_24]]#0, %[[VAL_7]] : index
+// CHECK:                 %[[VAL_29:.*]] = arith.addi %[[VAL_18]], %[[VAL_28]] : index
+// CHECK:                 %[[VAL_30:.*]] = arith.subi %[[VAL_25]]#0, %[[VAL_7]] : index
+// CHECK:                 %[[VAL_31:.*]] = arith.addi %[[VAL_16]], %[[VAL_30]] : index
+// CHECK:                 %[[VAL_32:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_27]], %[[VAL_29]], %[[VAL_31]])  : (!fir.box<!fir.array<?x?x?x!fir.logical<4>>>, index, index, index) -> !fir.ref<!fir.logical<4>>
+// CHECK:                 %[[VAL_33:.*]] = fir.load %[[VAL_32]] : !fir.ref<!fir.logical<4>>
+// CHECK:                 fir.result %[[VAL_33]] : !fir.logical<4>
+// CHECK:               } else {
+// CHECK:                 %[[VAL_34:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4>
+// CHECK:                 fir.result %[[VAL_34]] : !fir.logical<4>
+// CHECK:               }
+// CHECK:               %[[VAL_35:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+// CHECK:               %[[VAL_36:.*]]:3 = fir.if %[[VAL_35]] -> (i32, f32, i1) {
+// CHECK:                 %[[VAL_37:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_8]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_38:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_39:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_40:.*]] = arith.subi %[[VAL_37]]#0, %[[VAL_7]] : index
+// CHECK:                 %[[VAL_41:.*]] = arith.addi %[[VAL_15]], %[[VAL_40]] : index
+// CHECK:                 %[[VAL_42:.*]] = arith.subi %[[VAL_38]]#0, %[[VAL_7]] : index
+// CHECK:                 %[[VAL_43:.*]] = arith.addi %[[VAL_18]], %[[VAL_42]] : index
+// CHECK:                 %[[VAL_44:.*]] = arith.subi %[[VAL_39]]#0, %[[VAL_7]] : index
+// CHECK:                 %[[VAL_45:.*]] = arith.addi %[[VAL_16]], %[[VAL_44]] : index
+// CHECK:                 %[[VAL_46:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_41]], %[[VAL_43]], %[[VAL_45]])  : (!fir.box<!fir.array<?x?x?xf32>>, index, index, index) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<f32>
+// CHECK:                 %[[VAL_48:.*]] = arith.cmpf olt, %[[VAL_47]], %[[VAL_20]] fastmath<reassoc> : f32
+// CHECK:                 %[[VAL_49:.*]] = arith.cmpf une, %[[VAL_20]], %[[VAL_20]] fastmath<reassoc> : f32
+// CHECK:                 %[[VAL_50:.*]] = arith.cmpf oeq, %[[VAL_47]], %[[VAL_47]] fastmath<reassoc> : f32
+// CHECK:                 %[[VAL_51:.*]] = arith.andi %[[VAL_49]], %[[VAL_50]] : i1
+// CHECK:                 %[[VAL_52:.*]] = arith.ori %[[VAL_48]], %[[VAL_51]] : i1
+// CHECK:                 %[[VAL_53:.*]] = arith.ori %[[VAL_52]], %[[VAL_21]] : i1
+// CHECK:                 %[[VAL_54:.*]] = fir.convert %[[VAL_18]] : (index) -> i32
+// CHECK:                 %[[VAL_55:.*]] = arith.select %[[VAL_53]], %[[VAL_54]], %[[VAL_19]] : i32
+// CHECK:                 %[[VAL_56:.*]] = arith.select %[[VAL_53]], %[[VAL_47]], %[[VAL_20]] : f32
+// CHECK:                 fir.result %[[VAL_55]], %[[VAL_56]], %[[VAL_2]] : i32, f32, i1
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_19]], %[[VAL_20]], %[[VAL_21]] : i32, f32, i1
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_57:.*]]#0, %[[VAL_57]]#1, %[[VAL_57]]#2 : i32, f32, i1
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_17]]#0 : i32
+// CHECK:           }
+// CHECK:           return %[[VAL_14]] : !hlfir.expr<?x?xi32>
+// CHECK:         }
+
+// Character comparisons are not supported yet, so hlfir.minloc on a character
+// array is expected to remain in the output instead of being inlined.
+func.func @test_character(%input: !fir.box<!fir.array<?x!fir.char<1>>>) -> !hlfir.expr<1xi32> {
+  %0 = hlfir.minloc %input : (!fir.box<!fir.array<?x!fir.char<1>>>) -> !hlfir.expr<1xi32>
+  return %0 : !hlfir.expr<1xi32>
+}
+// CHECK-LABEL:   func.func @test_character(
+// CHECK:           hlfir.minloc
+
+// BACK is not supported yet, so hlfir.minloc with a BACK operand (here a
+// constant true) is expected to remain in the output instead of being inlined.
+func.func @test_back(%input: !hlfir.expr<?xi32>) -> !hlfir.expr<1xi32> {
+  %back = arith.constant true
+  %0 = hlfir.minloc %input back %back : (!hlfir.expr<?xi32>, i1) -> !hlfir.expr<1xi32>
+  return %0 : !hlfir.expr<1xi32>
+}
+// CHECK-LABEL:   func.func @test_back(
+// CHECK:           hlfir.minloc
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-minval.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-minval.fir
new file mode 100644
index 0000000..98e4c69
--- /dev/null
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-minval.fir
@@ -0,0 +1,186 @@
+// RUN: fir-opt %s --simplify-hlfir-intrinsics | FileCheck %s
+
+// Total MINVAL of a rank-2 f32 hlfir.expr with an array MASK expr; the result
+// is a scalar f32. Only fastmath<contract> is set, and the two nested loops
+// expected below are not marked unordered. Each loop carries the running
+// minimum plus an i1 that starts as true and is yielded as false once a
+// masked-in element has been processed.
+func.func @test_total_expr(%input: !hlfir.expr<?x?xf32>, %mask: !hlfir.expr<?x?x!fir.logical<4>>) -> f32 {
+  %0 = hlfir.minval %input mask %mask {fastmath = #arith.fastmath<contract>} : (!hlfir.expr<?x?xf32>, !hlfir.expr<?x?x!fir.logical<4>>) -> f32
+  return %0 : f32
+}
+// CHECK-LABEL:   func.func @test_total_expr(
+// CHECK-SAME:                               %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?xf32>,
+// CHECK-SAME:                               %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x!fir.logical<4>>) -> f32 {
+// CHECK:           %[[FALSE:.*]] = arith.constant false
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[TRUE:.*]] = arith.constant true
+// CHECK:           %[[VAL_3:.*]] = arith.constant 3.40282347E+38 : f32
+// CHECK:           %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?xf32>) -> !fir.shape<2>
+// CHECK:           %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_6:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 1 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_7:.*]]:2 = fir.do_loop %[[VAL_8:.*]] = %[[VAL_2]] to %[[VAL_6]] step %[[VAL_2]] iter_args(%[[VAL_9:.*]] = %[[VAL_3]], %[[FIRST1:.*]] = %[[TRUE]]) -> (f32, i1) {
+// CHECK:             %[[VAL_10:.*]]:2 = fir.do_loop %[[VAL_11:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] iter_args(%[[VAL_12:.*]] = %[[VAL_9]], %[[FIRST2:.*]] = %[[FIRST1]]) -> (f32, i1) {
+// CHECK:               %[[VAL_13:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_11]], %[[VAL_8]] : (!hlfir.expr<?x?x!fir.logical<4>>, index, index) -> !fir.logical<4>
+// CHECK:               %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (!fir.logical<4>) -> i1
+// CHECK:               %[[VAL_15:.*]]:2 = fir.if %[[VAL_14]] -> (f32, i1) {
+// CHECK:                 %[[VAL_16:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_11]], %[[VAL_8]] : (!hlfir.expr<?x?xf32>, index, index) -> f32
+// CHECK:                 %[[VAL_17:.*]] = arith.cmpf olt, %[[VAL_16]], %[[VAL_12]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_18:.*]] = arith.cmpf une, %[[VAL_12]], %[[VAL_12]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_19:.*]] = arith.cmpf oeq, %[[VAL_16]], %[[VAL_16]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_20:.*]] = arith.andi %[[VAL_18]], %[[VAL_19]] : i1
+// CHECK:                 %[[VAL_21:.*]] = arith.ori %[[VAL_17]], %[[VAL_20]] : i1
+// CHECK:                 %[[IS_FIRST:.*]] = arith.ori %[[VAL_21]], %[[FIRST2]] : i1
+// CHECK:                 %[[VAL_22:.*]] = arith.select %[[IS_FIRST]], %[[VAL_16]], %[[VAL_12]] : f32
+// CHECK:                 fir.result %[[VAL_22]], %[[FALSE]] : f32, i1
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_12]], %[[FIRST2]] : f32, i1
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_15]]#0, %[[VAL_15]]#1 : f32, i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_10]]#0, %[[VAL_10]]#1 : f32, i1
+// CHECK:           }
+// CHECK:           return %[[VAL_7]]#0 : f32
+// CHECK:         }
+
+// Partial MINVAL along a constant DIM of 1 of a rank-2 f64 hlfir.expr with an
+// array MASK expr; the result is a rank-1 f64 expr. The expectations below
+// wrap an unordered reduction loop over the first dimension in an
+// hlfir.elemental shaped by the second extent.
+func.func @test_partial_expr(%input: !hlfir.expr<?x?xf64>, %mask: !hlfir.expr<?x?x!fir.logical<4>>) -> !hlfir.expr<?xf64> {
+  %dim = arith.constant 1 : i32
+  %0 = hlfir.minval %input dim %dim mask %mask {fastmath = #arith.fastmath<reassoc>} : (!hlfir.expr<?x?xf64>, i32, !hlfir.expr<?x?x!fir.logical<4>>) -> !hlfir.expr<?xf64>
+  return %0 : !hlfir.expr<?xf64>
+}
+// CHECK-LABEL:   func.func @test_partial_expr(
+// CHECK-SAME:                                 %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?xf64>,
+// CHECK-SAME:                                 %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x!fir.logical<4>>) -> !hlfir.expr<?xf64> {
+// CHECK:           %[[FALSE:.*]] = arith.constant false
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[TRUE:.*]] = arith.constant true
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1.7976931348623157E+308 : f64
+// CHECK:           %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?xf64>) -> !fir.shape<2>
+// CHECK:           %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_6:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 1 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_7:.*]] = fir.shape %[[VAL_6]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_8:.*]] = hlfir.elemental %[[VAL_7]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xf64> {
+// CHECK:           ^bb0(%[[VAL_9:.*]]: index):
+// CHECK:             %[[VAL_10:.*]]:2 = fir.do_loop %[[VAL_11:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] unordered iter_args(%[[VAL_12:.*]] = %[[VAL_3]], %[[FIRST:.*]] = %[[TRUE]]) -> (f64, i1) {
+// CHECK:               %[[VAL_13:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_11]], %[[VAL_9]] : (!hlfir.expr<?x?x!fir.logical<4>>, index, index) -> !fir.logical<4>
+// CHECK:               %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (!fir.logical<4>) -> i1
+// CHECK:               %[[VAL_15:.*]]:2 = fir.if %[[VAL_14]] -> (f64, i1) {
+// CHECK:                 %[[VAL_16:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_11]], %[[VAL_9]] : (!hlfir.expr<?x?xf64>, index, index) -> f64
+// CHECK:                 %[[VAL_17:.*]] = arith.cmpf olt, %[[VAL_16]], %[[VAL_12]] fastmath<reassoc> : f64
+// CHECK:                 %[[VAL_18:.*]] = arith.cmpf une, %[[VAL_12]], %[[VAL_12]] fastmath<reassoc> : f64
+// CHECK:                 %[[VAL_19:.*]] = arith.cmpf oeq, %[[VAL_16]], %[[VAL_16]] fastmath<reassoc> : f64
+// CHECK:                 %[[VAL_20:.*]] = arith.andi %[[VAL_18]], %[[VAL_19]] : i1
+// CHECK:                 %[[VAL_21:.*]] = arith.ori %[[VAL_17]], %[[VAL_20]] : i1
+// CHECK:                 %[[IS_FIRST:.*]] = arith.ori %[[VAL_21]], %[[FIRST]] : i1
+// CHECK:                 %[[VAL_22:.*]] = arith.select %[[IS_FIRST]], %[[VAL_16]], %[[VAL_12]] : f64
+// CHECK:                 fir.result %[[VAL_22]], %[[FALSE]] : f64, i1
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_12]], %[[FIRST]] : f64, i1
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_15]]#0, %[[VAL_15]]#1 : f64, i1
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_10]]#0 : f64
+// CHECK:           }
+// CHECK:           return %[[VAL_8]] : !hlfir.expr<?xf64>
+// CHECK:         }
+
+// Total MINVAL of a rank-2 boxed f16 array with a MASK given as a reference to
+// a 2x2 logical(1) array; the result is a scalar f16. With fastmath<reassoc>
+// the two nested loops expected below are marked unordered.
+//
+// FALSE and TRUE are bound inside this test so that it does not rely on
+// captures left over from the preceding test. The constants are matched with
+// FileCheck's DAG form so the expectations do not depend on the order in which
+// the pass materializes them.
+func.func @test_total_var(%input: !fir.box<!fir.array<?x?xf16>>, %mask: !fir.ref<!fir.array<2x2x!fir.logical<1>>>) -> f16 {
+  %0 = hlfir.minval %input mask %mask {fastmath = #arith.fastmath<reassoc>} : (!fir.box<!fir.array<?x?xf16>>, !fir.ref<!fir.array<2x2x!fir.logical<1>>>) -> f16
+  return %0 : f16
+}
+// CHECK-LABEL:   func.func @test_total_var(
+// CHECK-SAME:                              %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?xf16>>,
+// CHECK-SAME:                              %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.ref<!fir.array<2x2x!fir.logical<1>>>) -> f16 {
+// CHECK-DAG:       %[[FALSE:.*]] = arith.constant false
+// CHECK-DAG:       %[[TRUE:.*]] = arith.constant true
+// CHECK-DAG:       %[[VAL_2:.*]] = arith.constant 6.550400e+04 : f16
+// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_7:.*]]:2 = fir.do_loop %[[VAL_8:.*]] = %[[VAL_3]] to %[[VAL_6]]#1 step %[[VAL_3]] unordered iter_args(%[[VAL_9:.*]] = %[[VAL_2]], %[[FIRST1:.*]] = %[[TRUE]]) -> (f16, i1) {
+// CHECK:             %[[VAL_10:.*]]:2 = fir.do_loop %[[VAL_11:.*]] = %[[VAL_3]] to %[[VAL_5]]#1 step %[[VAL_3]] unordered iter_args(%[[VAL_12:.*]] = %[[VAL_9]], %[[FIRST2:.*]] = %[[FIRST1]]) -> (f16, i1) {
+// CHECK:               %[[VAL_13:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_11]], %[[VAL_8]])  : (!fir.ref<!fir.array<2x2x!fir.logical<1>>>, index, index) -> !fir.ref<!fir.logical<1>>
+// CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_13]] : !fir.ref<!fir.logical<1>>
+// CHECK:               %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (!fir.logical<1>) -> i1
+// CHECK:               %[[VAL_16:.*]]:2 = fir.if %[[VAL_15]] -> (f16, i1) {
+// CHECK:                 %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_18:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_19:.*]] = arith.subi %[[VAL_17]]#0, %[[VAL_3]] : index
+// CHECK:                 %[[VAL_20:.*]] = arith.addi %[[VAL_11]], %[[VAL_19]] : index
+// CHECK:                 %[[VAL_21:.*]] = arith.subi %[[VAL_18]]#0, %[[VAL_3]] : index
+// CHECK:                 %[[VAL_22:.*]] = arith.addi %[[VAL_8]], %[[VAL_21]] : index
+// CHECK:                 %[[VAL_23:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_20]], %[[VAL_22]])  : (!fir.box<!fir.array<?x?xf16>>, index, index) -> !fir.ref<f16>
+// CHECK:                 %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref<f16>
+// CHECK:                 %[[VAL_25:.*]] = arith.cmpf olt, %[[VAL_24]], %[[VAL_12]] fastmath<reassoc> : f16
+// CHECK:                 %[[VAL_26:.*]] = arith.cmpf une, %[[VAL_12]], %[[VAL_12]] fastmath<reassoc> : f16
+// CHECK:                 %[[VAL_27:.*]] = arith.cmpf oeq, %[[VAL_24]], %[[VAL_24]] fastmath<reassoc> : f16
+// CHECK:                 %[[VAL_28:.*]] = arith.andi %[[VAL_26]], %[[VAL_27]] : i1
+// CHECK:                 %[[VAL_29:.*]] = arith.ori %[[VAL_25]], %[[VAL_28]] : i1
+// CHECK:                 %[[IS_FIRST:.*]] = arith.ori %[[VAL_29]], %[[FIRST2]] : i1
+// CHECK:                 %[[VAL_30:.*]] = arith.select %[[IS_FIRST]], %[[VAL_24]], %[[VAL_12]] : f16
+// CHECK:                 fir.result %[[VAL_30]], %[[FALSE]] : f16, i1
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_12]], %[[FIRST2]] : f16, i1
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_16]]#0, %[[VAL_16]]#1 : f16, i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_10]]#0, %[[VAL_10]]#1 : f16, i1
+// CHECK:           }
+// CHECK:           return %[[VAL_7]]#0 : f16
+// CHECK:         }
+
+// Partial MINVAL along a constant DIM of 2 of a rank-2 boxed f16 array with a
+// boxed 2x2 logical(1) MASK; the result is a rank-1 f16 expr. The expectations
+// below include a fir.is_present test on the mask box and an hlfir.elemental
+// whose body holds the unordered reduction loop over the second dimension.
+//
+// The loop's initial i1 flag is matched against VAL_2, the `true` constant
+// bound in this test, rather than a capture left over from an earlier test.
+func.func @test_partial_var(%input: !fir.box<!fir.array<?x?xf16>>, %mask: !fir.box<!fir.array<2x2x!fir.logical<1>>>) -> !hlfir.expr<?xf16> {
+  %dim = arith.constant 2 : i32
+  %0 = hlfir.minval %input dim %dim mask %mask {fastmath = #arith.fastmath<reassoc>} : (!fir.box<!fir.array<?x?xf16>>, i32, !fir.box<!fir.array<2x2x!fir.logical<1>>>) -> !hlfir.expr<?xf16>
+  return %0 : !hlfir.expr<?xf16>
+}
+// CHECK-LABEL:   func.func @test_partial_var(
+// CHECK-SAME:                                %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?xf16>>,
+// CHECK-SAME:                                %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<2x2x!fir.logical<1>>>) -> !hlfir.expr<?xf16> {
+// CHECK:           %[[FALSE:.*]] = arith.constant false
+// CHECK:           %[[VAL_2:.*]] = arith.constant true
+// CHECK:           %[[VAL_3:.*]] = arith.constant 6.550400e+04 : f16
+// CHECK:           %[[VAL_4:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_5:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_7:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_8:.*]] = fir.shape %[[VAL_6]]#1 : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_9:.*]] = fir.is_present %[[VAL_1]] : (!fir.box<!fir.array<2x2x!fir.logical<1>>>) -> i1
+// CHECK:           %[[VAL_10:.*]] = hlfir.elemental %[[VAL_8]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xf16> {
+// CHECK:           ^bb0(%[[VAL_11:.*]]: index):
+// CHECK:             %[[VAL_12:.*]]:2 = fir.do_loop %[[VAL_13:.*]] = %[[VAL_4]] to %[[VAL_7]]#1 step %[[VAL_4]] unordered iter_args(%[[VAL_14:.*]] = %[[VAL_3]], %[[FIRST:.*]] = %[[VAL_2]]) -> (f16, i1) {
+// CHECK:               %[[VAL_15:.*]] = fir.if %[[VAL_9]] -> (!fir.logical<1>) {
+// CHECK:                 %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_5]] : (!fir.box<!fir.array<2x2x!fir.logical<1>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_4]] : (!fir.box<!fir.array<2x2x!fir.logical<1>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_18:.*]] = arith.subi %[[VAL_16]]#0, %[[VAL_4]] : index
+// CHECK:                 %[[VAL_19:.*]] = arith.addi %[[VAL_11]], %[[VAL_18]] : index
+// CHECK:                 %[[VAL_20:.*]] = arith.subi %[[VAL_17]]#0, %[[VAL_4]] : index
+// CHECK:                 %[[VAL_21:.*]] = arith.addi %[[VAL_13]], %[[VAL_20]] : index
+// CHECK:                 %[[VAL_22:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_19]], %[[VAL_21]])  : (!fir.box<!fir.array<2x2x!fir.logical<1>>>, index, index) -> !fir.ref<!fir.logical<1>>
+// CHECK:                 %[[VAL_23:.*]] = fir.load %[[VAL_22]] : !fir.ref<!fir.logical<1>>
+// CHECK:                 fir.result %[[VAL_23]] : !fir.logical<1>
+// CHECK:               } else {
+// CHECK:                 %[[VAL_24:.*]] = fir.convert %[[VAL_2]] : (i1) -> !fir.logical<1>
+// CHECK:                 fir.result %[[VAL_24]] : !fir.logical<1>
+// CHECK:               }
+// CHECK:               %[[VAL_25:.*]] = fir.convert %[[VAL_15]] : (!fir.logical<1>) -> i1
+// CHECK:               %[[VAL_26:.*]]:2 = fir.if %[[VAL_25]] -> (f16, i1) {
+// CHECK:                 %[[VAL_27:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_28:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_29:.*]] = arith.subi %[[VAL_27]]#0, %[[VAL_4]] : index
+// CHECK:                 %[[VAL_30:.*]] = arith.addi %[[VAL_11]], %[[VAL_29]] : index
+// CHECK:                 %[[VAL_31:.*]] = arith.subi %[[VAL_28]]#0, %[[VAL_4]] : index
+// CHECK:                 %[[VAL_32:.*]] = arith.addi %[[VAL_13]], %[[VAL_31]] : index
+// CHECK:                 %[[VAL_33:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_30]], %[[VAL_32]])  : (!fir.box<!fir.array<?x?xf16>>, index, index) -> !fir.ref<f16>
+// CHECK:                 %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<f16>
+// CHECK:                 %[[VAL_35:.*]] = arith.cmpf olt, %[[VAL_34]], %[[VAL_14]] fastmath<reassoc> : f16
+// CHECK:                 %[[VAL_36:.*]] = arith.cmpf une, %[[VAL_14]], %[[VAL_14]] fastmath<reassoc> : f16
+// CHECK:                 %[[VAL_37:.*]] = arith.cmpf oeq, %[[VAL_34]], %[[VAL_34]] fastmath<reassoc> : f16
+// CHECK:                 %[[VAL_38:.*]] = arith.andi %[[VAL_36]], %[[VAL_37]] : i1
+// CHECK:                 %[[VAL_39:.*]] = arith.ori %[[VAL_35]], %[[VAL_38]] : i1
+// CHECK:                 %[[IS_FIRST:.*]] = arith.ori %[[VAL_39]], %[[FIRST]] : i1
+// CHECK:                 %[[VAL_40:.*]] = arith.select %[[IS_FIRST]], %[[VAL_34]], %[[VAL_14]] : f16
+// CHECK:                 fir.result %[[VAL_40]], %[[FALSE]] : f16, i1
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_14]], %[[FIRST]] : f16, i1
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_26]]#0, %[[VAL_26]]#1 : f16, i1
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_12]]#0 : f16
+// CHECK:           }
+// CHECK:           return %[[VAL_10]] : !hlfir.expr<?xf16>
+// CHECK:         }