[flang] Implement reductions in the runtime Add runtime APIs, implementations, and tests for ALL, ANY, COUNT, MAXLOC, MAXVAL, MINLOC, MINVAL, PRODUCT, and SUM reduction transformantional intrinsic functions for all relevant argument and result types and kinds, both without DIM= arguments (total reductions) and with (partial reductions). Complex-valued reductions have their APIs in C so that C's _Complex types can be used for their results. Some infrastructure work was also necessary or noticed: * Usage of "long double" in the compiler was cleaned up a bit, and host dependences on x86 / MSVC have been isolated in a new Common/long-double header. * Character comparison has been exposed via an extern template so that reductions could use it. * Mappings from Fortran type category/kind to host C++ types and vice versa have been isolated into runtime/cpp-type.h and then used throughout the runtime as appropriate. * The portable 128-bit integer package in Common/uint128.h was generalized to support signed comparisons. * Bugs in descriptor indexing code were fixed. Differential Revision: https://reviews.llvm.org/D99666 GitOrigin-RevId: e372e0f906194cb051ad71305e00f4634860bdf3

commit: 70a94dbd12bada418e323252a041e00ec3c8c0b7 [log] [tgz]
author: peter klausler <pklausler@nvidia.com> Wed Mar 31 09:14:08 2021 -0700
committer: Copybara-Service <copybara-worker@google.com> Mon Apr 12 11:23:34 2021 -0700
tree: 5e6e8da1cbf152a7146bb7b7735aa36cb9b3e3e3
parent: 6cc777ae3d28e3dd5d396365d936a4df46d26542 [diff]
diff --git a/include/flang/Common/long-double.h b/include/flang/Common/long-double.h
new file mode 100644
index 0000000..b1d39a9
--- /dev/null
+++ b/include/flang/Common/long-double.h

@@ -0,0 +1,23 @@
+/*===-- include/flang/Common/config.h -------------------------------*- C -*-===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+/* This header can be used by both C and C++. */
+
+#ifndef FORTRAN_COMMON_LONG_DOUBLE_H
+#define FORTRAN_COMMON_LONG_DOUBLE_H
+
+#ifdef _MSC_VER /* no long double */
+#undef LONG_DOUBLE
+#elif __x86_64__ /* x87 extended precision */
+#define LONG_DOUBLE 80
+#else
+#define LONG_DOUBLE 128
+#endif
+
+#endif /* FORTRAN_COMMON_LONG_DOUBLE_H */

diff --git a/include/flang/Common/uint128.h b/include/flang/Common/uint128.h
index 0ed3cf1..f5ac48d 100644
--- a/include/flang/Common/uint128.h
+++ b/include/flang/Common/uint128.h

@@ -25,31 +25,31 @@
 
 namespace Fortran::common {
 
-class UnsignedInt128 {
+template <bool IS_SIGNED = false> class Int128 {
 public:
-  constexpr UnsignedInt128() {}
+  constexpr Int128() {}
   // This means of definition provides some portability for
   // "size_t" operands.
-  constexpr UnsignedInt128(unsigned n) : low_{n} {}
-  constexpr UnsignedInt128(unsigned long n) : low_{n} {}
-  constexpr UnsignedInt128(unsigned long long n) : low_{n} {}
-  constexpr UnsignedInt128(int n)
+  constexpr Int128(unsigned n) : low_{n} {}
+  constexpr Int128(unsigned long n) : low_{n} {}
+  constexpr Int128(unsigned long long n) : low_{n} {}
+  constexpr Int128(int n)
       : low_{static_cast<std::uint64_t>(n)}, high_{-static_cast<std::uint64_t>(
                                                  n < 0)} {}
-  constexpr UnsignedInt128(long n)
+  constexpr Int128(long n)
       : low_{static_cast<std::uint64_t>(n)}, high_{-static_cast<std::uint64_t>(
                                                  n < 0)} {}
-  constexpr UnsignedInt128(long long n)
+  constexpr Int128(long long n)
       : low_{static_cast<std::uint64_t>(n)}, high_{-static_cast<std::uint64_t>(
                                                  n < 0)} {}
-  constexpr UnsignedInt128(const UnsignedInt128 &) = default;
-  constexpr UnsignedInt128(UnsignedInt128 &&) = default;
-  constexpr UnsignedInt128 &operator=(const UnsignedInt128 &) = default;
-  constexpr UnsignedInt128 &operator=(UnsignedInt128 &&) = default;
+  constexpr Int128(const Int128 &) = default;
+  constexpr Int128(Int128 &&) = default;
+  constexpr Int128 &operator=(const Int128 &) = default;
+  constexpr Int128 &operator=(Int128 &&) = default;
 
-  constexpr UnsignedInt128 operator+() const { return *this; }
-  constexpr UnsignedInt128 operator~() const { return {~high_, ~low_}; }
-  constexpr UnsignedInt128 operator-() const { return ~*this + 1; }
+  constexpr Int128 operator+() const { return *this; }
+  constexpr Int128 operator~() const { return {~high_, ~low_}; }
+  constexpr Int128 operator-() const { return ~*this + 1; }
   constexpr bool operator!() const { return !low_ && !high_; }
   constexpr explicit operator bool() const { return low_ || high_; }
   constexpr explicit operator std::uint64_t() const { return low_; }
@@ -59,36 +59,36 @@
   constexpr std::uint64_t high() const { return high_; }
   constexpr std::uint64_t low() const { return low_; }
 
-  constexpr UnsignedInt128 operator++(/*prefix*/) {
+  constexpr Int128 operator++(/*prefix*/) {
     *this += 1;
     return *this;
   }
-  constexpr UnsignedInt128 operator++(int /*postfix*/) {
-    UnsignedInt128 result{*this};
+  constexpr Int128 operator++(int /*postfix*/) {
+    Int128 result{*this};
     *this += 1;
     return result;
   }
-  constexpr UnsignedInt128 operator--(/*prefix*/) {
+  constexpr Int128 operator--(/*prefix*/) {
     *this -= 1;
     return *this;
   }
-  constexpr UnsignedInt128 operator--(int /*postfix*/) {
-    UnsignedInt128 result{*this};
+  constexpr Int128 operator--(int /*postfix*/) {
+    Int128 result{*this};
     *this -= 1;
     return result;
   }
 
-  constexpr UnsignedInt128 operator&(UnsignedInt128 that) const {
+  constexpr Int128 operator&(Int128 that) const {
     return {high_ & that.high_, low_ & that.low_};
   }
-  constexpr UnsignedInt128 operator|(UnsignedInt128 that) const {
+  constexpr Int128 operator|(Int128 that) const {
     return {high_ | that.high_, low_ | that.low_};
   }
-  constexpr UnsignedInt128 operator^(UnsignedInt128 that) const {
+  constexpr Int128 operator^(Int128 that) const {
     return {high_ ^ that.high_, low_ ^ that.low_};
   }
 
-  constexpr UnsignedInt128 operator<<(UnsignedInt128 that) const {
+  constexpr Int128 operator<<(Int128 that) const {
     if (that >= 128) {
       return {};
     } else if (that == 0) {
@@ -102,7 +102,7 @@
       }
     }
   }
-  constexpr UnsignedInt128 operator>>(UnsignedInt128 that) const {
+  constexpr Int128 operator>>(Int128 that) const {
     if (that >= 128) {
       return {};
     } else if (that == 0) {
@@ -117,43 +117,41 @@
     }
   }
 
-  constexpr UnsignedInt128 operator+(UnsignedInt128 that) const {
+  constexpr Int128 operator+(Int128 that) const {
     std::uint64_t lower{(low_ & ~topBit) + (that.low_ & ~topBit)};
     bool carry{((lower >> 63) + (low_ >> 63) + (that.low_ >> 63)) > 1};
     return {high_ + that.high_ + carry, low_ + that.low_};
   }
-  constexpr UnsignedInt128 operator-(UnsignedInt128 that) const {
-    return *this + -that;
-  }
+  constexpr Int128 operator-(Int128 that) const { return *this + -that; }
 
-  constexpr UnsignedInt128 operator*(UnsignedInt128 that) const {
+  constexpr Int128 operator*(Int128 that) const {
     std::uint64_t mask32{0xffffffff};
     if (high_ == 0 && that.high_ == 0) {
       std::uint64_t x0{low_ & mask32}, x1{low_ >> 32};
       std::uint64_t y0{that.low_ & mask32}, y1{that.low_ >> 32};
-      UnsignedInt128 x0y0{x0 * y0}, x0y1{x0 * y1};
-      UnsignedInt128 x1y0{x1 * y0}, x1y1{x1 * y1};
+      Int128 x0y0{x0 * y0}, x0y1{x0 * y1};
+      Int128 x1y0{x1 * y0}, x1y1{x1 * y1};
       return x0y0 + ((x0y1 + x1y0) << 32) + (x1y1 << 64);
     } else {
       std::uint64_t x0{low_ & mask32}, x1{low_ >> 32}, x2{high_ & mask32},
           x3{high_ >> 32};
       std::uint64_t y0{that.low_ & mask32}, y1{that.low_ >> 32},
           y2{that.high_ & mask32}, y3{that.high_ >> 32};
-      UnsignedInt128 x0y0{x0 * y0}, x0y1{x0 * y1}, x0y2{x0 * y2}, x0y3{x0 * y3};
-      UnsignedInt128 x1y0{x1 * y0}, x1y1{x1 * y1}, x1y2{x1 * y2};
-      UnsignedInt128 x2y0{x2 * y0}, x2y1{x2 * y1};
-      UnsignedInt128 x3y0{x3 * y0};
+      Int128 x0y0{x0 * y0}, x0y1{x0 * y1}, x0y2{x0 * y2}, x0y3{x0 * y3};
+      Int128 x1y0{x1 * y0}, x1y1{x1 * y1}, x1y2{x1 * y2};
+      Int128 x2y0{x2 * y0}, x2y1{x2 * y1};
+      Int128 x3y0{x3 * y0};
       return x0y0 + ((x0y1 + x1y0) << 32) + ((x0y2 + x1y1 + x2y0) << 64) +
           ((x0y3 + x1y2 + x2y1 + x3y0) << 96);
     }
   }
 
-  constexpr UnsignedInt128 operator/(UnsignedInt128 that) const {
+  constexpr Int128 operator/(Int128 that) const {
     int j{LeadingZeroes()};
-    UnsignedInt128 bits{*this};
+    Int128 bits{*this};
     bits <<= j;
-    UnsignedInt128 numerator{};
-    UnsignedInt128 quotient{};
+    Int128 numerator{};
+    Int128 quotient{};
     for (; j < 128; ++j) {
       numerator <<= 1;
       if (bits.high_ & topBit) {
@@ -169,11 +167,11 @@
     return quotient;
   }
 
-  constexpr UnsignedInt128 operator%(UnsignedInt128 that) const {
+  constexpr Int128 operator%(Int128 that) const {
     int j{LeadingZeroes()};
-    UnsignedInt128 bits{*this};
+    Int128 bits{*this};
     bits <<= j;
-    UnsignedInt128 remainder{};
+    Int128 remainder{};
     for (; j < 128; ++j) {
       remainder <<= 1;
       if (bits.high_ & topBit) {
@@ -187,65 +185,63 @@
     return remainder;
   }
 
-  constexpr bool operator<(UnsignedInt128 that) const {
+  constexpr bool operator<(Int128 that) const {
+    if (IS_SIGNED && (high_ ^ that.high_) & topBit) {
+      return (high_ & topBit) != 0;
+    }
     return high_ < that.high_ || (high_ == that.high_ && low_ < that.low_);
   }
-  constexpr bool operator<=(UnsignedInt128 that) const {
-    return !(*this > that);
-  }
-  constexpr bool operator==(UnsignedInt128 that) const {
+  constexpr bool operator<=(Int128 that) const { return !(*this > that); }
+  constexpr bool operator==(Int128 that) const {
     return low_ == that.low_ && high_ == that.high_;
   }
-  constexpr bool operator!=(UnsignedInt128 that) const {
-    return !(*this == that);
-  }
-  constexpr bool operator>=(UnsignedInt128 that) const { return that <= *this; }
-  constexpr bool operator>(UnsignedInt128 that) const { return that < *this; }
+  constexpr bool operator!=(Int128 that) const { return !(*this == that); }
+  constexpr bool operator>=(Int128 that) const { return that <= *this; }
+  constexpr bool operator>(Int128 that) const { return that < *this; }
 
-  constexpr UnsignedInt128 &operator&=(const UnsignedInt128 &that) {
+  constexpr Int128 &operator&=(const Int128 &that) {
     *this = *this & that;
     return *this;
   }
-  constexpr UnsignedInt128 &operator|=(const UnsignedInt128 &that) {
+  constexpr Int128 &operator|=(const Int128 &that) {
     *this = *this | that;
     return *this;
   }
-  constexpr UnsignedInt128 &operator^=(const UnsignedInt128 &that) {
+  constexpr Int128 &operator^=(const Int128 &that) {
     *this = *this ^ that;
     return *this;
   }
-  constexpr UnsignedInt128 &operator<<=(const UnsignedInt128 &that) {
+  constexpr Int128 &operator<<=(const Int128 &that) {
     *this = *this << that;
     return *this;
   }
-  constexpr UnsignedInt128 &operator>>=(const UnsignedInt128 &that) {
+  constexpr Int128 &operator>>=(const Int128 &that) {
     *this = *this >> that;
     return *this;
   }
-  constexpr UnsignedInt128 &operator+=(const UnsignedInt128 &that) {
+  constexpr Int128 &operator+=(const Int128 &that) {
     *this = *this + that;
     return *this;
   }
-  constexpr UnsignedInt128 &operator-=(const UnsignedInt128 &that) {
+  constexpr Int128 &operator-=(const Int128 &that) {
     *this = *this - that;
     return *this;
   }
-  constexpr UnsignedInt128 &operator*=(const UnsignedInt128 &that) {
+  constexpr Int128 &operator*=(const Int128 &that) {
     *this = *this * that;
     return *this;
   }
-  constexpr UnsignedInt128 &operator/=(const UnsignedInt128 &that) {
+  constexpr Int128 &operator/=(const Int128 &that) {
     *this = *this / that;
     return *this;
   }
-  constexpr UnsignedInt128 &operator%=(const UnsignedInt128 &that) {
+  constexpr Int128 &operator%=(const Int128 &that) {
     *this = *this % that;
     return *this;
   }
 
 private:
-  constexpr UnsignedInt128(std::uint64_t hi, std::uint64_t lo)
-      : low_{lo}, high_{hi} {}
+  constexpr Int128(std::uint64_t hi, std::uint64_t lo) : low_{lo}, high_{hi} {}
   constexpr int LeadingZeroes() const {
     if (high_ == 0) {
       return 64 + LeadingZeroBitCount(low_);
@@ -257,12 +253,16 @@
   std::uint64_t low_{0}, high_{0};
 };
 
-#if AVOID_NATIVE_UINT128_T
-using uint128_t = UnsignedInt128;
-#elif (defined __GNUC__ || defined __clang__) && defined __SIZEOF_INT128__
+using UnsignedInt128 = Int128<false>;
+using SignedInt128 = Int128<true>;
+
+#if !AVOID_NATIVE_UINT128_t && (defined __GNUC__ || defined __clang__) && \
+    defined __SIZEOF_INT128__
 using uint128_t = __uint128_t;
+using int128_t = __int128_t;
 #else
 using uint128_t = UnsignedInt128;
+using int128_t = SignedInt128;
 #endif
 
 template <int BITS> struct HostUnsignedIntTypeHelper {
@@ -271,8 +271,16 @@
           std::conditional_t<(BITS <= 32), std::uint32_t,
               std::conditional_t<(BITS <= 64), std::uint64_t, uint128_t>>>>;
 };
+template <int BITS> struct HostSignedIntTypeHelper {
+  using type = std::conditional_t<(BITS <= 8), std::int8_t,
+      std::conditional_t<(BITS <= 16), std::int16_t,
+          std::conditional_t<(BITS <= 32), std::int32_t,
+              std::conditional_t<(BITS <= 64), std::int64_t, int128_t>>>>;
+};
 template <int BITS>
 using HostUnsignedIntType = typename HostUnsignedIntTypeHelper<BITS>::type;
+template <int BITS>
+using HostSignedIntType = typename HostSignedIntTypeHelper<BITS>::type;
 
 } // namespace Fortran::common
 #endif

diff --git a/include/flang/Decimal/decimal.h b/include/flang/Decimal/decimal.h
index f9808dc..6891d39 100644
--- a/include/flang/Decimal/decimal.h
+++ b/include/flang/Decimal/decimal.h

@@ -129,20 +129,16 @@
 struct NS(ConversionToDecimalResult)
     ConvertDoubleToDecimal(char *, size_t, enum NS(DecimalConversionFlags),
         int digits, enum NS(FortranRounding), double);
-#if __x86_64__ && !defined(_MSC_VER)
 struct NS(ConversionToDecimalResult)
     ConvertLongDoubleToDecimal(char *, size_t, enum NS(DecimalConversionFlags),
         int digits, enum NS(FortranRounding), long double);
-#endif
 
 enum NS(ConversionResultFlags)
     ConvertDecimalToFloat(const char **, float *, enum NS(FortranRounding));
 enum NS(ConversionResultFlags)
     ConvertDecimalToDouble(const char **, double *, enum NS(FortranRounding));
-#if __x86_64__ && !defined(_MSC_VER)
 enum NS(ConversionResultFlags) ConvertDecimalToLongDouble(
     const char **, long double *, enum NS(FortranRounding));
-#endif
 #undef NS
 #ifdef __cplusplus
 } // extern "C"

diff --git a/lib/Decimal/binary-to-decimal.cpp b/lib/Decimal/binary-to-decimal.cpp
index af233d5..c6e5ee8 100644
--- a/lib/Decimal/binary-to-decimal.cpp
+++ b/lib/Decimal/binary-to-decimal.cpp

@@ -350,14 +350,12 @@
       rounding, Fortran::decimal::BinaryFloatingPointNumber<53>(x));
 }
 
-#if __x86_64__ && !defined(_MSC_VER)
 ConversionToDecimalResult ConvertLongDoubleToDecimal(char *buffer,
     std::size_t size, enum DecimalConversionFlags flags, int digits,
     enum FortranRounding rounding, long double x) {
   return Fortran::decimal::ConvertToDecimal(buffer, size, flags, digits,
       rounding, Fortran::decimal::BinaryFloatingPointNumber<64>(x));
 }
-#endif
 }
 
 template <int PREC, int LOG10RADIX>

diff --git a/lib/Decimal/decimal-to-binary.cpp b/lib/Decimal/decimal-to-binary.cpp
index 5e927e9..d6e30be 100644
--- a/lib/Decimal/decimal-to-binary.cpp
+++ b/lib/Decimal/decimal-to-binary.cpp

@@ -454,7 +454,6 @@
       reinterpret_cast<const void *>(&result.binary), sizeof *d);
   return result.flags;
 }
-#if __x86_64__ && !defined(_MSC_VER)
 enum ConversionResultFlags ConvertDecimalToLongDouble(
     const char **p, long double *ld, enum FortranRounding rounding) {
   auto result{Fortran::decimal::ConvertToBinary<64>(*p, rounding)};
@@ -462,6 +461,5 @@
       reinterpret_cast<const void *>(&result.binary), sizeof *ld);
   return result.flags;
 }
-#endif
 }
 } // namespace Fortran::decimal

diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt
index 005df3b..fc90432 100644
--- a/runtime/CMakeLists.txt
+++ b/runtime/CMakeLists.txt

@@ -34,6 +34,7 @@
   ISO_Fortran_binding.cpp
   allocatable.cpp
   buffer.cpp
+  complex-reduction.c
   character.cpp
   connection.cpp
   derived.cpp
@@ -50,6 +51,7 @@
   io-stmt.cpp
   main.cpp
   memory.cpp
+  reduction.cpp
   stat.cpp
   stop.cpp
   terminator.cpp

diff --git a/runtime/character.cpp b/runtime/character.cpp
index 8c9dfec..a6757b0 100644
--- a/runtime/character.cpp
+++ b/runtime/character.cpp

@@ -7,8 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "character.h"
+#include "cpp-type.h"
 #include "descriptor.h"
 #include "terminator.h"
+#include "tools.h"
 #include "flang/Common/bit-population-count.h"
 #include "flang/Common/uint128.h"
 #include <algorithm>
@@ -30,7 +32,7 @@
 }
 
 template <typename CHAR>
-static int Compare(
+int CharacterScalarCompare(
     const CHAR *x, const CHAR *y, std::size_t xChars, std::size_t yChars) {
   auto minChars{std::min(xChars, yChars)};
   if constexpr (sizeof(CHAR) == 1) {
@@ -63,6 +65,13 @@
   return -CompareToBlankPadding(y, yChars - minChars);
 }
 
+template int CharacterScalarCompare<char>(
+    const char *x, const char *y, std::size_t xChars, std::size_t yChars);
+template int CharacterScalarCompare<char16_t>(const char16_t *x,
+    const char16_t *y, std::size_t xChars, std::size_t yChars);
+template int CharacterScalarCompare<char32_t>(const char32_t *x,
+    const char32_t *y, std::size_t xChars, std::size_t yChars);
+
 // Shift count to use when converting between character lengths
 // and byte counts.
 template <typename CHAR>
@@ -103,8 +112,8 @@
   std::size_t yChars{y.ElementBytes() >> shift<char>};
   for (SubscriptValue resultAt{0}; elements-- > 0;
        ++resultAt, x.IncrementSubscripts(xAt), y.IncrementSubscripts(yAt)) {
-    *result.OffsetElement<char>(resultAt) =
-        Compare(x.Element<CHAR>(xAt), y.Element<CHAR>(yAt), xChars, yChars);
+    *result.OffsetElement<char>(resultAt) = CharacterScalarCompare<CHAR>(
+        x.Element<CHAR>(xAt), y.Element<CHAR>(yAt), xChars, yChars);
   }
 }
 
@@ -216,38 +225,30 @@
     const Terminator &terminator) {
   switch (kind) {
   case 1:
-    LenTrim<std::int8_t, CHAR>(result, string, terminator);
+    LenTrim<CppTypeFor<TypeCategory::Integer, 1>, CHAR>(
+        result, string, terminator);
     break;
   case 2:
-    LenTrim<std::int16_t, CHAR>(result, string, terminator);
+    LenTrim<CppTypeFor<TypeCategory::Integer, 2>, CHAR>(
+        result, string, terminator);
     break;
   case 4:
-    LenTrim<std::int32_t, CHAR>(result, string, terminator);
+    LenTrim<CppTypeFor<TypeCategory::Integer, 4>, CHAR>(
+        result, string, terminator);
     break;
   case 8:
-    LenTrim<std::int64_t, CHAR>(result, string, terminator);
+    LenTrim<CppTypeFor<TypeCategory::Integer, 8>, CHAR>(
+        result, string, terminator);
     break;
   case 16:
-    LenTrim<common::uint128_t, CHAR>(result, string, terminator);
+    LenTrim<CppTypeFor<TypeCategory::Integer, 16>, CHAR>(
+        result, string, terminator);
     break;
   default:
     terminator.Crash("LEN_TRIM: bad KIND=%d", kind);
   }
 }
 
-// Utility for dealing with elemental LOGICAL arguments
-static bool IsLogicalElementTrue(
-    const Descriptor &logical, const SubscriptValue at[]) {
-  // A LOGICAL value is false if and only if all of its bytes are zero.
-  const char *p{logical.Element<char>(at)};
-  for (std::size_t j{logical.ElementBytes()}; j-- > 0; ++p) {
-    if (*p) {
-      return true;
-    }
-  }
-  return false;
-}
-
 // INDEX implementation
 template <typename CHAR>
 inline std::size_t Index(const CHAR *x, std::size_t xLen, const CHAR *want,
@@ -419,23 +420,23 @@
     const Terminator &terminator) {
   switch (kind) {
   case 1:
-    GeneralCharFunc<std::int8_t, CHAR, FUNC>(
+    GeneralCharFunc<CppTypeFor<TypeCategory::Integer, 1>, CHAR, FUNC>(
         result, string, arg, back, terminator);
     break;
   case 2:
-    GeneralCharFunc<std::int16_t, CHAR, FUNC>(
+    GeneralCharFunc<CppTypeFor<TypeCategory::Integer, 2>, CHAR, FUNC>(
         result, string, arg, back, terminator);
     break;
   case 4:
-    GeneralCharFunc<std::int32_t, CHAR, FUNC>(
+    GeneralCharFunc<CppTypeFor<TypeCategory::Integer, 4>, CHAR, FUNC>(
         result, string, arg, back, terminator);
     break;
   case 8:
-    GeneralCharFunc<std::int64_t, CHAR, FUNC>(
+    GeneralCharFunc<CppTypeFor<TypeCategory::Integer, 8>, CHAR, FUNC>(
         result, string, arg, back, terminator);
     break;
   case 16:
-    GeneralCharFunc<common::uint128_t, CHAR, FUNC>(
+    GeneralCharFunc<CppTypeFor<TypeCategory::Integer, 16>, CHAR, FUNC>(
         result, string, arg, back, terminator);
     break;
   default:
@@ -509,7 +510,7 @@
   for (CHAR *result{accumulator.OffsetElement<CHAR>()}; elements-- > 0;
        accumData += accumChars, result += chars, x.IncrementSubscripts(xAt)) {
     const CHAR *xData{x.Element<CHAR>(xAt)};
-    int cmp{Compare(accumData, xData, accumChars, xChars)};
+    int cmp{CharacterScalarCompare(accumData, xData, accumChars, xChars)};
     if constexpr (ISMIN) {
       cmp = -cmp;
     }
@@ -754,14 +755,16 @@
   RUNTIME_CHECK(terminator, x.raw().type == y.raw().type);
   switch (x.raw().type) {
   case CFI_type_char:
-    return Compare(x.OffsetElement<char>(), y.OffsetElement<char>(),
-        x.ElementBytes(), y.ElementBytes());
+    return CharacterScalarCompare<char>(x.OffsetElement<char>(),
+        y.OffsetElement<char>(), x.ElementBytes(), y.ElementBytes());
   case CFI_type_char16_t:
-    return Compare(x.OffsetElement<char16_t>(), y.OffsetElement<char16_t>(),
-        x.ElementBytes() >> 1, y.ElementBytes() >> 1);
+    return CharacterScalarCompare<char16_t>(x.OffsetElement<char16_t>(),
+        y.OffsetElement<char16_t>(), x.ElementBytes() >> 1,
+        y.ElementBytes() >> 1);
   case CFI_type_char32_t:
-    return Compare(x.OffsetElement<char32_t>(), y.OffsetElement<char32_t>(),
-        x.ElementBytes() >> 2, y.ElementBytes() >> 2);
+    return CharacterScalarCompare<char32_t>(x.OffsetElement<char32_t>(),
+        y.OffsetElement<char32_t>(), x.ElementBytes() >> 2,
+        y.ElementBytes() >> 2);
   default:
     terminator.Crash("CharacterCompareScalar: bad string type code %d",
         static_cast<int>(x.raw().type));
@@ -771,17 +774,17 @@
 
 int RTNAME(CharacterCompareScalar1)(
     const char *x, const char *y, std::size_t xChars, std::size_t yChars) {
-  return Compare(x, y, xChars, yChars);
+  return CharacterScalarCompare(x, y, xChars, yChars);
 }
 
 int RTNAME(CharacterCompareScalar2)(const char16_t *x, const char16_t *y,
     std::size_t xChars, std::size_t yChars) {
-  return Compare(x, y, xChars, yChars);
+  return CharacterScalarCompare(x, y, xChars, yChars);
 }
 
 int RTNAME(CharacterCompareScalar4)(const char32_t *x, const char32_t *y,
     std::size_t xChars, std::size_t yChars) {
-  return Compare(x, y, xChars, yChars);
+  return CharacterScalarCompare(x, y, xChars, yChars);
 }
 
 void RTNAME(CharacterCompare)(

diff --git a/runtime/character.h b/runtime/character.h
index 6b813bf..8e5475c 100644
--- a/runtime/character.h
+++ b/runtime/character.h

@@ -18,6 +18,16 @@
 
 class Descriptor;
 
+template <typename CHAR>
+int CharacterScalarCompare(
+    const CHAR *x, const CHAR *y, std::size_t xChars, std::size_t yChars);
+extern template int CharacterScalarCompare<char>(
+    const char *x, const char *y, std::size_t xChars, std::size_t yChars);
+extern template int CharacterScalarCompare<char16_t>(const char16_t *x,
+    const char16_t *y, std::size_t xChars, std::size_t yChars);
+extern template int CharacterScalarCompare<char32_t>(const char32_t *x,
+    const char32_t *y, std::size_t xChars, std::size_t yChars);
+
 extern "C" {
 
 // Appends the corresponding (or expanded) characters of 'operand'

diff --git a/runtime/complex-reduction.c b/runtime/complex-reduction.c
new file mode 100644
index 0000000..443110b
--- /dev/null
+++ b/runtime/complex-reduction.c

@@ -0,0 +1,68 @@
+/*===-- flang/runtime/complex-reduction.c ---------------------------*- C -*-===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+#include "complex-reduction.h"
+#include "flang/Common/long-double.h"
+#include <complex.h>
+
+/* These are the C standard's names for _Complex constructors; not all C
+ * compilers support them.
+ */
+#if !defined(CMPLXF) && defined(__clang__)
+#define CMPLXF __builtin_complex
+#define CMPLX __builtin_complex
+#define CMPLXL __builtin_complex
+#endif
+
+struct CppComplexFloat {
+  float r, i;
+};
+struct CppComplexDouble {
+  double r, i;
+};
+struct CppComplexLongDouble {
+  long double r, i;
+};
+
+/* RTNAME(SumComplex4) calls RTNAME(CppSumComplex4) with the same arguments
+ * and converts the members of its C++ complex result to C _Complex.
+ */
+
+#define CPP_NAME(name) Cpp##name
+#define ADAPT_REDUCTION(name, cComplex, cpptype, cmplxMacro) \
+  struct cpptype RTNAME(CPP_NAME(name))(struct cpptype *, REDUCTION_ARGS); \
+  cComplex RTNAME(name)(REDUCTION_ARGS) { \
+    struct cpptype result; \
+    RTNAME(CPP_NAME(name))(&result, REDUCTION_ARG_NAMES); \
+    return cmplxMacro(result.r, result.i); \
+  }
+
+/* TODO: COMPLEX(2 & 3) */
+
+/* SUM() */
+ADAPT_REDUCTION(SumComplex4, float_Complex_t, CppComplexFloat, CMPLXF)
+ADAPT_REDUCTION(SumComplex8, double_Complex_t, CppComplexDouble, CMPLX)
+#if LONG_DOUBLE == 80
+ADAPT_REDUCTION(
+    SumComplex10, long_double_Complex_t, CppComplexLongDouble, CMPLXL)
+#elif LONG_DOUBLE == 128
+ADAPT_REDUCTION(
+    SumComplex16, long_double_Complex_t, CppComplexLongDouble, CMPLXL)
+#endif
+
+/* PRODUCT() */
+ADAPT_REDUCTION(ProductComplex4, float_Complex_t, CppComplexFloat, CMPLXF)
+ADAPT_REDUCTION(ProductComplex8, double_Complex_t, CppComplexDouble, CMPLX)
+#if LONG_DOUBLE == 80
+ADAPT_REDUCTION(
+    ProductComplex10, long_double_Complex_t, CppComplexLongDouble, CMPLXL)
+#elif LONG_DOUBLE == 128
+ADAPT_REDUCTION(
+    ProductComplex16, long_double_Complex_t, CppComplexLongDouble, CMPLXL)
+#endif

diff --git a/runtime/complex-reduction.h b/runtime/complex-reduction.h
new file mode 100644
index 0000000..3d2dc99
--- /dev/null
+++ b/runtime/complex-reduction.h

@@ -0,0 +1,51 @@
+/*===-- flang/runtime/complex-reduction.h ---------------------------*- C -*-===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+/* Wraps the C++-coded complex-valued SUM and PRODUCT reductions with
+ * C-coded wrapper functions returning _Complex values, to avoid problems
+ * with C++ build compilers that don't support C's _Complex.
+ */
+
+#ifndef FORTRAN_RUNTIME_COMPLEX_REDUCTION_H_
+#define FORTRAN_RUNTIME_COMPLEX_REDUCTION_H_
+
+#include "entry-names.h"
+
+struct CppDescriptor; /* dummy type name for Fortran::runtime::Descriptor */
+
+#ifdef _MSC_VER
+typedef _Fcomplex float_Complex_t;
+typedef _Dcomplex double_Complex_t;
+typedef _Lcomplex long_double_Complex_t;
+#else
+typedef float _Complex float_Complex_t;
+typedef double _Complex double_Complex_t;
+typedef long double long_double_Complex_t;
+#endif
+
+#define REDUCTION_ARGS \
+  const struct CppDescriptor *x, const char *source, int line, int dim /*=0*/, \
+      const struct CppDescriptor *mask /*=NULL*/
+#define REDUCTION_ARG_NAMES x, source, line, dim, mask
+
+float_Complex_t RTNAME(SumComplex2)(REDUCTION_ARGS);
+float_Complex_t RTNAME(SumComplex3)(REDUCTION_ARGS);
+float_Complex_t RTNAME(SumComplex4)(REDUCTION_ARGS);
+double_Complex_t RTNAME(SumComplex8)(REDUCTION_ARGS);
+long_double_Complex_t RTNAME(SumComplex10)(REDUCTION_ARGS);
+long_double_Complex_t RTNAME(SumComplex16)(REDUCTION_ARGS);
+
+float_Complex_t RTNAME(ProductComplex2)(REDUCTION_ARGS);
+float_Complex_t RTNAME(ProductComplex3)(REDUCTION_ARGS);
+float_Complex_t RTNAME(ProductComplex4)(REDUCTION_ARGS);
+double_Complex_t RTNAME(ProductComplex8)(REDUCTION_ARGS);
+long_double_Complex_t RTNAME(ProductComplex10)(REDUCTION_ARGS);
+long_double_Complex_t RTNAME(ProductComplex16)(REDUCTION_ARGS);
+
+#endif // FORTRAN_RUNTIME_COMPLEX_REDUCTION_H_

diff --git a/runtime/cpp-type.h b/runtime/cpp-type.h
new file mode 100644
index 0000000..613df97
--- /dev/null
+++ b/runtime/cpp-type.h

@@ -0,0 +1,67 @@
+//===-- runtime/cpp-type.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Maps Fortran intrinsic types to C++ types used in the runtime.
+
+#ifndef FORTRAN_RUNTIME_CPP_TYPE_H_
+#define FORTRAN_RUNTIME_CPP_TYPE_H_
+
+#include "flang/Common/Fortran.h"
+#include "flang/Common/uint128.h"
+#include <complex>
+#include <cstdint>
+
+namespace Fortran::runtime {
+
+using common::TypeCategory;
+
+template <TypeCategory CAT, int KIND> struct CppTypeForHelper {};
+template <TypeCategory CAT, int KIND>
+using CppTypeFor = typename CppTypeForHelper<CAT, KIND>::type;
+
+template <int KIND> struct CppTypeForHelper<TypeCategory::Integer, KIND> {
+  using type = common::HostSignedIntType<8 * KIND>;
+};
+
+// TODO: REAL/COMPLEX(2 & 3)
+template <> struct CppTypeForHelper<TypeCategory::Real, 4> {
+  using type = float;
+};
+template <> struct CppTypeForHelper<TypeCategory::Real, 8> {
+  using type = double;
+};
+template <> struct CppTypeForHelper<TypeCategory::Real, 10> {
+  using type = long double;
+};
+template <> struct CppTypeForHelper<TypeCategory::Real, 16> {
+  using type = long double;
+};
+
+template <int KIND> struct CppTypeForHelper<TypeCategory::Complex, KIND> {
+  using type = std::complex<CppTypeFor<TypeCategory::Real, KIND>>;
+};
+
+template <> struct CppTypeForHelper<TypeCategory::Character, 1> {
+  using type = char;
+};
+template <> struct CppTypeForHelper<TypeCategory::Character, 2> {
+  using type = char16_t;
+};
+template <> struct CppTypeForHelper<TypeCategory::Character, 4> {
+  using type = char32_t;
+};
+
+template <int KIND> struct CppTypeForHelper<TypeCategory::Logical, KIND> {
+  using type = common::HostSignedIntType<8 * KIND>;
+};
+template <> struct CppTypeForHelper<TypeCategory::Logical, 1> {
+  using type = bool;
+};
+
+} // namespace Fortran::runtime
+#endif // FORTRAN_RUNTIME_CPP_TYPE_H_

diff --git a/runtime/descriptor-io.h b/runtime/descriptor-io.h
index f98797d..514bcff 100644
--- a/runtime/descriptor-io.h
+++ b/runtime/descriptor-io.h

@@ -11,6 +11,7 @@
 
 // Implementation of I/O data list item transfers based on descriptors.
 
+#include "cpp-type.h"
 #include "descriptor.h"
 #include "edit-input.h"
 #include "edit-output.h"
@@ -260,15 +261,20 @@
     case TypeCategory::Integer:
       switch (kind) {
       case 1:
-        return FormattedIntegerIO<std::int8_t, DIR>(io, descriptor);
+        return FormattedIntegerIO<CppTypeFor<TypeCategory::Integer, 1>, DIR>(
+            io, descriptor);
       case 2:
-        return FormattedIntegerIO<std::int16_t, DIR>(io, descriptor);
+        return FormattedIntegerIO<CppTypeFor<TypeCategory::Integer, 2>, DIR>(
+            io, descriptor);
       case 4:
-        return FormattedIntegerIO<std::int32_t, DIR>(io, descriptor);
+        return FormattedIntegerIO<CppTypeFor<TypeCategory::Integer, 4>, DIR>(
+            io, descriptor);
       case 8:
-        return FormattedIntegerIO<std::int64_t, DIR>(io, descriptor);
+        return FormattedIntegerIO<CppTypeFor<TypeCategory::Integer, 8>, DIR>(
+            io, descriptor);
       case 16:
-        return FormattedIntegerIO<common::uint128_t, DIR>(io, descriptor);
+        return FormattedIntegerIO<CppTypeFor<TypeCategory::Integer, 16>, DIR>(
+            io, descriptor);
       default:
         io.GetIoErrorHandler().Crash(
             "DescriptorIO: Unimplemented INTEGER kind (%d) in descriptor",
@@ -330,13 +336,17 @@
     case TypeCategory::Logical:
       switch (kind) {
       case 1:
-        return FormattedLogicalIO<std::int8_t, DIR>(io, descriptor);
+        return FormattedLogicalIO<CppTypeFor<TypeCategory::Integer, 1>, DIR>(
+            io, descriptor);
       case 2:
-        return FormattedLogicalIO<std::int16_t, DIR>(io, descriptor);
+        return FormattedLogicalIO<CppTypeFor<TypeCategory::Integer, 2>, DIR>(
+            io, descriptor);
       case 4:
-        return FormattedLogicalIO<std::int32_t, DIR>(io, descriptor);
+        return FormattedLogicalIO<CppTypeFor<TypeCategory::Integer, 4>, DIR>(
+            io, descriptor);
       case 8:
-        return FormattedLogicalIO<std::int64_t, DIR>(io, descriptor);
+        return FormattedLogicalIO<CppTypeFor<TypeCategory::Integer, 8>, DIR>(
+            io, descriptor);
       default:
         io.GetIoErrorHandler().Crash(
             "DescriptorIO: Unimplemented LOGICAL kind (%d) in descriptor",

diff --git a/runtime/descriptor.cpp b/runtime/descriptor.cpp
index b66874b..54069fe 100644
--- a/runtime/descriptor.cpp
+++ b/runtime/descriptor.cpp

@@ -17,9 +17,7 @@
 
 namespace Fortran::runtime {
 
-Descriptor::Descriptor(const Descriptor &that) {
-  std::memcpy(this, &that, that.SizeInBytes());
-}
+Descriptor::Descriptor(const Descriptor &that) { *this = that; }
 
 Descriptor::~Descriptor() {
   if (raw_.attribute != CFI_attribute_pointer) {
@@ -27,6 +25,11 @@
   }
 }
 
+Descriptor &Descriptor::operator=(const Descriptor &that) {
+  std::memcpy(this, &that, that.SizeInBytes());
+  return *this;
+}
+
 void Descriptor::Establish(TypeCode t, std::size_t elementBytes, void *p,
     int rank, const SubscriptValue *extent, ISO::CFI_attribute_t attribute,
     bool addendum) {
@@ -224,10 +227,9 @@
   for (int j{raw_.rank - 1}; j >= 0; --j) {
     int k{permutation ? permutation[j] : j};
     const Dimension &dim{GetDimension(k)};
-    std::size_t quotient{j ? elementNumber / dimCoefficient[j] : 0};
-    subscript[k] =
-        dim.LowerBound() + elementNumber - dimCoefficient[j] * quotient;
-    elementNumber = quotient;
+    std::size_t quotient{elementNumber / dimCoefficient[j]};
+    subscript[k] = quotient + dim.LowerBound();
+    elementNumber -= quotient * dimCoefficient[j];
   }
   return true;
 }

diff --git a/runtime/descriptor.h b/runtime/descriptor.h
index c839330..d909822 100644
--- a/runtime/descriptor.h
+++ b/runtime/descriptor.h

@@ -53,6 +53,19 @@
     raw_.extent = upper >= lower ? upper - lower + 1 : 0;
     return *this;
   }
+  Dimension &SetLowerBound(SubscriptValue lower) {
+    raw_.lower_bound = lower;
+    return *this;
+  }
+  Dimension &SetUpperBound(SubscriptValue upper) {
+    auto lower{raw_.lower_bound};
+    raw_.extent = upper >= lower ? upper - lower + 1 : 0;
+    return *this;
+  }
+  Dimension &SetExtent(SubscriptValue extent) {
+    raw_.extent = extent;
+    return *this;
+  }
   Dimension &SetByteStride(SubscriptValue bytes) {
     raw_.sm = bytes;
     return *this;
@@ -137,8 +150,8 @@
     raw_.f18Addendum = false;
   }
   Descriptor(const Descriptor &);
-
   ~Descriptor();
+  Descriptor &operator=(const Descriptor &);
 
   static constexpr std::size_t BytesFor(TypeCategory category, int kind) {
     return category == TypeCategory::Complex ? kind * 2 : kind;

diff --git a/runtime/entry-names.h b/runtime/entry-names.h
index e2581ca..4b669bd 100644
--- a/runtime/entry-names.h
+++ b/runtime/entry-names.h

@@ -1,20 +1,21 @@
-//===-- runtime/entry-names.h -----------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
+/*===-- runtime/entry-names.h ---------------------------------------*- C -*-===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===------------------------------------------------------------------------===
+ */
 
-// Defines the macro RTNAME(n) which decorates the external name of a runtime
-// library function or object with extra characters so that it
-// (a) is not in the user's name space,
-// (b) doesn't conflict with other libraries, and
-// (c) prevents incompatible versions of the runtime library from linking
-//
-// The value of REVISION should not be changed until/unless the API to the
-// runtime library must change in some way that breaks backward compatibility.
-
+/* Defines the macro RTNAME(n) which decorates the external name of a runtime
+ * library function or object with extra characters so that it
+ * (a) is not in the user's name space,
+ * (b) doesn't conflict with other libraries, and
+ * (c) prevents incompatible versions of the runtime library from linking
+ *
+ * The value of REVISION should not be changed until/unless the API to the
+ * runtime library must change in some way that breaks backward compatibility.
+ */
 #ifndef RTNAME
 #define NAME_WITH_PREFIX_AND_REVISION(prefix, revision, name) \
   prefix##revision##name

diff --git a/runtime/io-api.cpp b/runtime/io-api.cpp
index 0dcfabd..57a52bf 100644
--- a/runtime/io-api.cpp
+++ b/runtime/io-api.cpp

@@ -856,26 +856,6 @@
   return false;
 }
 
-template <typename INT>
-static bool SetInteger(INT &x, int kind, std::int64_t value) {
-  switch (kind) {
-  case 1:
-    reinterpret_cast<std::int8_t &>(x) = value;
-    return true;
-  case 2:
-    reinterpret_cast<std::int16_t &>(x) = value;
-    return true;
-  case 4:
-    reinterpret_cast<std::int32_t &>(x) = value;
-    return true;
-  case 8:
-    reinterpret_cast<std::int64_t &>(x) = value;
-    return true;
-  default:
-    return false;
-  }
-}
-
 bool IONAME(GetNewUnit)(Cookie cookie, int &unit, int kind) {
   IoStatementState &io{*cookie};
   auto *open{io.get_if<OpenStatementState>()};

diff --git a/runtime/reduction.cpp b/runtime/reduction.cpp
new file mode 100644
index 0000000..7163530
--- /dev/null
+++ b/runtime/reduction.cpp

@@ -0,0 +1,1525 @@
+//===-- runtime/reduction.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Implements ALL, ANY, COUNT, MAXLOC, MAXVAL, MINLOC, MINVAL, PRODUCT, and SUM
+// for all required operand types and shapes and (for MAXLOC & MINLOC) kinds of
+// results.
+//
+// * Real and complex SUM reductions attempt to reduce floating-point
+//   cancellation on intermediate results by adding up partial sums
+//   for positive and negative elements independently.
+// * Partial reductions (i.e., those with DIM= arguments that are not
+//   required to be 1 by the rank of the argument) return arrays that
+//   are dynamically allocated in a caller-supplied descriptor.
+// * Total reductions (i.e., no DIM= argument) with MAXLOC & MINLOC
+//   return integer vectors of some kind, not scalars; a caller-supplied
+//   descriptor is used
+// * Character-valued reductions (MAXVAL & MINVAL) return arbitrary
+//   length results, dynamically allocated in a caller-supplied descriptor
+
+#include "reduction.h"
+#include "character.h"
+#include "cpp-type.h"
+#include "terminator.h"
+#include "tools.h"
+#include "flang/Common/long-double.h"
+#include <cinttypes>
+#include <complex>
+#include <limits>
+#include <type_traits>
+
+namespace Fortran::runtime {
+
+// Generic reduction templates
+
+// Reductions are implemented with *accumulators*, which are instances of
+// classes that incrementally build up the result (or an element thereof) during
+// a traversal of the unmasked elements of an array.  Each accumulator class
+// supports a constructor (which captures a reference to the array), an
+// AccumulateAt() member function that applies supplied subscripts to the
+// array and does something with a scalar element, and a GetResult()
+// member function that copies a final result into its destination.
+
+// Total reduction of the array argument to a scalar (or to a vector in the
+// cases of MAXLOC & MINLOC).  These are the cases without DIM= or cases
+// where the argument has rank 1 and DIM=, if present, must be 1.
+template <typename TYPE, typename ACCUMULATOR>
+inline void DoTotalReduction(const Descriptor &x, int dim,
+    const Descriptor *mask, ACCUMULATOR &accumulator, const char *intrinsic,
+    Terminator &terminator) {
+  if (dim < 0 || dim > 1) {
+    terminator.Crash(
+        "%s: bad DIM=%d for argument with rank %d", intrinsic, dim, x.rank());
+  }
+  SubscriptValue xAt[maxRank];
+  x.GetLowerBounds(xAt);
+  if (mask) {
+    CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK");
+    SubscriptValue maskAt[maxRank];
+    mask->GetLowerBounds(maskAt);
+    if (mask->rank() > 0) {
+      for (auto elements{x.Elements()}; elements--;
+           x.IncrementSubscripts(xAt), mask->IncrementSubscripts(maskAt)) {
+        if (IsLogicalElementTrue(*mask, maskAt)) {
+          accumulator.template AccumulateAt<TYPE>(xAt);
+        }
+      }
+      return;
+    } else if (!IsLogicalElementTrue(*mask, maskAt)) {
+      // scalar MASK=.FALSE.: return identity value
+      return;
+    }
+  }
+  // No MASK=, or scalar MASK=.TRUE.
+  for (auto elements{x.Elements()}; elements--; x.IncrementSubscripts(xAt)) {
+    if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
+      break; // cut short, result is known
+    }
+  }
+}
+
+template <TypeCategory CAT, int KIND, typename ACCUMULATOR>
+inline CppTypeFor<CAT, KIND> GetTotalReduction(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask,
+    ACCUMULATOR &&accumulator, const char *intrinsic) {
+  Terminator terminator{source, line};
+  RUNTIME_CHECK(terminator, TypeCode(CAT, KIND) == x.type());
+  using CppType = CppTypeFor<CAT, KIND>;
+  DoTotalReduction<CppType>(x, dim, mask, accumulator, intrinsic, terminator);
+  CppType result;
+  accumulator.template GetResult(&result);
+  return result;
+}
+
+// For reductions on a dimension, e.g. SUM(array,DIM=2) where the shape
+// of the array is [2,3,5], the shape of the result is [2,5] and
+// result(j,k) = SUM(array(j,:,k)), possibly modified if the array has
+// lower bounds other than one.  This utility subroutine creates an
+// array of subscripts [j,_,k] for result subscripts [j,k] so that the
+// elemets of array(j,:,k) can be reduced.
+inline void GetExpandedSubscripts(SubscriptValue at[],
+    const Descriptor &descriptor, int zeroBasedDim,
+    const SubscriptValue from[]) {
+  descriptor.GetLowerBounds(at);
+  int rank{descriptor.rank()};
+  int j{0};
+  for (; j < zeroBasedDim; ++j) {
+    at[j] += from[j] - 1 /*lower bound*/;
+  }
+  for (++j; j < rank; ++j) {
+    at[j] += from[j - 1] - 1;
+  }
+}
+
+template <typename TYPE, typename ACCUMULATOR>
+inline void ReduceDimToScalar(const Descriptor &x, int zeroBasedDim,
+    SubscriptValue subscripts[], TYPE *result) {
+  ACCUMULATOR accumulator{x};
+  SubscriptValue xAt[maxRank];
+  GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
+  const auto &dim{x.GetDimension(zeroBasedDim)};
+  SubscriptValue at{dim.LowerBound()};
+  for (auto n{dim.Extent()}; n-- > 0; ++at) {
+    xAt[zeroBasedDim] = at;
+    if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
+      break;
+    }
+  }
+  accumulator.template GetResult(result, zeroBasedDim);
+}
+
+template <typename TYPE, typename ACCUMULATOR>
+inline void ReduceDimMaskToScalar(const Descriptor &x, int zeroBasedDim,
+    SubscriptValue subscripts[], const Descriptor &mask, TYPE *result) {
+  ACCUMULATOR accumulator{x};
+  SubscriptValue xAt[maxRank], maskAt[maxRank];
+  GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
+  GetExpandedSubscripts(maskAt, mask, zeroBasedDim, subscripts);
+  const auto &xDim{x.GetDimension(zeroBasedDim)};
+  SubscriptValue xPos{xDim.LowerBound()};
+  const auto &maskDim{mask.GetDimension(zeroBasedDim)};
+  SubscriptValue maskPos{maskDim.LowerBound()};
+  for (auto n{x.GetDimension(zeroBasedDim).Extent()}; n-- > 0;
+       ++xPos, ++maskPos) {
+    maskAt[zeroBasedDim] = maskPos;
+    if (IsLogicalElementTrue(mask, maskAt)) {
+      xAt[zeroBasedDim] = xPos;
+      if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
+        break;
+      }
+    }
+  }
+  accumulator.template GetResult(result, zeroBasedDim);
+}
+
+// Utility: establishes & allocates the result array for a partial
+// reduction (i.e., one with DIM=).
+static void CreatePartialReductionResult(Descriptor &result,
+    const Descriptor &x, int dim, Terminator &terminator, const char *intrinsic,
+    TypeCode typeCode) {
+  int xRank{x.rank()};
+  if (dim < 1 || dim > xRank) {
+    terminator.Crash("%s: bad DIM=%d for rank %d", intrinsic, dim, xRank);
+  }
+  int zeroBasedDim{dim - 1};
+  SubscriptValue resultExtent[maxRank];
+  for (int j{0}; j < zeroBasedDim; ++j) {
+    resultExtent[j] = x.GetDimension(j).Extent();
+  }
+  for (int j{zeroBasedDim + 1}; j < xRank; ++j) {
+    resultExtent[j - 1] = x.GetDimension(j).Extent();
+  }
+  result.Establish(typeCode, x.ElementBytes(), nullptr, xRank - 1, resultExtent,
+      CFI_attribute_allocatable);
+  for (int j{0}; j + 1 < xRank; ++j) {
+    result.GetDimension(j).SetBounds(1, resultExtent[j]);
+  }
+  if (int stat{result.Allocate()}) {
+    terminator.Crash(
+        "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
+  }
+}
+
+// Partial reductions with DIM=
+
+template <typename ACCUMULATOR, TypeCategory CAT, int KIND>
+inline void PartialReduction(Descriptor &result, const Descriptor &x, int dim,
+    const Descriptor *mask, Terminator &terminator, const char *intrinsic) {
+  CreatePartialReductionResult(
+      result, x, dim, terminator, intrinsic, TypeCode{CAT, KIND});
+  SubscriptValue at[maxRank];
+  result.GetLowerBounds(at);
+  INTERNAL_CHECK(at[0] == 1);
+  using CppType = CppTypeFor<CAT, KIND>;
+  if (mask) {
+    CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK");
+    SubscriptValue maskAt[maxRank]; // contents unused
+    if (mask->rank() > 0) {
+      for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
+        ReduceDimMaskToScalar<CppType, ACCUMULATOR>(
+            x, dim - 1, at, *mask, result.Element<CppType>(at));
+      }
+      return;
+    } else if (!IsLogicalElementTrue(*mask, maskAt)) {
+      // scalar MASK=.FALSE.
+      ACCUMULATOR accumulator{x};
+      for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
+        accumulator.GetResult(result.Element<CppType>(at));
+      }
+      return;
+    }
+  }
+  // No MASK= or scalar MASK=.TRUE.
+  for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
+    ReduceDimToScalar<CppType, ACCUMULATOR>(
+        x, dim - 1, at, result.Element<CppType>(at));
+  }
+}
+
+template <template <typename> class INTEGER_ACCUM,
+    template <typename> class REAL_ACCUM,
+    template <typename> class COMPLEX_ACCUM>
+inline void TypedPartialNumericReduction(Descriptor &result,
+    const Descriptor &x, int dim, const char *source, int line,
+    const Descriptor *mask, const char *intrinsic) {
+  Terminator terminator{source, line};
+  auto catKind{x.type().GetCategoryAndKind()};
+  RUNTIME_CHECK(terminator, catKind.has_value());
+  switch (catKind->first) {
+  case TypeCategory::Integer:
+    switch (catKind->second) {
+    case 1:
+      PartialReduction<INTEGER_ACCUM<CppTypeFor<TypeCategory::Integer, 4>>,
+          TypeCategory::Integer, 1>(
+          result, x, dim, mask, terminator, intrinsic);
+      return;
+    case 2:
+      PartialReduction<INTEGER_ACCUM<CppTypeFor<TypeCategory::Integer, 4>>,
+          TypeCategory::Integer, 2>(
+          result, x, dim, mask, terminator, intrinsic);
+      return;
+    case 4:
+      PartialReduction<INTEGER_ACCUM<CppTypeFor<TypeCategory::Integer, 4>>,
+          TypeCategory::Integer, 4>(
+          result, x, dim, mask, terminator, intrinsic);
+      return;
+    case 8:
+      PartialReduction<INTEGER_ACCUM<CppTypeFor<TypeCategory::Integer, 8>>,
+          TypeCategory::Integer, 8>(
+          result, x, dim, mask, terminator, intrinsic);
+      return;
+    case 16:
+      PartialReduction<INTEGER_ACCUM<CppTypeFor<TypeCategory::Integer, 16>>,
+          TypeCategory::Integer, 16>(
+          result, x, dim, mask, terminator, intrinsic);
+      return;
+    }
+    break;
+  case TypeCategory::Real:
+    switch (catKind->second) {
+#if 0 // TODO
+    case 2:
+    case 3:
+#endif
+    case 4:
+      PartialReduction<REAL_ACCUM<CppTypeFor<TypeCategory::Real, 8>>,
+          TypeCategory::Real, 4>(result, x, dim, mask, terminator, intrinsic);
+      return;
+    case 8:
+      PartialReduction<REAL_ACCUM<CppTypeFor<TypeCategory::Real, 8>>,
+          TypeCategory::Real, 8>(result, x, dim, mask, terminator, intrinsic);
+      return;
+#if LONG_DOUBLE == 80
+    case 10:
+      PartialReduction<REAL_ACCUM<CppTypeFor<TypeCategory::Real, 10>>,
+          TypeCategory::Real, 10>(result, x, dim, mask, terminator, intrinsic);
+      return;
+#elif LONG_DOUBLE == 128
+    case 16:
+      PartialReduction<REAL_ACCUM<CppTypeFor<TypeCategory::Real, 16>>,
+          TypeCategory::Real, 16>(result, x, dim, mask, terminator, intrinsic);
+      return;
+#endif
+    }
+    break;
+  case TypeCategory::Complex:
+    switch (catKind->second) {
+#if 0 // TODO
+    case 2:
+    case 3:
+#endif
+    case 4:
+      PartialReduction<COMPLEX_ACCUM<CppTypeFor<TypeCategory::Real, 8>>,
+          TypeCategory::Complex, 4>(
+          result, x, dim, mask, terminator, intrinsic);
+      return;
+    case 8:
+      PartialReduction<COMPLEX_ACCUM<CppTypeFor<TypeCategory::Real, 8>>,
+          TypeCategory::Complex, 8>(
+          result, x, dim, mask, terminator, intrinsic);
+      return;
+#if LONG_DOUBLE == 80
+    case 10:
+      PartialReduction<COMPLEX_ACCUM<CppTypeFor<TypeCategory::Real, 10>>,
+          TypeCategory::Complex, 10>(
+          result, x, dim, mask, terminator, intrinsic);
+      return;
+#elif LONG_DOUBLE == 128
+    case 16:
+      PartialReduction<COMPLEX_ACCUM<CppTypeFor<TypeCategory::Real, 16>>,
+          TypeCategory::Complex, 16>(
+          result, x, dim, mask, terminator, intrinsic);
+      return;
+#endif
+    }
+    break;
+  default:
+    break;
+  }
+  terminator.Crash("%s: invalid type code %d", intrinsic, x.type().raw());
+}
+
+// SUM()
+
+template <typename INTERMEDIATE> class IntegerSumAccumulator {
+public:
+  explicit IntegerSumAccumulator(const Descriptor &array) : array_{array} {}
+  template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
+    *p = static_cast<A>(sum_);
+  }
+  template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
+    sum_ += *array_.Element<A>(at);
+    return true;
+  }
+
+private:
+  const Descriptor &array_;
+  INTERMEDIATE sum_{0};
+};
+
+template <typename INTERMEDIATE> class RealSumAccumulator {
+public:
+  explicit RealSumAccumulator(const Descriptor &array) : array_{array} {}
+  template <typename A> A Result() const {
+    auto sum{static_cast<A>(positives_ + negatives_)};
+    return std::isfinite(sum) ? sum : static_cast<A>(inOrder_);
+  }
+  template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
+    *p = Result<A>();
+  }
+  template <typename A> bool Accumulate(A x) {
+    // Accumulate the nonnegative and negative elements independently
+    // to reduce cancellation; also record an in-order sum for use
+    // in case of overflow.
+    if (x >= 0) {
+      positives_ += x;
+    } else {
+      negatives_ += x;
+    }
+    inOrder_ += x;
+    return true;
+  }
+  template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
+    return Accumulate(*array_.Element<A>(at));
+  }
+
+private:
+  const Descriptor &array_;
+  INTERMEDIATE positives_{0.0}, negatives_{0.0}, inOrder_{0.0};
+};
+
+template <typename PART> class ComplexSumAccumulator {
+public:
+  explicit ComplexSumAccumulator(const Descriptor &array) : array_{array} {}
+  template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
+    using ResultPart = typename A::value_type;
+    *p = {reals_.template Result<ResultPart>(),
+        imaginaries_.template Result<ResultPart>()};
+  }
+  template <typename A> bool Accumulate(const A &z) {
+    reals_.Accumulate(z.real());
+    imaginaries_.Accumulate(z.imag());
+    return true;
+  }
+  template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
+    return Accumulate(*array_.Element<A>(at));
+  }
+
+private:
+  const Descriptor &array_;
+  RealSumAccumulator<PART> reals_{array_}, imaginaries_{array_};
+};
+
+extern "C" {
+CppTypeFor<TypeCategory::Integer, 1> RTNAME(SumInteger1)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Integer, 1>(x, source, line, dim, mask,
+      IntegerSumAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "SUM");
+}
+CppTypeFor<TypeCategory::Integer, 2> RTNAME(SumInteger2)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Integer, 2>(x, source, line, dim, mask,
+      IntegerSumAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "SUM");
+}
+CppTypeFor<TypeCategory::Integer, 4> RTNAME(SumInteger4)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Integer, 4>(x, source, line, dim, mask,
+      IntegerSumAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "SUM");
+}
+CppTypeFor<TypeCategory::Integer, 8> RTNAME(SumInteger8)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Integer, 8>(x, source, line, dim, mask,
+      IntegerSumAccumulator<CppTypeFor<TypeCategory::Integer, 8>>{x}, "SUM");
+}
+CppTypeFor<TypeCategory::Integer, 16> RTNAME(SumInteger16)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Integer, 16>(x, source, line, dim,
+      mask, IntegerSumAccumulator<CppTypeFor<TypeCategory::Integer, 16>>{x},
+      "SUM");
+}
+
+// TODO: real/complex(2 & 3)
+CppTypeFor<TypeCategory::Real, 4> RTNAME(SumReal4)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Real, 4>(
+      x, source, line, dim, mask, RealSumAccumulator<double>{x}, "SUM");
+}
+CppTypeFor<TypeCategory::Real, 8> RTNAME(SumReal8)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Real, 8>(
+      x, source, line, dim, mask, RealSumAccumulator<double>{x}, "SUM");
+}
+#if LONG_DOUBLE == 80
+CppTypeFor<TypeCategory::Real, 10> RTNAME(SumReal10)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Real, 10>(
+      x, source, line, dim, mask, RealSumAccumulator<long double>{x}, "SUM");
+}
+#elif LONG_DOUBLE == 128
+CppTypeFor<TypeCategory::Real, 16> RTNAME(SumReal16)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Real, 16>(
+      x, source, line, dim, mask, RealSumAccumulator<long double>{x}, "SUM");
+}
+#endif
+
+void RTNAME(CppSumComplex4)(CppTypeFor<TypeCategory::Complex, 4> &result,
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  result = GetTotalReduction<TypeCategory::Complex, 4>(
+      x, source, line, dim, mask, ComplexSumAccumulator<double>{x}, "SUM");
+}
+void RTNAME(CppSumComplex8)(CppTypeFor<TypeCategory::Complex, 8> &result,
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  result = GetTotalReduction<TypeCategory::Complex, 8>(
+      x, source, line, dim, mask, ComplexSumAccumulator<double>{x}, "SUM");
+}
+#if LONG_DOUBLE == 80
+void RTNAME(CppSumComplex10)(CppTypeFor<TypeCategory::Complex, 10> &result,
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  result = GetTotalReduction<TypeCategory::Complex, 10>(
+      x, source, line, dim, mask, ComplexSumAccumulator<long double>{x}, "SUM");
+}
+#elif LONG_DOUBLE == 128
+void RTNAME(CppSumComplex16)(CppTypeFor<TypeCategory::Complex, 16> &result,
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  result = GetTotalReduction<TypeCategory::Complex, 16>(
+      x, source, line, dim, mask, ComplexSumAccumulator<long double>{x}, "SUM");
+}
+#endif
+
+void RTNAME(SumDim)(Descriptor &result, const Descriptor &x, int dim,
+    const char *source, int line, const Descriptor *mask) {
+  TypedPartialNumericReduction<IntegerSumAccumulator, RealSumAccumulator,
+      ComplexSumAccumulator>(result, x, dim, source, line, mask, "SUM");
+}
+} // extern "C"
+
+// PRODUCT()
+
+template <typename INTERMEDIATE> class NonComplexProductAccumulator {
+public:
+  explicit NonComplexProductAccumulator(const Descriptor &array)
+      : array_{array} {}
+  template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
+    *p = static_cast<A>(product_);
+  }
+  template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
+    product_ *= *array_.Element<A>(at);
+    return product_ != 0;
+  }
+
+private:
+  const Descriptor &array_;
+  INTERMEDIATE product_{1};
+};
+
+template <typename PART> class ComplexProductAccumulator {
+public:
+  explicit ComplexProductAccumulator(const Descriptor &array) : array_{array} {}
+  template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
+    using ResultPart = typename A::value_type;
+    *p = {static_cast<ResultPart>(product_.real()),
+        static_cast<ResultPart>(product_.imag())};
+  }
+  template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
+    product_ *= *array_.Element<A>(at);
+    return true;
+  }
+
+private:
+  const Descriptor &array_;
+  std::complex<PART> product_{1, 0};
+};
+
+extern "C" {
+CppTypeFor<TypeCategory::Integer, 1> RTNAME(ProductInteger1)(
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Integer, 1>(x, source, line, dim, mask,
+      NonComplexProductAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x},
+      "PRODUCT");
+}
+CppTypeFor<TypeCategory::Integer, 2> RTNAME(ProductInteger2)(
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Integer, 2>(x, source, line, dim, mask,
+      NonComplexProductAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x},
+      "PRODUCT");
+}
+CppTypeFor<TypeCategory::Integer, 4> RTNAME(ProductInteger4)(
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Integer, 4>(x, source, line, dim, mask,
+      NonComplexProductAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x},
+      "PRODUCT");
+}
+CppTypeFor<TypeCategory::Integer, 8> RTNAME(ProductInteger8)(
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Integer, 8>(x, source, line, dim, mask,
+      NonComplexProductAccumulator<CppTypeFor<TypeCategory::Integer, 8>>{x},
+      "PRODUCT");
+}
+CppTypeFor<TypeCategory::Integer, 16> RTNAME(ProductInteger16)(
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Integer, 16>(x, source, line, dim,
+      mask,
+      NonComplexProductAccumulator<CppTypeFor<TypeCategory::Integer, 16>>{x},
+      "PRODUCT");
+}
+
+// TODO: real/complex(2 & 3)
+CppTypeFor<TypeCategory::Real, 4> RTNAME(ProductReal4)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Real, 4>(x, source, line, dim, mask,
+      NonComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 8>>{x},
+      "PRODUCT");
+}
+CppTypeFor<TypeCategory::Real, 8> RTNAME(ProductReal8)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Real, 8>(x, source, line, dim, mask,
+      NonComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 8>>{x},
+      "PRODUCT");
+}
+#if LONG_DOUBLE == 80
+CppTypeFor<TypeCategory::Real, 10> RTNAME(ProductReal10)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Real, 10>(x, source, line, dim, mask,
+      NonComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 10>>{x},
+      "PRODUCT");
+}
+#elif LONG_DOUBLE == 128
+CppTypeFor<TypeCategory::Real, 16> RTNAME(ProductReal16)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Real, 16>(x, source, line, dim, mask,
+      NonComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 16>>{x},
+      "PRODUCT");
+}
+#endif
+
+void RTNAME(CppProductComplex4)(CppTypeFor<TypeCategory::Complex, 4> &result,
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  result = GetTotalReduction<TypeCategory::Complex, 4>(x, source, line, dim,
+      mask, ComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 8>>{x},
+      "PRODUCT");
+}
+void RTNAME(CppProductComplex8)(CppTypeFor<TypeCategory::Complex, 8> &result,
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  result = GetTotalReduction<TypeCategory::Complex, 8>(x, source, line, dim,
+      mask, ComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 8>>{x},
+      "PRODUCT");
+}
+#if LONG_DOUBLE == 80
+void RTNAME(CppProductComplex10)(CppTypeFor<TypeCategory::Complex, 10> &result,
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  result = GetTotalReduction<TypeCategory::Complex, 10>(x, source, line, dim,
+      mask, ComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 10>>{x},
+      "PRODUCT");
+}
+#elif LONG_DOUBLE == 128
+void RTNAME(CppProductComplex16)(CppTypeFor<TypeCategory::Complex, 16> &result,
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  result = GetTotalReduction<TypeCategory::Complex, 16>(x, source, line, dim,
+      mask, ComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 16>>{x},
+      "PRODUCT");
+}
+#endif
+
+void RTNAME(ProductDim)(Descriptor &result, const Descriptor &x, int dim,
+    const char *source, int line, const Descriptor *mask) {
+  TypedPartialNumericReduction<NonComplexProductAccumulator,
+      NonComplexProductAccumulator, ComplexProductAccumulator>(
+      result, x, dim, source, line, mask, "PRODUCT");
+}
+} // extern "C"
+
+// MAXLOC and MINLOC
+
+template <typename T, bool IS_MAX, bool BACK> struct NumericCompare {
+  using Type = T;
+  explicit NumericCompare(std::size_t /*elemLen; ignored*/) {}
+  bool operator()(const T &value, const T &previous) {
+    if (BACK && value == previous) {
+      return true;
+    } else if constexpr (IS_MAX) {
+      return value > previous;
+    } else {
+      return value < previous;
+    }
+  }
+};
+
+template <typename T, bool IS_MAX, bool BACK> class CharacterCompare {
+public:
+  using Type = T;
+  explicit CharacterCompare(std::size_t elemLen)
+      : chars_{elemLen / sizeof(T)} {}
+  bool operator()(const T &value, const T &previous) {
+    int cmp{CharacterScalarCompare<T>(&value, &previous, chars_, chars_)};
+    if (BACK && cmp == 0) {
+      return true;
+    } else if constexpr (IS_MAX) {
+      return cmp > 0;
+    } else {
+      return cmp < 0;
+    }
+  }
+
+private:
+  std::size_t chars_;
+};
+
+template <typename COMPARE> class ExtremumLocAccumulator {
+public:
+  using Type = typename COMPARE::Type;
+  ExtremumLocAccumulator(const Descriptor &array, std::size_t chars = 0)
+      : array_{array}, argRank_{array.rank()}, compare_{array.ElementBytes()} {
+    // per standard: result indices are all zero if no data
+    for (int j{0}; j < argRank_; ++j) {
+      extremumLoc_[j] = 0;
+    }
+  }
+  int argRank() const { return argRank_; }
+  template <typename A> void GetResult(A *p, int zeroBasedDim = -1) {
+    if (zeroBasedDim >= 0) {
+      *p = extremumLoc_[zeroBasedDim];
+    } else {
+      for (int j{0}; j < argRank_; ++j) {
+        p[j] = extremumLoc_[j];
+      }
+    }
+  }
+  template <typename IGNORED> bool AccumulateAt(const SubscriptValue at[]) {
+    const auto &value{*array_.Element<Type>(at)};
+    if (!previous_ || compare_(value, *previous_)) {
+      previous_ = &value;
+      for (int j{0}; j < argRank_; ++j) {
+        extremumLoc_[j] = at[j];
+      }
+    }
+    return true;
+  }
+
+private:
+  const Descriptor &array_;
+  int argRank_;
+  SubscriptValue extremumLoc_[maxRank];
+  const Type *previous_{nullptr};
+  COMPARE compare_;
+};
+
+template <typename COMPARE, typename CPPTYPE>
+static void DoMaxOrMinLocHelper(const char *intrinsic, Descriptor &result,
+    const Descriptor &x, int kind, const Descriptor *mask,
+    Terminator &terminator) {
+  ExtremumLocAccumulator<COMPARE> accumulator{x};
+  DoTotalReduction<CPPTYPE>(x, 0, mask, accumulator, intrinsic, terminator);
+  switch (kind) {
+  case 1:
+    accumulator.GetResult(
+        result.OffsetElement<CppTypeFor<TypeCategory::Integer, 1>>());
+    break;
+  case 2:
+    accumulator.GetResult(
+        result.OffsetElement<CppTypeFor<TypeCategory::Integer, 2>>());
+    break;
+  case 4:
+    accumulator.GetResult(
+        result.OffsetElement<CppTypeFor<TypeCategory::Integer, 4>>());
+    break;
+  case 8:
+    accumulator.GetResult(
+        result.OffsetElement<CppTypeFor<TypeCategory::Integer, 8>>());
+    break;
+  case 16:
+    accumulator.GetResult(
+        result.OffsetElement<CppTypeFor<TypeCategory::Integer, 16>>());
+    break;
+  default:
+    terminator.Crash("%s: bad KIND=%d", intrinsic, kind);
+  }
+}
+
+template <TypeCategory CAT, int KIND, bool IS_MAX,
+    template <typename, bool, bool> class COMPARE>
+inline void DoMaxOrMinLoc(const char *intrinsic, Descriptor &result,
+    const Descriptor &x, int kind, const char *source, int line,
+    const Descriptor *mask, bool back) {
+  using CppType = CppTypeFor<CAT, KIND>;
+  Terminator terminator{source, line};
+  if (back) {
+    DoMaxOrMinLocHelper<COMPARE<CppType, IS_MAX, true>, CppType>(
+        intrinsic, result, x, kind, mask, terminator);
+  } else {
+    DoMaxOrMinLocHelper<COMPARE<CppType, IS_MAX, false>, CppType>(
+        intrinsic, result, x, kind, mask, terminator);
+  }
+}
+
+template <bool IS_MAX>
+inline void TypedMaxOrMinLoc(const char *intrinsic, Descriptor &result,
+    const Descriptor &x, int kind, const char *source, int line,
+    const Descriptor *mask, bool back) {
+  int rank{x.rank()};
+  SubscriptValue extent[1]{rank};
+  result.Establish(TypeCategory::Integer, kind, nullptr, 1, extent,
+      CFI_attribute_allocatable);
+  result.GetDimension(0).SetBounds(1, extent[0]);
+  Terminator terminator{source, line};
+  if (int stat{result.Allocate()}) {
+    terminator.Crash(
+        "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
+  }
+  CheckIntegerKind(terminator, kind, intrinsic);
+  auto catKind{x.type().GetCategoryAndKind()};
+  RUNTIME_CHECK(terminator, catKind.has_value());
+  switch (catKind->first) {
+  case TypeCategory::Integer:
+    switch (catKind->second) {
+    case 1:
+      DoMaxOrMinLoc<TypeCategory::Integer, 1, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, source, line, mask, back);
+      return;
+    case 2:
+      DoMaxOrMinLoc<TypeCategory::Integer, 2, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, source, line, mask, back);
+      return;
+    case 4:
+      DoMaxOrMinLoc<TypeCategory::Integer, 4, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, source, line, mask, back);
+      return;
+    case 8:
+      DoMaxOrMinLoc<TypeCategory::Integer, 8, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, source, line, mask, back);
+      return;
+    case 16:
+      DoMaxOrMinLoc<TypeCategory::Integer, 16, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, source, line, mask, back);
+      return;
+    }
+    break;
+  case TypeCategory::Real:
+    switch (catKind->second) {
+    // TODO: REAL(2 & 3)
+    case 4:
+      DoMaxOrMinLoc<TypeCategory::Real, 4, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, source, line, mask, back);
+      return;
+    case 8:
+      DoMaxOrMinLoc<TypeCategory::Real, 8, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, source, line, mask, back);
+      return;
+#if LONG_DOUBLE == 80
+    case 10:
+      DoMaxOrMinLoc<TypeCategory::Real, 10, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, source, line, mask, back);
+      return;
+#elif LONG_DOUBLE == 128
+    case 16:
+      DoMaxOrMinLoc<TypeCategory::Real, 16, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, source, line, mask, back);
+      return;
+#endif
+    }
+    break;
+  case TypeCategory::Character:
+    switch (catKind->second) {
+    case 1:
+      DoMaxOrMinLoc<TypeCategory::Character, 1, IS_MAX, CharacterCompare>(
+          intrinsic, result, x, kind, source, line, mask, back);
+      return;
+    case 2:
+      DoMaxOrMinLoc<TypeCategory::Character, 2, IS_MAX, CharacterCompare>(
+          intrinsic, result, x, kind, source, line, mask, back);
+      return;
+    case 4:
+      DoMaxOrMinLoc<TypeCategory::Character, 4, IS_MAX, CharacterCompare>(
+          intrinsic, result, x, kind, source, line, mask, back);
+      return;
+    }
+    break;
+  default:
+    break;
+  }
+  terminator.Crash(
+      "%s: Bad data type code (%d) for array", intrinsic, x.type().raw());
+}
+
+extern "C" {
+void RTNAME(Maxloc)(Descriptor &result, const Descriptor &x, int kind,
+    const char *source, int line, const Descriptor *mask, bool back) {
+  TypedMaxOrMinLoc<true>("MAXLOC", result, x, kind, source, line, mask, back);
+}
+void RTNAME(Minloc)(Descriptor &result, const Descriptor &x, int kind,
+    const char *source, int line, const Descriptor *mask, bool back) {
+  TypedMaxOrMinLoc<false>("MINLOC", result, x, kind, source, line, mask, back);
+}
+} // extern "C"
+
+// MAXLOC/MINLOC with DIM=
+
+template <TypeCategory CAT, int KIND, bool IS_MAX,
+    template <typename, bool, bool> class COMPARE, bool BACK>
+static void DoPartialMaxOrMinLocDirection(const char *intrinsic,
+    Descriptor &result, const Descriptor &x, int kind, int dim,
+    const Descriptor *mask, Terminator &terminator) {
+  using CppType = CppTypeFor<CAT, KIND>;
+  switch (kind) {
+  case 1:
+    PartialReduction<ExtremumLocAccumulator<COMPARE<CppType, IS_MAX, BACK>>,
+        TypeCategory::Integer, 1>(result, x, dim, mask, terminator, intrinsic);
+    break;
+  case 2:
+    PartialReduction<ExtremumLocAccumulator<COMPARE<CppType, IS_MAX, BACK>>,
+        TypeCategory::Integer, 2>(result, x, dim, mask, terminator, intrinsic);
+    break;
+  case 4:
+    PartialReduction<ExtremumLocAccumulator<COMPARE<CppType, IS_MAX, BACK>>,
+        TypeCategory::Integer, 4>(result, x, dim, mask, terminator, intrinsic);
+    break;
+  case 8:
+    PartialReduction<ExtremumLocAccumulator<COMPARE<CppType, IS_MAX, BACK>>,
+        TypeCategory::Integer, 8>(result, x, dim, mask, terminator, intrinsic);
+    break;
+  case 16:
+    PartialReduction<ExtremumLocAccumulator<COMPARE<CppType, IS_MAX, BACK>>,
+        TypeCategory::Integer, 16>(result, x, dim, mask, terminator, intrinsic);
+    break;
+  default:
+    terminator.Crash("%s: bad KIND=%d", intrinsic, kind);
+  }
+}
+
+template <TypeCategory CAT, int KIND, bool IS_MAX,
+    template <typename, bool, bool> class COMPARE>
+inline void DoPartialMaxOrMinLoc(const char *intrinsic, Descriptor &result,
+    const Descriptor &x, int kind, int dim, const Descriptor *mask, bool back,
+    Terminator &terminator) {
+  if (back) {
+    DoPartialMaxOrMinLocDirection<CAT, KIND, IS_MAX, COMPARE, true>(
+        intrinsic, result, x, kind, dim, mask, terminator);
+  } else {
+    DoPartialMaxOrMinLocDirection<CAT, KIND, IS_MAX, COMPARE, false>(
+        intrinsic, result, x, kind, dim, mask, terminator);
+  }
+}
+
+template <bool IS_MAX>
+inline void TypedPartialMaxOrMinLoc(const char *intrinsic, Descriptor &result,
+    const Descriptor &x, int kind, int dim, const char *source, int line,
+    const Descriptor *mask, bool back) {
+  Terminator terminator{source, line};
+  CheckIntegerKind(terminator, kind, intrinsic);
+  auto catKind{x.type().GetCategoryAndKind()};
+  RUNTIME_CHECK(terminator, catKind.has_value());
+  switch (catKind->first) {
+  case TypeCategory::Integer:
+    switch (catKind->second) {
+    case 1:
+      DoPartialMaxOrMinLoc<TypeCategory::Integer, 1, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, dim, mask, back, terminator);
+      return;
+    case 2:
+      DoPartialMaxOrMinLoc<TypeCategory::Integer, 2, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, dim, mask, back, terminator);
+      return;
+    case 4:
+      DoPartialMaxOrMinLoc<TypeCategory::Integer, 4, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, dim, mask, back, terminator);
+      return;
+    case 8:
+      DoPartialMaxOrMinLoc<TypeCategory::Integer, 8, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, dim, mask, back, terminator);
+      return;
+    case 16:
+      DoPartialMaxOrMinLoc<TypeCategory::Integer, 16, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, dim, mask, back, terminator);
+      return;
+    }
+    break;
+  case TypeCategory::Real:
+    switch (catKind->second) {
+    // TODO: REAL(2 & 3)
+    case 4:
+      DoPartialMaxOrMinLoc<TypeCategory::Real, 4, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, dim, mask, back, terminator);
+      return;
+    case 8:
+      DoPartialMaxOrMinLoc<TypeCategory::Real, 8, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, dim, mask, back, terminator);
+      return;
+#if LONG_DOUBLE == 80
+    case 10:
+      DoPartialMaxOrMinLoc<TypeCategory::Real, 10, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, dim, mask, back, terminator);
+      return;
+#elif LONG_DOUBLE == 128
+    case 16:
+      DoPartialMaxOrMinLoc<TypeCategory::Real, 16, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, dim, mask, back, terminator);
+      return;
+#endif
+    }
+    break;
+  case TypeCategory::Character:
+    switch (catKind->second) {
+    case 1:
+      DoPartialMaxOrMinLoc<TypeCategory::Character, 1, IS_MAX,
+          CharacterCompare>(
+          intrinsic, result, x, kind, dim, mask, back, terminator);
+      return;
+    case 2:
+      DoPartialMaxOrMinLoc<TypeCategory::Character, 2, IS_MAX,
+          CharacterCompare>(
+          intrinsic, result, x, kind, dim, mask, back, terminator);
+      return;
+    case 4:
+      DoPartialMaxOrMinLoc<TypeCategory::Character, 4, IS_MAX,
+          CharacterCompare>(
+          intrinsic, result, x, kind, dim, mask, back, terminator);
+      return;
+    }
+    break;
+  default:
+    break;
+  }
+  terminator.Crash(
+      "%s: Bad data type code (%d) for array", intrinsic, x.type().raw());
+}
+
+extern "C" {
+void RTNAME(MaxlocDim)(Descriptor &result, const Descriptor &x, int kind,
+    int dim, const char *source, int line, const Descriptor *mask, bool back) {
+  TypedPartialMaxOrMinLoc<true>(
+      "MAXLOC", result, x, kind, dim, source, line, mask, back);
+}
+void RTNAME(MinlocDim)(Descriptor &result, const Descriptor &x, int kind,
+    int dim, const char *source, int line, const Descriptor *mask, bool back) {
+  TypedPartialMaxOrMinLoc<false>(
+      "MINLOC", result, x, kind, dim, source, line, mask, back);
+}
+} // extern "C"
+
+// MAXVAL and MINVAL
+
+template <TypeCategory CAT, int KIND, bool IS_MAXVAL> struct MaxOrMinIdentity {
+  using Type = CppTypeFor<CAT, KIND>;
+  static constexpr Type Value() {
+    return IS_MAXVAL ? std::numeric_limits<Type>::lowest()
+                     : std::numeric_limits<Type>::max();
+  }
+};
+
+// std::numeric_limits<> may not know int128_t
+template <bool IS_MAXVAL>
+struct MaxOrMinIdentity<TypeCategory::Integer, 16, IS_MAXVAL> {
+  using Type = CppTypeFor<TypeCategory::Integer, 16>;
+  static constexpr Type Value() {
+    return IS_MAXVAL ? Type{1} << 127 : ~Type{0} >> 1;
+  }
+};
+
+template <TypeCategory CAT, int KIND, bool IS_MAXVAL>
+class NumericExtremumAccumulator {
+public:
+  using Type = CppTypeFor<CAT, KIND>;
+  NumericExtremumAccumulator(const Descriptor &array) : array_{array} {}
+  template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
+    *p = extremum_;
+  }
+  bool Accumulate(Type x) {
+    if constexpr (IS_MAXVAL) {
+      if (x > extremum_) {
+        extremum_ = x;
+      }
+    } else if (x < extremum_) {
+      extremum_ = x;
+    }
+    return true;
+  }
+  template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
+    return Accumulate(*array_.Element<A>(at));
+  }
+
+private:
+  const Descriptor &array_;
+  Type extremum_{MaxOrMinIdentity<CAT, KIND, IS_MAXVAL>::Value()};
+};
+
+template <TypeCategory CAT, int KIND, bool IS_MAXVAL>
+inline CppTypeFor<CAT, KIND> TotalNumericMaxOrMin(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask,
+    const char *intrinsic) {
+  return GetTotalReduction<CAT, KIND>(x, source, line, dim, mask,
+      NumericExtremumAccumulator<CAT, KIND, IS_MAXVAL>{x}, intrinsic);
+}
+
+template <TypeCategory CAT, int KIND, bool IS_MAXVAL,
+    template <TypeCategory, int, bool> class ACCUMULATOR>
+static void DoMaxOrMin(Descriptor &result, const Descriptor &x, int dim,
+    const Descriptor *mask, const char *intrinsic, Terminator &terminator) {
+  using Type = CppTypeFor<CAT, KIND>;
+  if (dim == 0 || x.rank() == 1) {
+    // Total reduction
+    result.Establish(x.type(), x.ElementBytes(), nullptr, 0, nullptr,
+        CFI_attribute_allocatable);
+    if (int stat{result.Allocate()}) {
+      terminator.Crash(
+          "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
+    }
+    ACCUMULATOR<CAT, KIND, IS_MAXVAL> accumulator{x};
+    DoTotalReduction<Type>(x, dim, mask, accumulator, intrinsic, terminator);
+    accumulator.GetResult(result.OffsetElement<Type>());
+  } else {
+    // Partial reduction
+    PartialReduction<ACCUMULATOR<CAT, KIND, IS_MAXVAL>, CAT, KIND>(
+        result, x, dim, mask, terminator, intrinsic);
+  }
+}
+
+template <bool IS_MAXVAL>
+inline void NumericMaxOrMin(Descriptor &result, const Descriptor &x, int dim,
+    const char *source, int line, const Descriptor *mask,
+    const char *intrinsic) {
+  Terminator terminator{source, line};
+  auto type{x.type().GetCategoryAndKind()};
+  RUNTIME_CHECK(terminator, type);
+  switch (type->first) {
+  case TypeCategory::Integer:
+    switch (type->second) {
+    case 1:
+      DoMaxOrMin<TypeCategory::Integer, 1, IS_MAXVAL,
+          NumericExtremumAccumulator>(
+          result, x, dim, mask, intrinsic, terminator);
+      return;
+    case 2:
+      DoMaxOrMin<TypeCategory::Integer, 2, IS_MAXVAL,
+          NumericExtremumAccumulator>(
+          result, x, dim, mask, intrinsic, terminator);
+      return;
+    case 4:
+      DoMaxOrMin<TypeCategory::Integer, 4, IS_MAXVAL,
+          NumericExtremumAccumulator>(
+          result, x, dim, mask, intrinsic, terminator);
+      return;
+    case 8:
+      DoMaxOrMin<TypeCategory::Integer, 8, IS_MAXVAL,
+          NumericExtremumAccumulator>(
+          result, x, dim, mask, intrinsic, terminator);
+      return;
+    case 16:
+      DoMaxOrMin<TypeCategory::Integer, 16, IS_MAXVAL,
+          NumericExtremumAccumulator>(
+          result, x, dim, mask, intrinsic, terminator);
+      return;
+    }
+    break;
+  case TypeCategory::Real:
+    switch (type->second) {
+    // TODO: REAL(2 & 3)
+    case 4:
+      DoMaxOrMin<TypeCategory::Real, 4, IS_MAXVAL, NumericExtremumAccumulator>(
+          result, x, dim, mask, intrinsic, terminator);
+      return;
+    case 8:
+      DoMaxOrMin<TypeCategory::Real, 8, IS_MAXVAL, NumericExtremumAccumulator>(
+          result, x, dim, mask, intrinsic, terminator);
+      return;
+#if LONG_DOUBLE == 80
+    case 10:
+      DoMaxOrMin<TypeCategory::Real, 10, IS_MAXVAL, NumericExtremumAccumulator>(
+          result, x, dim, mask, intrinsic, terminator);
+      return;
+#elif LONG_DOUBLE == 128
+    case 16:
+      DoMaxOrMin<TypeCategory::Real, 16, IS_MAXVAL, NumericExtremumAccumulator>(
+          result, x, dim, mask, intrinsic, terminator);
+      return;
+#endif
+    }
+    break;
+  default:
+    break;
+  }
+  terminator.Crash("%s: bad type code %d", intrinsic, x.type().raw());
+}
+
+template <TypeCategory, int KIND, bool IS_MAXVAL>
+class CharacterExtremumAccumulator {
+public:
+  using Type = CppTypeFor<TypeCategory::Character, KIND>;
+  CharacterExtremumAccumulator(const Descriptor &array)
+      : array_{array}, charLen_{array_.ElementBytes() / KIND} {}
+  template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
+    static_assert(std::is_same_v<A, Type>);
+    if (extremum_) {
+      std::memcpy(p, extremum_, charLen_);
+    } else {
+      // empty array: result is all zero-valued characters
+      std::memset(p, 0, charLen_);
+    }
+  }
+  bool Accumulate(const Type *x) {
+    if (!extremum_) {
+      extremum_ = x;
+    } else {
+      int cmp{CharacterScalarCompare(x, extremum_, charLen_, charLen_)};
+      if (IS_MAXVAL == (cmp > 0)) {
+        extremum_ = x;
+      }
+    }
+    return true;
+  }
+  template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
+    return Accumulate(array_.Element<A>(at));
+  }
+
+private:
+  const Descriptor &array_;
+  std::size_t charLen_;
+  const Type *extremum_{nullptr};
+};
+
+template <bool IS_MAXVAL>
+inline void CharacterMaxOrMin(Descriptor &result, const Descriptor &x, int dim,
+    const char *source, int line, const Descriptor *mask,
+    const char *intrinsic) {
+  Terminator terminator{source, line};
+  auto type{x.type().GetCategoryAndKind()};
+  RUNTIME_CHECK(terminator, type && type->first == TypeCategory::Character);
+  switch (type->second) {
+  case 1:
+    DoMaxOrMin<TypeCategory::Character, 1, IS_MAXVAL,
+        CharacterExtremumAccumulator>(
+        result, x, dim, mask, intrinsic, terminator);
+    break;
+  case 2:
+    DoMaxOrMin<TypeCategory::Character, 2, IS_MAXVAL,
+        CharacterExtremumAccumulator>(
+        result, x, dim, mask, intrinsic, terminator);
+    break;
+  case 4:
+    DoMaxOrMin<TypeCategory::Character, 4, IS_MAXVAL,
+        CharacterExtremumAccumulator>(
+        result, x, dim, mask, intrinsic, terminator);
+    break;
+  default:
+    terminator.Crash("%s: bad character kind %d", intrinsic, type->second);
+  }
+}
+
+extern "C" {
+CppTypeFor<TypeCategory::Integer, 1> RTNAME(MaxvalInteger1)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Integer, 1, true>(
+      x, source, line, dim, mask, "MAXVAL");
+}
+CppTypeFor<TypeCategory::Integer, 2> RTNAME(MaxvalInteger2)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Integer, 2, true>(
+      x, source, line, dim, mask, "MAXVAL");
+}
+CppTypeFor<TypeCategory::Integer, 4> RTNAME(MaxvalInteger4)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Integer, 4, true>(
+      x, source, line, dim, mask, "MAXVAL");
+}
+CppTypeFor<TypeCategory::Integer, 8> RTNAME(MaxvalInteger8)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Integer, 8, true>(
+      x, source, line, dim, mask, "MAXVAL");
+}
+CppTypeFor<TypeCategory::Integer, 16> RTNAME(MaxvalInteger16)(
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Integer, 16, true>(
+      x, source, line, dim, mask, "MAXVAL");
+}
+
+// TODO: REAL(2 & 3)
+CppTypeFor<TypeCategory::Real, 4> RTNAME(MaxvalReal4)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Real, 4, true>(
+      x, source, line, dim, mask, "MAXVAL");
+}
+CppTypeFor<TypeCategory::Real, 8> RTNAME(MaxvalReal8)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Real, 8, true>(
+      x, source, line, dim, mask, "MAXVAL");
+}
+#if LONG_DOUBLE == 80
+CppTypeFor<TypeCategory::Real, 10> RTNAME(MaxvalReal10)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Real, 10, true>(
+      x, source, line, dim, mask, "MAXVAL");
+}
+#elif LONG_DOUBLE == 128
+CppTypeFor<TypeCategory::Real, 16> RTNAME(MaxvalReal16)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Real, 16, true>(
+      x, source, line, dim, mask, "MAXVAL");
+}
+#endif
+
+void RTNAME(MaxvalCharacter)(Descriptor &result, const Descriptor &x,
+    const char *source, int line, const Descriptor *mask) {
+  CharacterMaxOrMin<true>(result, x, 0, source, line, mask, "MAXVAL");
+}
+
+CppTypeFor<TypeCategory::Integer, 1> RTNAME(MinvalInteger1)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Integer, 1, false>(
+      x, source, line, dim, mask, "MINVAL");
+}
+CppTypeFor<TypeCategory::Integer, 2> RTNAME(MinvalInteger2)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Integer, 2, false>(
+      x, source, line, dim, mask, "MINVAL");
+}
+CppTypeFor<TypeCategory::Integer, 4> RTNAME(MinvalInteger4)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Integer, 4, false>(
+      x, source, line, dim, mask, "MINVAL");
+}
+CppTypeFor<TypeCategory::Integer, 8> RTNAME(MinvalInteger8)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Integer, 8, false>(
+      x, source, line, dim, mask, "MINVAL");
+}
+CppTypeFor<TypeCategory::Integer, 16> RTNAME(MinvalInteger16)(
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Integer, 16, false>(
+      x, source, line, dim, mask, "MINVAL");
+}
+
+// TODO: REAL(2 & 3)
+CppTypeFor<TypeCategory::Real, 4> RTNAME(MinvalReal4)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Real, 4, false>(
+      x, source, line, dim, mask, "MINVAL");
+}
+CppTypeFor<TypeCategory::Real, 8> RTNAME(MinvalReal8)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Real, 8, false>(
+      x, source, line, dim, mask, "MINVAL");
+}
+#if LONG_DOUBLE == 80
+CppTypeFor<TypeCategory::Real, 10> RTNAME(MinvalReal10)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Real, 10, false>(
+      x, source, line, dim, mask, "MINVAL");
+}
+#elif LONG_DOUBLE == 128
+CppTypeFor<TypeCategory::Real, 16> RTNAME(MinvalReal16)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Real, 16, false>(
+      x, source, line, dim, mask, "MINVAL");
+}
+#endif
+
+void RTNAME(MinvalCharacter)(Descriptor &result, const Descriptor &x,
+    const char *source, int line, const Descriptor *mask) {
+  CharacterMaxOrMin<false>(result, x, 0, source, line, mask, "MINVAL");
+}
+
+void RTNAME(MaxvalDim)(Descriptor &result, const Descriptor &x, int dim,
+    const char *source, int line, const Descriptor *mask) {
+  if (x.type().IsCharacter()) {
+    CharacterMaxOrMin<true>(result, x, dim, source, line, mask, "MAXVAL");
+  } else {
+    NumericMaxOrMin<true>(result, x, dim, source, line, mask, "MAXVAL");
+  }
+}
+void RTNAME(MinvalDim)(Descriptor &result, const Descriptor &x, int dim,
+    const char *source, int line, const Descriptor *mask) {
+  if (x.type().IsCharacter()) {
+    CharacterMaxOrMin<false>(result, x, dim, source, line, mask, "MINVAL");
+  } else {
+    NumericMaxOrMin<false>(result, x, dim, source, line, mask, "MINVAL");
+  }
+}
+
+} // extern "C"
+
+// ALL, ANY, & COUNT
+
+template <bool IS_ALL> class LogicalAccumulator {
+public:
+  using Type = bool;
+  explicit LogicalAccumulator(const Descriptor &array) : array_{array} {}
+  bool Result() const { return result_; }
+  bool Accumulate(bool x) {
+    if (x == IS_ALL) {
+      return true;
+    } else {
+      result_ = x;
+      return false;
+    }
+  }
+  template <typename IGNORED = void>
+  bool AccumulateAt(const SubscriptValue at[]) {
+    return Accumulate(IsLogicalElementTrue(array_, at));
+  }
+
+private:
+  const Descriptor &array_;
+  bool result_{IS_ALL};
+};
+
+template <typename ACCUMULATOR>
+inline auto GetTotalLogicalReduction(const Descriptor &x, const char *source,
+    int line, int dim, ACCUMULATOR &&accumulator, const char *intrinsic) ->
+    typename ACCUMULATOR::Type {
+  Terminator terminator{source, line};
+  if (dim < 0 || dim > 1) {
+    terminator.Crash("%s: bad DIM=%d", intrinsic, dim);
+  }
+  SubscriptValue xAt[maxRank];
+  x.GetLowerBounds(xAt);
+  for (auto elements{x.Elements()}; elements--; x.IncrementSubscripts(xAt)) {
+    if (!accumulator.AccumulateAt(xAt)) {
+      break; // cut short, result is known
+    }
+  }
+  return accumulator.Result();
+}
+
+template <typename ACCUMULATOR>
+inline auto ReduceLogicalDimToScalar(const Descriptor &x, int zeroBasedDim,
+    SubscriptValue subscripts[]) -> typename ACCUMULATOR::Type {
+  ACCUMULATOR accumulator{x};
+  SubscriptValue xAt[maxRank];
+  GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
+  const auto &dim{x.GetDimension(zeroBasedDim)};
+  SubscriptValue at{dim.LowerBound()};
+  for (auto n{dim.Extent()}; n-- > 0; ++at) {
+    xAt[zeroBasedDim] = at;
+    if (!accumulator.AccumulateAt(xAt)) {
+      break;
+    }
+  }
+  return accumulator.Result();
+}
+
+template <bool IS_ALL, int KIND>
+inline void ReduceLogicalDimension(Descriptor &result, const Descriptor &x,
+    int dim, Terminator &terminator, const char *intrinsic) {
+  // Standard requires result to have same LOGICAL kind as argument.
+  CreatePartialReductionResult(result, x, dim, terminator, intrinsic, x.type());
+  SubscriptValue at[maxRank];
+  result.GetLowerBounds(at);
+  INTERNAL_CHECK(at[0] == 1);
+  using CppType = CppTypeFor<TypeCategory::Logical, KIND>;
+  for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
+    *result.Element<CppType>(at) =
+        ReduceLogicalDimToScalar<LogicalAccumulator<IS_ALL>>(x, dim - 1, at);
+  }
+}
+
+template <bool IS_ALL>
+inline void DoReduceLogicalDimension(Descriptor &result, const Descriptor &x,
+    int dim, Terminator &terminator, const char *intrinsic) {
+  auto catKind{x.type().GetCategoryAndKind()};
+  RUNTIME_CHECK(terminator, catKind && catKind->first == TypeCategory::Logical);
+  switch (catKind->second) {
+  case 1:
+    ReduceLogicalDimension<IS_ALL, 1>(result, x, dim, terminator, intrinsic);
+    break;
+  case 2:
+    ReduceLogicalDimension<IS_ALL, 2>(result, x, dim, terminator, intrinsic);
+    break;
+  case 4:
+    ReduceLogicalDimension<IS_ALL, 4>(result, x, dim, terminator, intrinsic);
+    break;
+  case 8:
+    ReduceLogicalDimension<IS_ALL, 8>(result, x, dim, terminator, intrinsic);
+    break;
+  default:
+    terminator.Crash(
+        "%s: bad argument type LOGICAL(%d)", intrinsic, catKind->second);
+  }
+}
+
+// COUNT
+
+class CountAccumulator {
+public:
+  using Type = std::int64_t;
+  explicit CountAccumulator(const Descriptor &array) : array_{array} {}
+  Type Result() const { return result_; }
+  template <typename IGNORED = void>
+  bool AccumulateAt(const SubscriptValue at[]) {
+    if (IsLogicalElementTrue(array_, at)) {
+      ++result_;
+    }
+    return true;
+  }
+
+private:
+  const Descriptor &array_;
+  Type result_{0};
+};
+
+template <int KIND>
+inline void CountDimension(
+    Descriptor &result, const Descriptor &x, int dim, Terminator &terminator) {
+  CreatePartialReductionResult(result, x, dim, terminator, "COUNT",
+      TypeCode{TypeCategory::Integer, KIND});
+  SubscriptValue at[maxRank];
+  result.GetLowerBounds(at);
+  INTERNAL_CHECK(at[0] == 1);
+  using CppType = CppTypeFor<TypeCategory::Integer, KIND>;
+  for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
+    *result.Element<CppType>(at) =
+        ReduceLogicalDimToScalar<CountAccumulator>(x, dim - 1, at);
+  }
+}
+
+extern "C" {
+
+bool RTNAME(All)(const Descriptor &x, const char *source, int line, int dim) {
+  return GetTotalLogicalReduction(
+      x, source, line, dim, LogicalAccumulator<true>{x}, "ALL");
+}
+void RTNAME(AllDim)(Descriptor &result, const Descriptor &x, int dim,
+    const char *source, int line) {
+  Terminator terminator{source, line};
+  DoReduceLogicalDimension<true>(result, x, dim, terminator, "ALL");
+}
+
+bool RTNAME(Any)(const Descriptor &x, const char *source, int line, int dim) {
+  return GetTotalLogicalReduction(
+      x, source, line, dim, LogicalAccumulator<false>{x}, "ANY");
+}
+void RTNAME(AnyDim)(Descriptor &result, const Descriptor &x, int dim,
+    const char *source, int line) {
+  Terminator terminator{source, line};
+  DoReduceLogicalDimension<false>(result, x, dim, terminator, "ANY");
+}
+
+std::int64_t RTNAME(Count)(
+    const Descriptor &x, const char *source, int line, int dim) {
+  return GetTotalLogicalReduction(
+      x, source, line, dim, CountAccumulator{x}, "COUNT");
+}
+void RTNAME(CountDim)(Descriptor &result, const Descriptor &x, int dim,
+    int kind, const char *source, int line) {
+  Terminator terminator{source, line};
+  switch (kind) {
+  case 1:
+    CountDimension<1>(result, x, dim, terminator);
+    break;
+  case 2:
+    CountDimension<2>(result, x, dim, terminator);
+    break;
+  case 4:
+    CountDimension<4>(result, x, dim, terminator);
+    break;
+  case 8:
+    CountDimension<8>(result, x, dim, terminator);
+    break;
+  case 16:
+    CountDimension<16>(result, x, dim, terminator);
+    break;
+  default:
+    terminator.Crash("COUNT: bad KIND=%d", kind);
+  }
+}
+
+} // extern "C"
+} // namespace Fortran::runtime

diff --git a/runtime/reduction.h b/runtime/reduction.h
new file mode 100644
index 0000000..90a1fcf
--- /dev/null
+++ b/runtime/reduction.h

@@ -0,0 +1,230 @@
+//===-- runtime/reduction.h -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Defines the API for the reduction transformational intrinsic functions.
+// (Except the complex-valued total reduction forms of SUM and PRODUCT;
+// the API for those is in complex-reduction.h so that C's _Complex can
+// be used for their return types.)
+
+#ifndef FORTRAN_RUNTIME_REDUCTION_H_
+#define FORTRAN_RUNTIME_REDUCTION_H_
+
+#include "descriptor.h"
+#include "entry-names.h"
+#include "flang/Common/uint128.h"
+#include <complex>
+#include <cstdint>
+
+namespace Fortran::runtime {
+extern "C" {
+
+// Reductions that are known to return scalars have per-type entry
+// points.  These cover the casse that either have no DIM=
+// argument, or have an argument rank of 1.  Pass 0 for no DIM=
+// or the value of the DIM= argument so that it may be checked.
+// The data type in the descriptor is checked against the expected
+// return type.
+//
+// Reductions that return arrays are the remaining cases in which
+// the argument rank is greater than one and there is a DIM=
+// argument present.  These cases establish and allocate their
+// results in a caller-supplied descriptor, which is assumed to
+// be large enough.
+//
+// Complex-valued SUM and PRODUCT reductions have their API
+// entry points defined in complex-reduction.h; these are C wrappers
+// around C++ implementations so as to keep usage of C's _Complex
+// types out of C++ code.
+
+// SUM()
+
+std::int8_t RTNAME(SumInteger1)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+std::int16_t RTNAME(SumInteger2)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+std::int32_t RTNAME(SumInteger4)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+std::int64_t RTNAME(SumInteger8)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+common::int128_t RTNAME(SumInteger16)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+
+// REAL/COMPLEX(2 & 3) return 32-bit float results for the caller to downconvert
+float RTNAME(SumReal2)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+float RTNAME(SumReal3)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+float RTNAME(SumReal4)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+double RTNAME(SumReal8)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+long double RTNAME(SumReal10)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+long double RTNAME(SumReal16)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+
+void RTNAME(CppSumComplex2)(std::complex<float> &, const Descriptor &,
+    const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
+void RTNAME(CppSumComplex3)(std::complex<float> &, const Descriptor &,
+    const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
+void RTNAME(CppSumComplex4)(std::complex<float> &, const Descriptor &,
+    const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
+void RTNAME(CppSumComplex8)(std::complex<double> &, const Descriptor &,
+    const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
+void RTNAME(CppSumComplex10)(std::complex<long double> &, const Descriptor &,
+    const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
+void RTNAME(CppSumComplex16)(std::complex<long double> &, const Descriptor &,
+    const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
+
+void RTNAME(SumDim)(Descriptor &result, const Descriptor &array, int dim,
+    const char *source, int line, const Descriptor *mask = nullptr);
+
+// PRODUCT()
+
+std::int8_t RTNAME(ProductInteger1)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+std::int16_t RTNAME(ProductInteger2)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+std::int32_t RTNAME(ProductInteger4)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+std::int64_t RTNAME(ProductInteger8)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+common::int128_t RTNAME(ProductInteger16)(const Descriptor &,
+    const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
+
+// REAL/COMPLEX(2 & 3) return 32-bit float results for the caller to downconvert
+float RTNAME(ProductReal2)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+float RTNAME(ProductReal3)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+float RTNAME(ProductReal4)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+double RTNAME(ProductReal8)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+long double RTNAME(ProductReal10)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+long double RTNAME(ProductReal16)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+
+void RTNAME(CppProductComplex2)(std::complex<float> &, const Descriptor &,
+    const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
+void RTNAME(CppProductComplex3)(std::complex<float> &, const Descriptor &,
+    const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
+void RTNAME(CppProductComplex4)(std::complex<float> &, const Descriptor &,
+    const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
+void RTNAME(CppProductComplex8)(std::complex<double> &, const Descriptor &,
+    const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
+void RTNAME(CppProductComplex10)(std::complex<long double> &,
+    const Descriptor &, const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
+void RTNAME(CppProductComplex16)(std::complex<long double> &,
+    const Descriptor &, const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
+
+void RTNAME(ProductDim)(Descriptor &result, const Descriptor &array, int dim,
+    const char *source, int line, const Descriptor *mask = nullptr);
+
+// MAXLOC and MINLOC
+// These return allocated arrays in the supplied descriptor.
+// The default value for KIND= should be the default INTEGER in effect at
+// compilation time.
+void RTNAME(Maxloc)(Descriptor &, const Descriptor &, int kind,
+    const char *source, int line, const Descriptor *mask = nullptr,
+    bool back = false);
+void RTNAME(MaxlocDim)(Descriptor &, const Descriptor &, int kind, int dim,
+    const char *source, int line, const Descriptor *mask = nullptr,
+    bool back = false);
+void RTNAME(Minloc)(Descriptor &, const Descriptor &, int kind,
+    const char *source, int line, const Descriptor *mask = nullptr,
+    bool back = false);
+void RTNAME(MinlocDim)(Descriptor &, const Descriptor &, int kind, int dim,
+    const char *source, int line, const Descriptor *mask = nullptr,
+    bool back = false);
+
+// MAXVAL and MINVAL
+std::int8_t RTNAME(MaxvalInteger1)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+std::int16_t RTNAME(MaxvalInteger2)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+std::int32_t RTNAME(MaxvalInteger4)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+std::int64_t RTNAME(MaxvalInteger8)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+common::int128_t RTNAME(MaxvalInteger16)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+float RTNAME(MaxvalReal2)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+float RTNAME(MaxvalReal3)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+float RTNAME(MaxvalReal4)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+double RTNAME(MaxvalReal8)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+long double RTNAME(MaxvalReal10)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+long double RTNAME(MaxvalReal16)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+void RTNAME(MaxvalCharacter)(Descriptor &, const Descriptor &,
+    const char *source, int line, const Descriptor *mask = nullptr);
+
+std::int8_t RTNAME(MinvalInteger1)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+std::int16_t RTNAME(MinvalInteger2)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+std::int32_t RTNAME(MinvalInteger4)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+std::int64_t RTNAME(MivalInteger8)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+common::int128_t RTNAME(MivalInteger16)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+float RTNAME(MinvalReal2)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+float RTNAME(MinvalReal3)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+float RTNAME(MinvalReal4)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+double RTNAME(MinvalReal8)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+long double RTNAME(MinvalReal10)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+long double RTNAME(MinvalReal16)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+void RTNAME(MinvalCharacter)(Descriptor &, const Descriptor &,
+    const char *source, int line, const Descriptor *mask = nullptr);
+
+void RTNAME(MaxvalDim)(Descriptor &, const Descriptor &, int dim,
+    const char *source, int line, const Descriptor *mask = nullptr);
+void RTNAME(MinvalDim)(Descriptor &, const Descriptor &, int dim,
+    const char *source, int line, const Descriptor *mask = nullptr);
+
+// ALL, ANY, & COUNT logical reductions
+bool RTNAME(All)(const Descriptor &, const char *source, int line, int dim = 0);
+void RTNAME(AllDim)(Descriptor &result, const Descriptor &, int dim,
+    const char *source, int line);
+bool RTNAME(Any)(const Descriptor &, const char *source, int line, int dim = 0);
+void RTNAME(AnyDim)(Descriptor &result, const Descriptor &, int dim,
+    const char *source, int line);
+std::int64_t RTNAME(Count)(
+    const Descriptor &, const char *source, int line, int dim = 0);
+void RTNAME(CountDim)(Descriptor &result, const Descriptor &, int dim, int kind,
+    const char *source, int line);
+
+} // extern "C"
+} // namespace Fortran::runtime
+#endif // FORTRAN_RUNTIME_REDUCTION_H_

diff --git a/runtime/tools.cpp b/runtime/tools.cpp
index 219daaf..2d036f5 100644
--- a/runtime/tools.cpp
+++ b/runtime/tools.cpp

@@ -7,7 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "tools.h"
+#include "terminator.h"
 #include <algorithm>
+#include <cstdint>
 #include <cstring>
 
 namespace Fortran::runtime {
@@ -75,4 +77,34 @@
   }
 }
 
+void CheckConformability(const Descriptor &to, const Descriptor &x,
+    Terminator &terminator, const char *funcName, const char *toName,
+    const char *xName) {
+  if (x.rank() == 0) {
+    return; // scalar conforms with anything
+  }
+  int rank{to.rank()};
+  if (x.rank() != rank) {
+    terminator.Crash(
+        "Incompatible array arguments to %s: %s has rank %d but %s has rank %d",
+        funcName, toName, rank, xName, x.rank());
+  } else {
+    for (int j{0}; j < rank; ++j) {
+      auto toExtent{static_cast<std::int64_t>(to.GetDimension(j).Extent())};
+      auto xExtent{static_cast<std::int64_t>(x.GetDimension(j).Extent())};
+      if (xExtent != toExtent) {
+        terminator.Crash("Incompatible array arguments to %s: dimension %d of "
+                         "%s has extent %" PRId64 " but %s has extent %" PRId64,
+            funcName, j, toName, toExtent, xName, xExtent);
+      }
+    }
+  }
+}
+
+void CheckIntegerKind(Terminator &terminator, int kind, const char *intrinsic) {
+  if (kind < 1 || kind > 16 || (kind & (kind - 1)) != 0) {
+    terminator.Crash("%s: bad KIND=%d argument", intrinsic, kind);
+  }
+}
+
 } // namespace Fortran::runtime

diff --git a/runtime/tools.h b/runtime/tools.h
index 6c5eb63..1b988d9 100644
--- a/runtime/tools.h
+++ b/runtime/tools.h

@@ -9,7 +9,10 @@
 #ifndef FORTRAN_RUNTIME_TOOLS_H_
 #define FORTRAN_RUNTIME_TOOLS_H_
 
+#include "cpp-type.h"
+#include "descriptor.h"
 #include "memory.h"
+#include "terminator.h"
 #include <functional>
 #include <map>
 #include <type_traits>
@@ -33,5 +36,70 @@
 // Truncates or pads as necessary
 void ToFortranDefaultCharacter(
     char *to, std::size_t toLength, const char *from);
+
+// Utility for dealing with elemental LOGICAL arguments
+inline bool IsLogicalElementTrue(
+    const Descriptor &logical, const SubscriptValue at[]) {
+  // A LOGICAL value is false if and only if all of its bytes are zero.
+  const char *p{logical.Element<char>(at)};
+  for (std::size_t j{logical.ElementBytes()}; j-- > 0; ++p) {
+    if (*p) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Check array conformability; a scalar 'x' conforms.  Crashes on error.
+void CheckConformability(const Descriptor &to, const Descriptor &x,
+    Terminator &, const char *funcName, const char *toName,
+    const char *fromName);
+
+// Validate a KIND= argument
+void CheckIntegerKind(Terminator &, int kind, const char *intrinsic);
+
+template <typename TO, typename FROM>
+inline void PutContiguousConverted(TO *to, FROM *from, std::size_t count) {
+  while (count-- > 0) {
+    *to++ = *from++;
+  }
+}
+
+static inline std::int64_t GetInt64(const char *p, std::size_t bytes) {
+  switch (bytes) {
+  case 1:
+    return *reinterpret_cast<const CppTypeFor<TypeCategory::Integer, 1> *>(p);
+  case 2:
+    return *reinterpret_cast<const CppTypeFor<TypeCategory::Integer, 2> *>(p);
+  case 4:
+    return *reinterpret_cast<const CppTypeFor<TypeCategory::Integer, 4> *>(p);
+  case 8:
+    return *reinterpret_cast<const CppTypeFor<TypeCategory::Integer, 8> *>(p);
+  default:
+    Terminator{__FILE__, __LINE__}.Crash(
+        "GetInt64: no case for %zd bytes", bytes);
+  }
+}
+
+template <typename INT>
+inline bool SetInteger(INT &x, int kind, std::int64_t value) {
+  switch (kind) {
+  case 1:
+    reinterpret_cast<CppTypeFor<TypeCategory::Integer, 1> &>(x) = value;
+    return true;
+  case 2:
+    reinterpret_cast<CppTypeFor<TypeCategory::Integer, 2> &>(x) = value;
+    return true;
+  case 4:
+    reinterpret_cast<CppTypeFor<TypeCategory::Integer, 4> &>(x) = value;
+    return true;
+  case 8:
+    reinterpret_cast<CppTypeFor<TypeCategory::Integer, 8> &>(x) = value;
+    return true;
+  default:
+    return false;
+  }
+}
+
 } // namespace Fortran::runtime
 #endif // FORTRAN_RUNTIME_TOOLS_H_

diff --git a/runtime/transformational.cpp b/runtime/transformational.cpp
index cd5c7d8..07a34c1 100644
--- a/runtime/transformational.cpp
+++ b/runtime/transformational.cpp

@@ -7,32 +7,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "transformational.h"
-#include "memory.h"
 #include "terminator.h"
+#include "tools.h"
 #include <algorithm>
 #include <cinttypes>
 
 namespace Fortran::runtime {
 
-static inline std::int64_t GetInt64(const char *p, std::size_t bytes) {
-  switch (bytes) {
-  case 1:
-    return *reinterpret_cast<const std::int8_t *>(p);
-  case 2:
-    return *reinterpret_cast<const std::int16_t *>(p);
-  case 4:
-    return *reinterpret_cast<const std::int32_t *>(p);
-  case 8:
-    return *reinterpret_cast<const std::int64_t *>(p);
-  default:
-    Terminator terminator{__FILE__, __LINE__};
-    terminator.Crash("no case for %dz bytes", bytes);
-  }
-}
-
 // F2018 16.9.163
-OwningPtr<Descriptor> RESHAPE(const Descriptor &source, const Descriptor &shape,
-    const Descriptor *pad, const Descriptor *order) {
+OwningPtr<Descriptor> RTNAME(Reshape)(const Descriptor &source,
+    const Descriptor &shape, const Descriptor *pad, const Descriptor *order) {
   // Compute and check the rank of the result.
   Terminator terminator{__FILE__, __LINE__};
   RUNTIME_CHECK(terminator, shape.rank() == 1);

diff --git a/runtime/transformational.h b/runtime/transformational.h
index bf271fa..1994fca 100644
--- a/runtime/transformational.h
+++ b/runtime/transformational.h

@@ -10,11 +10,13 @@
 #define FORTRAN_RUNTIME_TRANSFORMATIONAL_H_
 
 #include "descriptor.h"
+#include "entry-names.h"
 #include "memory.h"
 
 namespace Fortran::runtime {
 
-OwningPtr<Descriptor> RESHAPE(const Descriptor &source, const Descriptor &shape,
-    const Descriptor *pad = nullptr, const Descriptor *order = nullptr);
+OwningPtr<Descriptor> RTNAME(Reshape)(const Descriptor &source,
+    const Descriptor &shape, const Descriptor *pad = nullptr,
+    const Descriptor *order = nullptr);
 }
 #endif // FORTRAN_RUNTIME_TRANSFORMATIONAL_H_

diff --git a/runtime/type-code.cpp b/runtime/type-code.cpp
index 19de2ef..a38a7ab 100644
--- a/runtime/type-code.cpp
+++ b/runtime/type-code.cpp

@@ -78,13 +78,13 @@
       raw_ = CFI_type_Bool;
       break;
     case 2:
-      raw_ = CFI_type_int_fast16_t;
+      raw_ = CFI_type_int_least16_t;
       break;
     case 4:
-      raw_ = CFI_type_int_fast32_t;
+      raw_ = CFI_type_int_least32_t;
       break;
     case 8:
-      raw_ = CFI_type_int_fast64_t;
+      raw_ = CFI_type_int_least64_t;
       break;
     }
     break;
@@ -135,13 +135,13 @@
     return std::make_pair(TypeCategory::Character, 4);
   case CFI_type_Bool:
     return std::make_pair(TypeCategory::Logical, 1);
-  case CFI_type_int_fast8_t:
+  case CFI_type_int_least8_t:
     return std::make_pair(TypeCategory::Logical, 1);
-  case CFI_type_int_fast16_t:
+  case CFI_type_int_least16_t:
     return std::make_pair(TypeCategory::Logical, 2);
-  case CFI_type_int_fast32_t:
+  case CFI_type_int_least32_t:
     return std::make_pair(TypeCategory::Logical, 4);
-  case CFI_type_int_fast64_t:
+  case CFI_type_int_least64_t:
     return std::make_pair(TypeCategory::Logical, 8);
   case CFI_type_struct:
     return std::make_pair(TypeCategory::Derived, 0);

diff --git a/unittests/Evaluate/reshape.cpp b/unittests/Evaluate/reshape.cpp
index bcc8b49..a51acdb 100644
--- a/unittests/Evaluate/reshape.cpp
+++ b/unittests/Evaluate/reshape.cpp

@@ -52,7 +52,7 @@
   MATCH(2, pad.GetDimension(1).Extent());
   MATCH(3, pad.GetDimension(2).Extent());
 
-  auto result{RESHAPE(*source, *shape, &pad)};
+  auto result{RTNAME(Reshape)(*source, *shape, &pad)};
   TEST(result.get() != nullptr);
   result->Check();
   MATCH(sizeof(std::int32_t), result->ElementBytes());

diff --git a/unittests/RuntimeGTest/CMakeLists.txt b/unittests/RuntimeGTest/CMakeLists.txt
index d4ad6b2..ad6377e 100644
--- a/unittests/RuntimeGTest/CMakeLists.txt
+++ b/unittests/RuntimeGTest/CMakeLists.txt

@@ -1,10 +1,9 @@
 add_flang_unittest(FlangRuntimeTests
   CharacterTest.cpp
-  RuntimeCrashTest.cpp
   CrashHandlerFixture.cpp
   NumericalFormatTest.cpp
+  Reduction.cpp
   RuntimeCrashTest.cpp
-  CrashHandlerFixture.cpp
 )
 
 target_link_libraries(FlangRuntimeTests

diff --git a/unittests/RuntimeGTest/Reduction.cpp b/unittests/RuntimeGTest/Reduction.cpp
new file mode 100644
index 0000000..e8471b6
--- /dev/null
+++ b/unittests/RuntimeGTest/Reduction.cpp

@@ -0,0 +1,296 @@
+//===-- flang/unittests/RuntimeGTest/Reductions.cpp -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../../runtime/reduction.h"
+#include "gtest/gtest.h"
+#include "../../runtime/allocatable.h"
+#include "../../runtime/cpp-type.h"
+#include "../../runtime/descriptor.h"
+#include "../../runtime/type-code.h"
+#include <cstdint>
+#include <cstring>
+#include <string>
+#include <vector>
+
+using namespace Fortran::runtime;
+using Fortran::common::TypeCategory;
+
+template <typename A>
+static void StoreElement(void *p, const A &x, std::size_t bytes) {
+  std::memcpy(p, &x, bytes);
+}
+
+template <typename CHAR>
+static void StoreElement(
+    void *p, const std::basic_string<CHAR> &str, std::size_t bytes) {
+  ASSERT_LE(bytes, sizeof(CHAR) * str.size());
+  std::memcpy(p, str.data(), bytes);
+}
+
+template <TypeCategory CAT, int KIND, typename A>
+static OwningPtr<Descriptor> MakeArray(const std::vector<int> &shape,
+    const std::vector<A> &data, std::size_t elemLen = KIND) {
+  auto rank{static_cast<int>(shape.size())};
+  auto result{Descriptor::Create(TypeCode{CAT, KIND}, elemLen, nullptr, rank,
+      nullptr, CFI_attribute_allocatable)};
+  for (int j{0}; j < rank; ++j) {
+    result->GetDimension(j).SetBounds(1, shape[j]);
+  }
+  int stat{result->Allocate()};
+  EXPECT_EQ(stat, 0) << stat;
+  EXPECT_LE(data.size(), result->Elements());
+  char *p{result->OffsetElement<char>()};
+  for (const auto &x : data) {
+    StoreElement(p, x, elemLen);
+    p += elemLen;
+  }
+  return result;
+}
+
+TEST(Reductions, SumInt4) {
+  auto array{MakeArray<TypeCategory::Integer, 4>(
+      std::vector<int>{2, 3}, std::vector<std::int32_t>{1, 2, 3, 4, 5, 6})};
+  std::int32_t sum{RTNAME(SumInteger4)(*array, __FILE__, __LINE__)};
+  EXPECT_EQ(sum, 21) << sum;
+}
+
+TEST(Reductions, DimMaskProductInt4) {
+  std::vector<int> shape{2, 3};
+  auto array{MakeArray<TypeCategory::Integer, 4>(
+      shape, std::vector<std::int32_t>{1, 2, 3, 4, 5, 6})};
+  auto mask{MakeArray<TypeCategory::Logical, 1>(
+      shape, std::vector<bool>{true, false, false, true, true, true})};
+  StaticDescriptor<1> statDesc;
+  Descriptor &prod{statDesc.descriptor()};
+  RTNAME(ProductDim)(prod, *array, 1, __FILE__, __LINE__, &*mask);
+  EXPECT_EQ(prod.rank(), 1);
+  EXPECT_EQ(prod.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(prod.GetDimension(0).Extent(), 3);
+  EXPECT_EQ(*prod.ZeroBasedIndexedElement<std::int32_t>(0), 1);
+  EXPECT_EQ(*prod.ZeroBasedIndexedElement<std::int32_t>(1), 4);
+  EXPECT_EQ(*prod.ZeroBasedIndexedElement<std::int32_t>(2), 30);
+  EXPECT_EQ(RTNAME(SumInteger4)(prod, __FILE__, __LINE__), 35);
+  prod.Destroy();
+}
+
+TEST(Reductions, DoubleMaxMin) {
+  std::vector<int> shape{3, 4, 2}; // rows, columns, planes
+  //   0  -3   6  -9     12 -15  18 -21
+  //  -1   4  -7  10    -13  16 -19  22
+  //   2  -5   8 -11     14 -17  20  22   <- note last two are equal to test
+  //   BACK=
+  auto array{MakeArray<TypeCategory::Real, 8>(shape,
+      std::vector<double>{0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13,
+          14, -15, 16, -17, 18, -19, 20, -21, 22, 22})};
+  EXPECT_EQ(RTNAME(MaxvalReal8)(*array, __FILE__, __LINE__), 22.0);
+  EXPECT_EQ(RTNAME(MinvalReal8)(*array, __FILE__, __LINE__), -21.0);
+  StaticDescriptor<2> statDesc;
+  Descriptor &loc{statDesc.descriptor()};
+  RTNAME(Maxloc)
+  (loc, *array, /*KIND=*/8, __FILE__, __LINE__, /*MASK=*/nullptr,
+      /*BACK=*/false);
+  EXPECT_EQ(loc.rank(), 1);
+  EXPECT_EQ(loc.type().raw(), (TypeCode{TypeCategory::Integer, 8}.raw()));
+  EXPECT_EQ(loc.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(loc.GetDimension(0).Extent(), 3);
+  EXPECT_EQ(
+      *array->Element<double>(loc.ZeroBasedIndexedElement<SubscriptValue>(0)),
+      22.0);
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int64_t>(0), 2);
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int64_t>(1), 4);
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int64_t>(2), 2);
+  loc.Destroy();
+  RTNAME(Maxloc)
+  (loc, *array, /*KIND=*/8, __FILE__, __LINE__, /*MASK=*/nullptr,
+      /*BACK=*/true);
+  EXPECT_EQ(loc.rank(), 1);
+  EXPECT_EQ(loc.type().raw(), (TypeCode{TypeCategory::Integer, 8}.raw()));
+  EXPECT_EQ(loc.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(loc.GetDimension(0).Extent(), 3);
+  EXPECT_EQ(
+      *array->Element<double>(loc.ZeroBasedIndexedElement<SubscriptValue>(0)),
+      22.0);
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int64_t>(0), 3);
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int64_t>(1), 4);
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int64_t>(2), 2);
+  loc.Destroy();
+  RTNAME(MinlocDim)
+  (loc, *array, /*KIND=*/2, /*DIM=*/1, __FILE__, __LINE__, /*MASK=*/nullptr,
+      /*BACK=*/false);
+  EXPECT_EQ(loc.rank(), 2);
+  EXPECT_EQ(loc.type().raw(), (TypeCode{TypeCategory::Integer, 2}.raw()));
+  EXPECT_EQ(loc.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(loc.GetDimension(0).Extent(), 4);
+  EXPECT_EQ(loc.GetDimension(1).LowerBound(), 1);
+  EXPECT_EQ(loc.GetDimension(1).Extent(), 2);
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(0), 2); // -1
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(1), 3); // -5
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(2), 2); // -2
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(3), 3); // -11
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(4), 2); // -13
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(5), 3); // -17
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(6), 2); // -19
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(7), 1); // -21
+  loc.Destroy();
+  auto mask{MakeArray<TypeCategory::Logical, 1>(shape,
+      std::vector<bool>{false, false, false, false, false, true, false, true,
+          false, false, true, true, true, false, false, true, false, true, true,
+          true, false, true, true, true})};
+  RTNAME(MaxlocDim)
+  (loc, *array, /*KIND=*/2, /*DIM=*/3, __FILE__, __LINE__, /*MASK=*/&*mask,
+      false);
+  EXPECT_EQ(loc.rank(), 2);
+  EXPECT_EQ(loc.type().raw(), (TypeCode{TypeCategory::Integer, 2}.raw()));
+  EXPECT_EQ(loc.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(loc.GetDimension(0).Extent(), 3);
+  EXPECT_EQ(loc.GetDimension(1).LowerBound(), 1);
+  EXPECT_EQ(loc.GetDimension(1).Extent(), 4);
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(0), 2); // 12
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(1), 0);
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(2), 0);
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(3), 2); // -15
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(4), 0);
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(5), 1); // -5
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(6), 2); // 18
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(7), 1); // -7
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(8), 0);
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(9), 2); // -21
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(10), 2); // 22
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(11), 2); // 22
+  loc.Destroy();
+}
+
+TEST(Reductions, Character) {
+  std::vector<int> shape{2, 3};
+  auto array{MakeArray<TypeCategory::Character, 1>(shape,
+      std::vector<std::string>{"abc", "def", "ghi", "jkl", "mno", "abc"}, 3)};
+  StaticDescriptor<1> statDesc;
+  Descriptor &res{statDesc.descriptor()};
+  RTNAME(MaxvalCharacter)(res, *array, __FILE__, __LINE__);
+  EXPECT_EQ(res.rank(), 0);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Character, 1}.raw()));
+  EXPECT_EQ(std::memcmp(res.OffsetElement<char>(), "mno", 3), 0);
+  res.Destroy();
+  RTNAME(MinvalCharacter)(res, *array, __FILE__, __LINE__);
+  EXPECT_EQ(res.rank(), 0);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Character, 1}.raw()));
+  EXPECT_EQ(std::memcmp(res.OffsetElement<char>(), "abc", 3), 0);
+  res.Destroy();
+  RTNAME(Maxloc)
+  (res, *array, /*KIND=*/4, __FILE__, __LINE__, /*MASK=*/nullptr,
+      /*BACK=*/false);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Integer, 4}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 1);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 3);
+  res.Destroy();
+  auto mask{MakeArray<TypeCategory::Logical, 1>(
+      shape, std::vector<bool>{false, true, false, true, false, true})};
+  RTNAME(Maxloc)
+  (res, *array, /*KIND=*/4, __FILE__, __LINE__, /*MASK=*/&*mask,
+      /*BACK=*/false);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Integer, 4}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 2);
+  res.Destroy();
+  RTNAME(Minloc)
+  (res, *array, /*KIND=*/4, __FILE__, __LINE__, /*MASK=*/nullptr,
+      /*BACK=*/false);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Integer, 4}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 1);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 1);
+  res.Destroy();
+  RTNAME(Minloc)
+  (res, *array, /*KIND=*/4, __FILE__, __LINE__, /*MASK=*/nullptr,
+      /*BACK=*/true);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Integer, 4}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 3);
+  res.Destroy();
+  RTNAME(Minloc)
+  (res, *array, /*KIND=*/4, __FILE__, __LINE__, /*MASK=*/&*mask,
+      /*BACK=*/true);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Integer, 4}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 3);
+  res.Destroy();
+}
+
+TEST(Reductions, Logical) {
+  std::vector<int> shape{2, 2};
+  auto array{MakeArray<TypeCategory::Logical, 4>(
+      shape, std::vector<std::int32_t>{false, false, true, true})};
+  ASSERT_EQ(array->ElementBytes(), std::size_t{4});
+  EXPECT_EQ(RTNAME(All)(*array, __FILE__, __LINE__), false);
+  EXPECT_EQ(RTNAME(Any)(*array, __FILE__, __LINE__), true);
+  EXPECT_EQ(RTNAME(Count)(*array, __FILE__, __LINE__), 2);
+  StaticDescriptor<2> statDesc;
+  Descriptor &res{statDesc.descriptor()};
+  RTNAME(AllDim)(res, *array, /*DIM=*/1, __FILE__, __LINE__);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Logical, 4}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 0);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 1);
+  res.Destroy();
+  RTNAME(AllDim)(res, *array, /*DIM=*/2, __FILE__, __LINE__);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Logical, 4}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 0);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 0);
+  res.Destroy();
+  RTNAME(AnyDim)(res, *array, /*DIM=*/1, __FILE__, __LINE__);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Logical, 4}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 0);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 1);
+  res.Destroy();
+  RTNAME(AnyDim)(res, *array, /*DIM=*/2, __FILE__, __LINE__);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Logical, 4}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 1);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 1);
+  res.Destroy();
+  RTNAME(CountDim)(res, *array, /*DIM=*/1, /*KIND=*/4, __FILE__, __LINE__);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Integer, 4}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 0);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 2);
+  res.Destroy();
+  RTNAME(CountDim)(res, *array, /*DIM=*/2, /*KIND=*/8, __FILE__, __LINE__);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Integer, 8}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int64_t>(0), 1);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int64_t>(1), 1);
+  res.Destroy();
+}
commit	70a94dbd12bada418e323252a041e00ec3c8c0b7	[log] [tgz]
author	peter klausler <pklausler@nvidia.com>	Wed Mar 31 09:14:08 2021 -0700
committer	Copybara-Service <copybara-worker@google.com>	Mon Apr 12 11:23:34 2021 -0700
tree	5e6e8da1cbf152a7146bb7b7735aa36cb9b3e3e3
parent	6cc777ae3d28e3dd5d396365d936a4df46d26542 [diff]