[flang] Use a "double-double" accumulator in SUM Use a "double-double" accumulator, a/k/a Kahan summation, in the SUM intrinsic in the runtime for real & complex. This seems to be the best-recommended technique for reducing error, as opposed to the initial implementation of SUM's distinct accumulators for positive and negative items. Differential Revision: https://reviews.llvm.org/D104338 GitOrigin-RevId: c375ec86132984bc3915323fbe3f8c71f4d5503f

commit: f1add8b3a2cd3174ff33fd0bd39fd0e911961121 [log] [tgz]
author: peter klausler <pklausler@nvidia.com> Tue Jun 15 15:19:18 2021 -0700
committer: Copybara-Service <copybara-worker@google.com> Wed Jun 16 18:36:53 2021 -0700
tree: a9d78c81a14bb9055bcfdc4b45acd284c1a93148
parent: 5cab9bb5a13a26f046fe71a499696e00007d0a56 [diff]
diff --git a/runtime/sum.cpp b/runtime/sum.cpp
index db808b2..fb32505 100644
--- a/runtime/sum.cpp
+++ b/runtime/sum.cpp

@@ -9,8 +9,8 @@
 // Implements SUM for all required operand types and shapes.
 //
 // Real and complex SUM reductions attempt to reduce floating-point
-// cancellation on intermediate results by adding up partial sums
-// for positive and negative elements independently.
+// cancellation on intermediate results by using "Kahan summation"
+// (basically the same as manual "double-double").
 
 #include "reduction-templates.h"
 #include "reduction.h"
@@ -40,24 +40,17 @@
 template <typename INTERMEDIATE> class RealSumAccumulator {
 public:
   explicit RealSumAccumulator(const Descriptor &array) : array_{array} {}
-  void Reinitialize() { positives_ = negatives_ = inOrder_ = 0; }
-  template <typename A> A Result() const {
-    auto sum{static_cast<A>(positives_ + negatives_)};
-    return std::isfinite(sum) ? sum : static_cast<A>(inOrder_);
-  }
+  void Reinitialize() { sum_ = correction_ = 0; }
+  template <typename A> A Result() const { return sum_; }
   template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
     *p = Result<A>();
   }
   template <typename A> bool Accumulate(A x) {
-    // Accumulate the nonnegative and negative elements independently
-    // to reduce cancellation; also record an in-order sum for use
-    // in case of overflow.
-    if (x >= 0) {
-      positives_ += x;
-    } else {
-      negatives_ += x;
-    }
-    inOrder_ += x;
+    // Kahan summation
+    auto next{x + correction_};
+    auto oldSum{sum_};
+    sum_ += next;
+    correction_ = (sum_ - oldSum) - next; // algebraically zero
     return true;
   }
   template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
@@ -66,7 +59,7 @@
 
 private:
   const Descriptor &array_;
-  INTERMEDIATE positives_{0.0}, negatives_{0.0}, inOrder_{0.0};
+  INTERMEDIATE sum_{0.0}, correction_{0.0};
 };
 
 template <typename PART> class ComplexSumAccumulator {
commit	f1add8b3a2cd3174ff33fd0bd39fd0e911961121	[log] [tgz]
author	peter klausler <pklausler@nvidia.com>	Tue Jun 15 15:19:18 2021 -0700
committer	Copybara-Service <copybara-worker@google.com>	Wed Jun 16 18:36:53 2021 -0700
tree	a9d78c81a14bb9055bcfdc4b45acd284c1a93148
parent	5cab9bb5a13a26f046fe71a499696e00007d0a56 [diff]