[APInt] Optimize umul_ov

Replace the two costly udiv() calls with lshr(1)*RHS, a left shift, and an add.

On one 64-bit umul_ov benchmark, I measured an obvious improvement: 12.8129s -> 3.6257s

Note: there may be some value in special-casing 64-bit (the most common
case) with __builtin_umulll_overflow().

Differential Revision: https://reviews.llvm.org/D60669

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358730 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 5ed176f..9c59d93 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -1914,12 +1914,19 @@
 }
 
 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
-  APInt Res = *this * RHS;
+  if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
+    Overflow = true; // operand widths alone force a product >= 2^BitWidth
+    return *this * RHS; // still hand back the wrapped (truncated) product
+  }
 
-  if (*this != 0 && RHS != 0)
-    Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS;
-  else
-    Overflow = false;
+  APInt Res = lshr(1) * RHS; // cannot wrap, given the check above
+  Overflow = Res.isNegative(); // top bit set: the doubling below loses it
+  Res <<= 1; // now (*this with bit 0 cleared) * RHS, modulo 2^BitWidth
+  if ((*this)[0]) { // bit 0 was dropped by lshr(1); add RHS back once
+    Res += RHS;
+    if (Res.ult(RHS)) // sum smaller than an addend: the add wrapped
+      Overflow = true;
+  }
   return Res;
 }
 
diff --git a/unittests/ADT/APIntTest.cpp b/unittests/ADT/APIntTest.cpp
index 6ef5b25..a92a654 100644
--- a/unittests/ADT/APIntTest.cpp
+++ b/unittests/ADT/APIntTest.cpp
@@ -2381,6 +2381,49 @@
   }
 }
 
+// Checks APInt::umul_ov on hand-picked 64-bit operands and, exhaustively,
+// against a double-width multiply for every operand pair of width 1..5.
+TEST(APIntTest, umul_ov) {
+  // Exact products that reach or exceed 2^64.
+  const std::pair<uint64_t, uint64_t> Overflows[] = {
+      {0x8000000000000000, 2},
+      {0x5555555555555556, 3},
+      {4294967296, 4294967296},
+      {4294967295, 4294967298},
+  };
+  // Exact products just below 2^64.
+  const std::pair<uint64_t, uint64_t> NonOverflows[] = {
+      {0x7fffffffffffffff, 2},
+      {0x5555555555555555, 3},
+      {4294967295, 4294967297},
+  };
+
+  bool Overflow;
+  for (auto &X : Overflows) {
+    APInt A(64, X.first);
+    APInt B(64, X.second);
+    (void)A.umul_ov(B, Overflow);
+    EXPECT_TRUE(Overflow);
+  }
+  for (auto &X : NonOverflows) {
+    APInt A(64, X.first);
+    APInt B(64, X.second);
+    (void)A.umul_ov(B, Overflow);
+    EXPECT_FALSE(Overflow);
+  }
+
+  // D is the exact product at twice the width: its high half is non-zero
+  // exactly on overflow, and its low half is the value umul_ov must return.
+  for (unsigned Bits = 1; Bits <= 5; ++Bits)
+    for (unsigned A = 0; A != 1u << Bits; ++A)
+      for (unsigned B = 0; B != 1u << Bits; ++B) {
+        APInt C = APInt(Bits, A).umul_ov(APInt(Bits, B), Overflow);
+        APInt D = APInt(2 * Bits, A) * APInt(2 * Bits, B);
+        EXPECT_TRUE(D.getHiBits(Bits).isNullValue() != Overflow);
+        EXPECT_TRUE(C == D.trunc(Bits));
+      }
+}
+
 TEST(APIntTest, SolveQuadraticEquationWrap) {
   // Verify that "Solution" is the first non-negative integer that solves
   // Ax^2 + Bx + C = "0 or overflow", i.e. that it is a correct solution