[libc++] Add introsort to avoid O(n^2) behavior

This commit adds a benchmark that tests std::sort on an adversarial inputs,
and uses introsort in std::sort to avoid O(n^2) behavior on adversarial
inputs.

Inputs where partitions are unbalanced even after 2 log(n) pivots have
been selected, the algorithm switches to heap sort to avoid the
possibility of spending O(n^2) time on sorting the input.
Benchmark results show that the intro sort implementation does
significantly better.

Benchmarking results before this change. Time represents the sorting
time required per element:

----------------------------------------------------------------------------------------------------------
Benchmark                                                                Time             CPU   Iterations
----------------------------------------------------------------------------------------------------------
BM_Sort_uint32_QuickSortAdversary_1                                   3.75 ns         3.74 ns    187432960
BM_Sort_uint32_QuickSortAdversary_4                                   3.05 ns         3.05 ns    231211008
BM_Sort_uint32_QuickSortAdversary_16                                  2.45 ns         2.45 ns    288096256
BM_Sort_uint32_QuickSortAdversary_64                                  32.8 ns         32.8 ns     21495808
BM_Sort_uint32_QuickSortAdversary_256                                  132 ns          132 ns      5505024
BM_Sort_uint32_QuickSortAdversary_1024                                 498 ns          497 ns      1572864
BM_Sort_uint32_QuickSortAdversary_16384                               3846 ns         3845 ns       262144
BM_Sort_uint32_QuickSortAdversary_262144                             61431 ns        61400 ns       262144
BM_Sort_uint64_QuickSortAdversary_1                                   3.93 ns         3.92 ns    181141504
BM_Sort_uint64_QuickSortAdversary_4                                   3.10 ns         3.09 ns    222560256
BM_Sort_uint64_QuickSortAdversary_16                                  2.50 ns         2.50 ns    283639808
BM_Sort_uint64_QuickSortAdversary_64                                  33.2 ns         33.2 ns     21757952
BM_Sort_uint64_QuickSortAdversary_256                                  132 ns          132 ns      5505024
BM_Sort_uint64_QuickSortAdversary_1024                                 478 ns          477 ns      1572864
BM_Sort_uint64_QuickSortAdversary_16384                               3932 ns         3930 ns       262144
BM_Sort_uint64_QuickSortAdversary_262144                             61646 ns        61615 ns       262144

Benchmarking results after this change:

----------------------------------------------------------------------------------------------------------
Benchmark                                                                Time             CPU   Iterations
----------------------------------------------------------------------------------------------------------
BM_Sort_uint32_QuickSortAdversary_1                                   6.31 ns         6.30 ns    107741184
BM_Sort_uint32_QuickSortAdversary_4                                   4.51 ns         4.50 ns    158859264
BM_Sort_uint32_QuickSortAdversary_16                                  3.00 ns         3.00 ns    223608832
BM_Sort_uint32_QuickSortAdversary_64                                  44.8 ns         44.8 ns     15990784
BM_Sort_uint32_QuickSortAdversary_256                                 69.0 ns         68.9 ns      9961472
BM_Sort_uint32_QuickSortAdversary_1024                                 118 ns          118 ns      6029312
BM_Sort_uint32_QuickSortAdversary_16384                                175 ns          175 ns      4194304
BM_Sort_uint32_QuickSortAdversary_262144                               210 ns          210 ns      3407872
BM_Sort_uint64_QuickSortAdversary_1                                   6.75 ns         6.73 ns    103809024
BM_Sort_uint64_QuickSortAdversary_4                                   4.53 ns         4.53 ns    160432128
BM_Sort_uint64_QuickSortAdversary_16                                  2.98 ns         2.97 ns    234356736
BM_Sort_uint64_QuickSortAdversary_64                                  44.3 ns         44.3 ns     15990784
BM_Sort_uint64_QuickSortAdversary_256                                 69.2 ns         69.2 ns     10223616
BM_Sort_uint64_QuickSortAdversary_1024                                 119 ns          119 ns      6029312
BM_Sort_uint64_QuickSortAdversary_16384                                173 ns          173 ns      4194304
BM_Sort_uint64_QuickSortAdversary_262144                               212 ns          212 ns      3407872

Differential Revision: https://reviews.llvm.org/D113413

GitOrigin-RevId: 7f287390d78d301956e8e925a84349fd4408a11e
diff --git a/benchmarks/algorithms.bench.cpp b/benchmarks/algorithms.bench.cpp
index 564d89d..e5dd37f 100644
--- a/benchmarks/algorithms.bench.cpp
+++ b/benchmarks/algorithms.bench.cpp
@@ -38,18 +38,65 @@
   Descending,
   SingleElement,
   PipeOrgan,
-  Heap
+  Heap,
+  QuickSortAdversary,
 };
-struct AllOrders : EnumValuesAsTuple<AllOrders, Order, 6> {
+struct AllOrders : EnumValuesAsTuple<AllOrders, Order, 7> {
   static constexpr const char* Names[] = {"Random",     "Ascending",
                                           "Descending", "SingleElement",
-                                          "PipeOrgan",  "Heap"};
+                                          "PipeOrgan",  "Heap",
+                                          "QuickSortAdversary"};
 };
 
+// fillAdversarialQuickSortInput fills the input vector with N int-like values.
+// These values are arranged in such a way that they would invoke O(N^2)
+// behavior on any quick sort implementation that satisifies certain conditions.
+// Details are available in the following paper:
+// "A Killer Adversary for Quicksort", M. D. McIlroy, Software—Practice &
+// ExperienceVolume 29 Issue 4 April 10, 1999 pp 341–344.
+// https://dl.acm.org/doi/10.5555/311868.311871.
+template <class T>
+void fillAdversarialQuickSortInput(T& V, size_t N) {
+  assert(N > 0);
+  // If an element is equal to gas, it indicates that the value of the element
+  // is still to be decided and may change over the course of time.
+  const int gas = N - 1;
+  V.resize(N);
+  for (int i = 0; i < N; ++i) {
+    V[i] = gas;
+  }
+  // Candidate for the pivot position.
+  int candidate = 0;
+  int nsolid = 0;
+  // Populate all positions in the generated input to gas.
+  std::vector<int> ascVals(V.size());
+  // Fill up with ascending values from 0 to V.size()-1.  These will act as
+  // indices into V.
+  std::iota(ascVals.begin(), ascVals.end(), 0);
+  std::sort(ascVals.begin(), ascVals.end(), [&](int x, int y) {
+    if (V[x] == gas && V[y] == gas) {
+      // We are comparing two inputs whose value is still to be decided.
+      if (x == candidate) {
+        V[x] = nsolid++;
+      } else {
+        V[y] = nsolid++;
+      }
+    }
+    if (V[x] == gas) {
+      candidate = x;
+    } else if (V[y] == gas) {
+      candidate = y;
+    }
+    return V[x] < V[y];
+  });
+}
+
 template <typename T>
 void fillValues(std::vector<T>& V, size_t N, Order O) {
   if (O == Order::SingleElement) {
     V.resize(N, 0);
+  } else if (O == Order::QuickSortAdversary) {
+    fillAdversarialQuickSortInput(V, N);
   } else {
     while (V.size() < N)
       V.push_back(V.size());
@@ -128,6 +175,9 @@
   case Order::Heap:
     std::make_heap(V.begin(), V.end());
     break;
+  case Order::QuickSortAdversary:
+    // Nothing to do
+    break;
   }
 }
 
diff --git a/include/__algorithm/sort.h b/include/__algorithm/sort.h
index 0ab6c44..c0b602b 100644
--- a/include/__algorithm/sort.h
+++ b/include/__algorithm/sort.h
@@ -263,12 +263,14 @@
 
 template <class _Compare, class _RandomAccessIterator>
 void
-__sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
+__introsort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp,
+            typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type __depth)
 {
     typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
     typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
     const difference_type __limit = is_trivially_copy_constructible<value_type>::value &&
                                     is_trivially_copy_assignable<value_type>::value ? 30 : 6;
+    typedef typename __comp_ref_type<_Compare>::type _Comp_ref;
     while (true)
     {
     __restart:
@@ -298,6 +300,13 @@
             return;
         }
         // __len > 5
+        if (__depth == 0)
+        {
+          // Fallback to heap sort as Introsort suggests.
+          _VSTD::__partial_sort<_Comp_ref>(__first, __last, __last, _Comp_ref(__comp));
+          return;
+        }
+        --__depth;
         _RandomAccessIterator __m = __first;
         _RandomAccessIterator __lm1 = __last;
         --__lm1;
@@ -440,19 +449,34 @@
         // sort smaller range with recursive call and larger with tail recursion elimination
         if (__i - __first < __last - __i)
         {
-            _VSTD::__sort<_Compare>(__first, __i, __comp);
-            // _VSTD::__sort<_Compare>(__i+1, __last, __comp);
-            __first = ++__i;
+          _VSTD::__introsort<_Compare>(__first, __i, __comp, __depth);
+          __first = ++__i;
         }
         else
         {
-            _VSTD::__sort<_Compare>(__i+1, __last, __comp);
-            // _VSTD::__sort<_Compare>(__first, __i, __comp);
-            __last = __i;
+          _VSTD::__introsort<_Compare>(__i + 1, __last, __comp, __depth);
+          __last = __i;
         }
     }
 }
 
+template <typename _Number>
+inline _LIBCPP_HIDE_FROM_ABI _Number __log2i(_Number __n) {
+  _Number __log2 = 0;
+  while (__n > 1) {
+    __log2++;
+    __n >>= 1;
+  }
+  return __log2;
+}
+
+template <class _Compare, class _RandomAccessIterator>
+void __sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
+  typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
+  difference_type __depth_limit = 2 * __log2i(__last - __first);
+  __introsort(__first, __last, __comp, __depth_limit);
+}
+
 template <class _Compare, class _Tp>
 inline _LIBCPP_INLINE_VISIBILITY
 void
diff --git a/test/std/algorithms/alg.sorting/alg.sort/sort/sort.pass.cpp b/test/std/algorithms/alg.sorting/alg.sort/sort/sort.pass.cpp
index 3058b64..f9ab97c 100644
--- a/test/std/algorithms/alg.sorting/alg.sort/sort/sort.pass.cpp
+++ b/test/std/algorithms/alg.sorting/alg.sort/sort/sort.pass.cpp
@@ -147,6 +147,63 @@
     assert(*pv[array_size - 1] == v[array_size - 1]);
 }
 
+// test_adversarial_quicksort generates a vector with values arranged in such a
+// way that they would invoke O(N^2) behavior on any quick sort implementation
+// that satisifies certain conditions.  Details are available in the following
+// paper:
+// "A Killer Adversary for Quicksort", M. D. McIlroy, Software—Practice &
+// ExperienceVolume 29 Issue 4 April 10, 1999 pp 341–344.
+// https://dl.acm.org/doi/10.5555/311868.311871.
+struct AdversaryComparator {
+  AdversaryComparator(int N, std::vector<int>& input) : gas(N - 1), V(input) {
+    V.resize(N);
+    // Populate all positions in the generated input to gas to indicate that
+    // none of the values have been fixed yet.
+    for (int i = 0; i < N; ++i)
+      V[i] = gas;
+  }
+
+  bool operator()(int x, int y) {
+    if (V[x] == gas && V[y] == gas) {
+      // We are comparing two inputs whose value is still to be decided.
+      if (x == candidate) {
+        V[x] = nsolid++;
+      } else {
+        V[y] = nsolid++;
+      }
+    }
+    if (V[x] == gas) {
+      candidate = x;
+    } else if (V[y] == gas) {
+      candidate = y;
+    }
+    return V[x] < V[y];
+  }
+
+private:
+  // If an element is equal to gas, it indicates that the value of the element
+  // is still to be decided and may change over the course of time.
+  const int gas;
+  // This is a reference so that we can manipulate the input vector later.
+  std::vector<int>& V;
+  // Candidate for the pivot position.
+  int candidate = 0;
+  int nsolid = 0;
+};
+
+void test_adversarial_quicksort(int N) {
+  assert(N > 0);
+  std::vector<int> ascVals(N);
+  // Fill up with ascending values from 0 to N-1.  These will act as indices
+  // into V.
+  std::iota(ascVals.begin(), ascVals.end(), 0);
+  std::vector<int> V;
+  AdversaryComparator comp(N, V);
+  std::sort(ascVals.begin(), ascVals.end(), comp);
+  std::sort(V.begin(), V.end());
+  assert(std::is_sorted(V.begin(), V.end()));
+}
+
 int main(int, char**)
 {
     // test null range
@@ -171,6 +228,7 @@
     test_larger_sorts(1009);
 
     test_pointer_sort();
+    test_adversarial_quicksort(1 << 20);
 
-  return 0;
+    return 0;
 }