[libcxx/variant] Introduce `switch`-based mechanism for `std::visit`.

This patch introduces a mechanism for `std::visit` that is backed by a `switch` statement.
The `switch` is structured such that it's a flattened manual vtable (an n-ary array).
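The `switch` mechanism is enabled if `(1 * ... * vs.size()) <= 256` (the value of `_LIBCPP_VARIANT_SWITCH_MAX`); larger visitations fall back to the function-pointer vtable.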

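The following are performance numbers from the benchmarks added in D85419, tested on my 2017 MacBook Pro. The first block is the baseline before this patch; the second block is with this patch applied (the speed-up appears wherever the product of the variant sizes is at most 256).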

```
$ ./projects/libcxx/benchmarks/variant_visit_1.libcxx.out
2020-08-09 23:55:14
Running ./projects/libcxx/benchmarks/variant_visit_1.libcxx.out
Run on (8 X 3100 MHz CPU s)
CPU Caches:
  L1 Data 32K (x4)
  L1 Instruction 32K (x4)
  L2 Unified 262K (x4)
  L3 Unified 8388K (x1)
Load Average: 2.03, 2.36, 2.43
------------------------------------------------------------
Benchmark                 Time             CPU   Iterations
------------------------------------------------------------
BM_Visit<1, 1>        0.260 ns        0.260 ns   1000000000
BM_Visit<1, 2>         1.56 ns         1.56 ns    435925220
BM_Visit<1, 3>         1.55 ns         1.55 ns    444416228
BM_Visit<1, 4>         1.57 ns         1.57 ns    427951336
BM_Visit<1, 5>         1.57 ns         1.56 ns    444766371
BM_Visit<1, 6>         1.70 ns         1.68 ns    446639358
BM_Visit<1, 7>         1.64 ns         1.64 ns    400441630
BM_Visit<1, 8>         1.56 ns         1.56 ns    430729471
BM_Visit<1, 9>         1.58 ns         1.58 ns    449894596
BM_Visit<1, 10>        1.54 ns         1.54 ns    449660506
BM_Visit<1, 20>        1.56 ns         1.56 ns    450813074
BM_Visit<1, 30>        1.59 ns         1.59 ns    440032940
BM_Visit<1, 40>        1.59 ns         1.59 ns    443731656
BM_Visit<1, 50>        1.56 ns         1.56 ns    444709859
BM_Visit<1, 60>        1.59 ns         1.58 ns    439527320
BM_Visit<1, 70>        1.57 ns         1.57 ns    438450890
BM_Visit<1, 80>        1.58 ns         1.58 ns    443001525
BM_Visit<1, 90>        1.63 ns         1.62 ns    448456349
BM_Visit<1, 100>       1.57 ns         1.57 ns    445740630

$ ./projects/libcxx/benchmarks/variant_visit_2.libcxx.out
2020-08-09 23:59:35
Running ./projects/libcxx/benchmarks/variant_visit_2.libcxx.out
Run on (8 X 3100 MHz CPU s)
CPU Caches:
  L1 Data 32K (x4)
  L1 Instruction 32K (x4)
  L2 Unified 262K (x4)
  L3 Unified 8388K (x1)
Load Average: 1.40, 1.94, 2.22
-----------------------------------------------------------
Benchmark                Time             CPU   Iterations
-----------------------------------------------------------
BM_Visit<2, 1>       0.261 ns        0.260 ns   1000000000
BM_Visit<2, 2>        1.55 ns         1.54 ns    432844219
BM_Visit<2, 3>        1.30 ns         1.30 ns    532529974
BM_Visit<2, 4>        1.54 ns         1.54 ns    446055910
BM_Visit<2, 5>        1.31 ns         1.31 ns    531099680
BM_Visit<2, 6>        1.56 ns         1.56 ns    443203475
BM_Visit<2, 7>        1.29 ns         1.29 ns    526478087
BM_Visit<2, 8>        1.56 ns         1.56 ns    439000834
BM_Visit<2, 9>        1.30 ns         1.30 ns    528756817
BM_Visit<2, 10>       1.56 ns         1.55 ns    442923039
BM_Visit<2, 20>       1.35 ns         1.35 ns    517021072
BM_Visit<2, 30>       1.60 ns         1.59 ns    419724661
BM_Visit<2, 40>       1.45 ns         1.44 ns    472137163
BM_Visit<2, 50>       1.65 ns         1.65 ns    421389743

$ ./projects/libcxx/benchmarks/variant_visit_3.libcxx.out
2020-08-10 00:01:32
Running ./projects/libcxx/benchmarks/variant_visit_3.libcxx.out
Run on (8 X 3100 MHz CPU s)
CPU Caches:
  L1 Data 32K (x4)
  L1 Instruction 32K (x4)
  L2 Unified 262K (x4)
  L3 Unified 8388K (x1)
Load Average: 2.20, 2.01, 2.21
-----------------------------------------------------------
Benchmark                Time             CPU   Iterations
-----------------------------------------------------------
BM_Visit<3, 1>       0.272 ns        0.271 ns   1000000000
BM_Visit<3, 2>        1.87 ns         1.86 ns    361858090
BM_Visit<3, 3>        1.77 ns         1.77 ns    391192579
BM_Visit<3, 4>        1.84 ns         1.84 ns    374694223
BM_Visit<3, 5>        1.75 ns         1.75 ns    408270392
BM_Visit<3, 6>        1.88 ns         1.88 ns    378759185
BM_Visit<3, 7>        1.79 ns         1.79 ns    395498102
BM_Visit<3, 8>        1.85 ns         1.85 ns    371660366
BM_Visit<3, 9>        1.80 ns         1.80 ns    386872851
BM_Visit<3, 10>       1.84 ns         1.84 ns    362367606
BM_Visit<3, 15>       1.77 ns         1.77 ns    392060220
BM_Visit<3, 20>       1.85 ns         1.85 ns    379157188
```

```
$ ./projects/libcxx/benchmarks/variant_visit_1.libcxx.out
2020-08-10 00:05:57
Running ./projects/libcxx/benchmarks/variant_visit_1.libcxx.out
Run on (8 X 3100 MHz CPU s)
CPU Caches:
  L1 Data 32K (x4)
  L1 Instruction 32K (x4)
  L2 Unified 262K (x4)
  L3 Unified 8388K (x1)
Load Average: 2.27, 2.36, 2.34
------------------------------------------------------------
Benchmark                 Time             CPU   Iterations
------------------------------------------------------------
BM_Visit<1, 1>        0.271 ns        0.271 ns   1000000000
BM_Visit<1, 2>        0.269 ns        0.269 ns   1000000000
BM_Visit<1, 3>        0.271 ns        0.271 ns   1000000000
BM_Visit<1, 4>        0.270 ns        0.270 ns   1000000000
BM_Visit<1, 5>        0.269 ns        0.269 ns   1000000000
BM_Visit<1, 6>        0.270 ns        0.269 ns   1000000000
BM_Visit<1, 7>        0.265 ns        0.265 ns   1000000000
BM_Visit<1, 8>        0.269 ns        0.269 ns   1000000000
BM_Visit<1, 9>        0.268 ns        0.268 ns   1000000000
BM_Visit<1, 10>       0.269 ns        0.269 ns   1000000000
BM_Visit<1, 20>       0.267 ns        0.267 ns   1000000000
BM_Visit<1, 30>       0.272 ns        0.272 ns   1000000000
BM_Visit<1, 40>       0.268 ns        0.268 ns   1000000000
BM_Visit<1, 50>       0.268 ns        0.268 ns   1000000000
BM_Visit<1, 60>       0.268 ns        0.268 ns   1000000000
BM_Visit<1, 70>       0.269 ns        0.269 ns   1000000000
BM_Visit<1, 80>       0.266 ns        0.266 ns   1000000000
BM_Visit<1, 90>       0.268 ns        0.268 ns   1000000000
BM_Visit<1, 100>      0.267 ns        0.267 ns   1000000000

$ ./projects/libcxx/benchmarks/variant_visit_2.libcxx.out
2020-08-12 04:09:59
Running ./projects/libcxx/benchmarks/variant_visit_2.libcxx.out
Run on (8 X 3100 MHz CPU s)
CPU Caches:
  L1 Data 32K (x4)
  L1 Instruction 32K (x4)
  L2 Unified 262K (x4)
  L3 Unified 8388K (x1)
Load Average: 2.17, 4.20, 4.78
-----------------------------------------------------------
Benchmark                Time             CPU   Iterations
-----------------------------------------------------------
BM_Visit<2, 1>       0.302 ns        0.301 ns   1000000000
BM_Visit<2, 2>       0.297 ns        0.295 ns   1000000000
BM_Visit<2, 3>       0.353 ns        0.351 ns   1000000000
BM_Visit<2, 4>       0.276 ns        0.276 ns   1000000000
BM_Visit<2, 5>       0.285 ns        0.283 ns   1000000000
BM_Visit<2, 6>       0.290 ns        0.287 ns   1000000000
BM_Visit<2, 7>       0.282 ns        0.280 ns   1000000000
BM_Visit<2, 8>       0.290 ns        0.287 ns   1000000000
BM_Visit<2, 9>       0.291 ns        0.285 ns   1000000000
BM_Visit<2, 10>      0.293 ns        0.287 ns   1000000000
BM_Visit<2, 20>       1.70 ns         1.68 ns    391400375
BM_Visit<2, 30>       1.64 ns         1.63 ns    418925874
BM_Visit<2, 40>       1.63 ns         1.62 ns    423623677
BM_Visit<2, 50>       1.68 ns         1.67 ns    411687212

$ ./projects/libcxx/benchmarks/variant_visit_3.libcxx.out
2020-08-12 04:10:43
Running ./projects/libcxx/benchmarks/variant_visit_3.libcxx.out
Run on (8 X 3100 MHz CPU s)
CPU Caches:
  L1 Data 32K (x4)
  L1 Instruction 32K (x4)
  L2 Unified 262K (x4)
  L3 Unified 8388K (x1)
Load Average: 1.57, 3.76, 4.59
-----------------------------------------------------------
Benchmark                Time             CPU   Iterations
-----------------------------------------------------------
BM_Visit<3, 1>       0.271 ns        0.270 ns   1000000000
BM_Visit<3, 2>       0.344 ns        0.334 ns   1000000000
BM_Visit<3, 3>       0.347 ns        0.336 ns   1000000000
BM_Visit<3, 4>       0.300 ns        0.296 ns   1000000000
BM_Visit<3, 5>       0.290 ns        0.286 ns   1000000000
BM_Visit<3, 6>       0.272 ns        0.271 ns   1000000000
BM_Visit<3, 7>        1.72 ns         1.71 ns    415765841
BM_Visit<3, 8>        1.73 ns         1.72 ns    408909555
BM_Visit<3, 9>        2.16 ns         2.04 ns    380898485
BM_Visit<3, 10>       2.45 ns         2.40 ns    295714256
BM_Visit<3, 15>       1.92 ns         1.85 ns    375990332
BM_Visit<3, 20>       1.66 ns         1.65 ns    414456233
```

Differential Revision: https://reviews.llvm.org/D85420

GitOrigin-RevId: a175a96517c5d9dc05ba13a6481b1b031a53a22f
diff --git a/include/variant b/include/variant
index 33d5dc7..c18ef55 100644
--- a/include/variant
+++ b/include/variant
@@ -354,9 +354,9 @@
 
 enum class _Trait { _TriviallyAvailable, _Available, _Unavailable };
 
-template <typename _Tp,
-          template <typename> class _IsTriviallyAvailable,
-          template <typename> class _IsAvailable>
+template <class _Tp,
+          template <class> class _IsTriviallyAvailable,
+          template <class> class _IsAvailable>
 constexpr _Trait __trait =
     _IsTriviallyAvailable<_Tp>::value
         ? _Trait::_TriviallyAvailable
@@ -373,7 +373,7 @@
   return __result;
 }
 
-template <typename... _Types>
+template <class... _Types>
 struct __traits {
   static constexpr _Trait __copy_constructible_trait =
       __common_trait({__trait<_Types,
@@ -434,183 +434,270 @@
 
 namespace __visitation {
 
+#define _LIBCPP_VARIANT_CASES_4(_Case, _Base)                                  \
+  _Case(_Base + 0)                                                             \
+  _Case(_Base + 1)                                                             \
+  _Case(_Base + 2)                                                             \
+  _Case(_Base + 3)
+
+#define _LIBCPP_VARIANT_CASES_16(_Case, _Base)                                 \
+  _LIBCPP_VARIANT_CASES_4(_Case, _Base + 4 * 0)                                \
+  _LIBCPP_VARIANT_CASES_4(_Case, _Base + 4 * 1)                                \
+  _LIBCPP_VARIANT_CASES_4(_Case, _Base + 4 * 2)                                \
+  _LIBCPP_VARIANT_CASES_4(_Case, _Base + 4 * 3)
+
+#define _LIBCPP_VARIANT_CASES_64(_Case, _Base)                                 \
+  _LIBCPP_VARIANT_CASES_16(_Case, _Base + 16 * 0)                              \
+  _LIBCPP_VARIANT_CASES_16(_Case, _Base + 16 * 1)                              \
+  _LIBCPP_VARIANT_CASES_16(_Case, _Base + 16 * 2)                              \
+  _LIBCPP_VARIANT_CASES_16(_Case, _Base + 16 * 3)
+
+#define _LIBCPP_VARIANT_CASES_256(_Case, _Base)                                \
+  _LIBCPP_VARIANT_CASES_64(_Case, _Base + 64 * 0)                              \
+  _LIBCPP_VARIANT_CASES_64(_Case, _Base + 64 * 1)                              \
+  _LIBCPP_VARIANT_CASES_64(_Case, _Base + 64 * 2)                              \
+  _LIBCPP_VARIANT_CASES_64(_Case, _Base + 64 * 3)
+
+#define _LIBCPP_VARIANT_CASES(_NumCases, _Case)                                \
+  _LIBCPP_CONCAT(_LIBCPP_VARIANT_CASES_, _NumCases)(_Case, 0)
+
+#define _LIBCPP_VARIANT_SWITCH_MAX 256
+
+template <class _Iter, class _Fp, size_t... _Is>
+inline _LIBCPP_INLINE_VISIBILITY
+static constexpr void __fill_cartesian_impl(
+    _Iter __iter, _Fp __f, index_sequence<_Is...>) {
+  *__iter = __f(integral_constant<size_t, _Is>{}...);
+}
+
+template <class _Iter, class _Fp, size_t... _Is, size_t... _Js, class... _Ls>
+inline _LIBCPP_INLINE_VISIBILITY
+static constexpr void __fill_cartesian_impl(
+    _Iter __iter, _Fp __f, index_sequence<_Is...>, index_sequence<_Js...>, _Ls... __ls) {
+  constexpr size_t _Mp = (1 * ... * _Ls::size());
+  (__fill_cartesian_impl(
+      __iter + _Js * _Mp, __f, index_sequence<_Is..., _Js>{}, __ls...), ...);
+}
+
+template <size_t... _Ns, class _Iter, class _Fp>
+inline _LIBCPP_INLINE_VISIBILITY
+static constexpr void __fill_cartesian(_Iter __iter, _Fp __f) {
+  __fill_cartesian_impl(
+      __iter, __f, index_sequence<>{}, make_index_sequence<_Ns>{}...);
+}
+
+template <size_t _Np, size_t... _Ns>
+struct __multi {
+  inline _LIBCPP_INLINE_VISIBILITY
+  static constexpr size_t __size = (_Np * ... * _Ns);
+
+  inline _LIBCPP_INLINE_VISIBILITY
+  static constexpr size_t
+  __index(const size_t (&__is)[sizeof...(_Ns) + 1]) noexcept {
+    constexpr size_t __ns[] = {_Ns..., 1};
+    size_t __result = 0;
+    for (size_t __i = 0; __i < sizeof...(_Ns) + 1; ++__i) {
+      if (__is[__i] == variant_npos) {
+        return variant_npos;
+      }
+      __result += __is[__i];
+      __result *= __ns[__i];
+    }
+    return __result;
+  }
+};
+
+template <size_t... _Ns>
+struct __indices {
+  inline _LIBCPP_INLINE_VISIBILITY
+  static constexpr auto __value = [] {
+    using _Tp = array<size_t, sizeof...(_Ns)>;
+    array<_Tp, (1 * ... * _Ns)> __result = {};
+    __fill_cartesian<_Ns...>(__result.begin(),
+                             [](auto... __is) -> _Tp { return {__is...}; });
+    return __result;
+  }();
+};
+
+template <size_t... _Ns, class _Fp, class _Rp, class... _Args>
+inline _LIBCPP_INLINE_VISIBILITY
+static constexpr auto __make_vtable_impl(_Fp __f, _Rp (*)(_Args...)) {
+  array<_Rp (*)(_Args...), (1 * ... * _Ns) + 1> __result = {
+      [](_Args...) -> _Rp { __throw_bad_variant_access(); }
+  };
+  __fill_cartesian<_Ns...>(__result.begin() + 1, __f);
+  return __result;
+}
+
+template <size_t... _Ns, class _Fp>
+inline _LIBCPP_INLINE_VISIBILITY
+static constexpr auto __make_vtable(_Fp __f) {
+  using _Tp = decltype(__f(integral_constant<size_t, (_Ns, 0)>{}...));
+  return __make_vtable_impl<_Ns...>(__f, _Tp{});
+}
+
 struct __base {
-  template <class _Visitor, class... _Vs>
+  template <class _Vis, class _Vp, class _Wp>
   inline _LIBCPP_INLINE_VISIBILITY
   static constexpr decltype(auto)
-  __visit_alt_at(size_t __index, _Visitor&& __visitor, _Vs&&... __vs) {
-    constexpr auto __fdiagonal =
-        __make_fdiagonal<_Visitor&&,
-                         decltype(_VSTD::forward<_Vs>(__vs).__as_base())...>();
-    return __fdiagonal[__index](_VSTD::forward<_Visitor>(__visitor),
-                                _VSTD::forward<_Vs>(__vs).__as_base()...);
-  }
-
-  template <class _Visitor, class... _Vs>
-  inline _LIBCPP_INLINE_VISIBILITY
-  static constexpr decltype(auto) __visit_alt(_Visitor&& __visitor,
-                                              _Vs&&... __vs) {
-    constexpr auto __fmatrix =
-        __make_fmatrix<_Visitor&&,
-                       decltype(_VSTD::forward<_Vs>(__vs).__as_base())...>();
-    return __at(__fmatrix, __vs.index()...)(
-        _VSTD::forward<_Visitor>(__visitor),
-        _VSTD::forward<_Vs>(__vs).__as_base()...);
-  }
-
-private:
-  template <class _Tp>
-  inline _LIBCPP_INLINE_VISIBILITY
-  static constexpr const _Tp& __at(const _Tp& __elem) { return __elem; }
-
-  template <class _Tp, size_t _Np, typename... _Indices>
-  inline _LIBCPP_INLINE_VISIBILITY
-  static constexpr auto&& __at(const array<_Tp, _Np>& __elems,
-                               size_t __index, _Indices... __indices) {
-    return __at(__elems[__index], __indices...);
-  }
-
-  template <class _Fp, class... _Fs>
-  static constexpr void __std_visit_visitor_return_type_check() {
-    static_assert(
-        __all<is_same_v<_Fp, _Fs>...>::value,
-        "`std::visit` requires the visitor to have a single return type.");
-  }
-
-  template <class... _Fs>
-  inline _LIBCPP_INLINE_VISIBILITY
-  static constexpr auto __make_farray(_Fs&&... __fs) {
-    __std_visit_visitor_return_type_check<__uncvref_t<_Fs>...>();
-    using __result = array<common_type_t<__uncvref_t<_Fs>...>, sizeof...(_Fs)>;
-    return __result{{_VSTD::forward<_Fs>(__fs)...}};
-  }
-
-  template <std::size_t... _Is>
-  struct __dispatcher {
-    template <class _Fp, class... _Vs>
-    inline _LIBCPP_INLINE_VISIBILITY
-    static constexpr decltype(auto) __dispatch(_Fp __f, _Vs... __vs) {
+  __visit_alt_at(size_t __index, _Vis&& __vis, _Vp&& __v, _Wp&& __w) {
+    constexpr size_t __size = __uncvref_t<_Vp>::__size();
+    static_assert(__size == __uncvref_t<_Wp>::__size());
+    constexpr auto __dispatch = [](auto __i) {
+      return +[](_Vis&& __vis, _Vp&& __v, _Wp&& __w) -> decltype(auto) {
+        constexpr size_t _Ip = decltype(__i)::value;
         return __invoke_constexpr(
-            static_cast<_Fp>(__f),
-            __access::__base::__get_alt<_Is>(static_cast<_Vs>(__vs))...);
+            _VSTD::forward<_Vis>(__vis),
+            __access::__base::__get_alt<_Ip>(_VSTD::forward<_Vp>(__v)),
+            __access::__base::__get_alt<_Ip>(_VSTD::forward<_Wp>(__w)));
+      };
+    };
+#define _LIBCPP_VARIANT_CASE(_Ip)                                              \
+  case _Ip: {                                                                  \
+    if constexpr (_Ip < __size) {                                              \
+      return __dispatch(integral_constant<size_t, _Ip>{})(                     \
+          _VSTD::forward<_Vis>(__vis),                                         \
+          _VSTD::forward<_Vp>(__v),                                            \
+          _VSTD::forward<_Wp>(__w));                                           \
+    } else {                                                                   \
+      _LIBCPP_UNREACHABLE();                                                   \
+    }                                                                          \
+  }
+    if constexpr (__size <= _LIBCPP_VARIANT_SWITCH_MAX) {
+      switch (__index) {
+        _LIBCPP_VARIANT_CASES(_LIBCPP_VARIANT_SWITCH_MAX, _LIBCPP_VARIANT_CASE)
+        default: __throw_bad_variant_access();
+      }
+    } else {
+      constexpr auto __vtable = __make_vtable<__size>(__dispatch);
+      return __vtable[__index + 1](_VSTD::forward<_Vis>(__vis),
+                                   _VSTD::forward<_Vp>(__v),
+                                   _VSTD::forward<_Wp>(__w));
     }
-  };
-
-  template <class _Fp, class... _Vs, size_t... _Is>
-  inline _LIBCPP_INLINE_VISIBILITY
-  static constexpr auto __make_dispatch(index_sequence<_Is...>) {
-    return __dispatcher<_Is...>::template __dispatch<_Fp, _Vs...>;
+#undef _LIBCPP_VARIANT_CASE
   }
 
-  template <size_t _Ip, class _Fp, class... _Vs>
+  template <size_t... _Is, class _Vis, class... _Vs>
   inline _LIBCPP_INLINE_VISIBILITY
-  static constexpr auto __make_fdiagonal_impl() {
-    return __make_dispatch<_Fp, _Vs...>(
-        index_sequence<(__identity<_Vs>{}, _Ip)...>{});
+  static constexpr decltype(auto) __visit_alt(_Vis&& __vis, _Vs&&... __vs) {
+    if constexpr (sizeof...(_Vs) == 0) {
+      return __invoke_constexpr(_VSTD::forward<_Vis>(__vis));
+    } else {
+      return __visit_alt_impl(index_sequence_for<_Vs...>{},
+                              _VSTD::forward<_Vis>(__vis),
+                              _VSTD::forward<_Vs>(__vs)...);
+    }
   }
 
-  template <class _Fp, class... _Vs, size_t... _Is>
+  template <size_t... _Is, class _Vis, class... _Vs>
   inline _LIBCPP_INLINE_VISIBILITY
-  static constexpr auto __make_fdiagonal_impl(index_sequence<_Is...>) {
-    return __base::__make_farray(__make_fdiagonal_impl<_Is, _Fp, _Vs...>()...);
+  static constexpr decltype(auto)
+  __visit_alt_impl(index_sequence<_Is...>, _Vis&& __vis, _Vs&&... __vs) {
+    using __multi = __multi<__uncvref_t<_Vs>::__size()...>;
+    constexpr auto __dispatch = [](auto... __is) {
+      return +[](_Vis&& __vis, _Vs&&... __vs) {
+        return __invoke_constexpr(
+            _VSTD::forward<_Vis>(__vis),
+            __access::__base::__get_alt<decltype(__is)::value>(
+                _VSTD::forward<_Vs>(__vs))...);
+      };
+    };
+#define _LIBCPP_VARIANT_CASE(_Ip)                                              \
+  case _Ip: {                                                                  \
+    if constexpr (_Ip < __multi::__size) {                                     \
+      return __dispatch(integral_constant<size_t, __itable[_Ip][_Is]>{}...)(   \
+          _VSTD::forward<_Vis>(__vis), _VSTD::forward<_Vs>(__vs)...);          \
+    } else {                                                                   \
+      _LIBCPP_UNREACHABLE();                                                   \
+    }                                                                          \
   }
-
-  template <class _Fp, class _Vp, class... _Vs>
-  inline _LIBCPP_INLINE_VISIBILITY
-  static constexpr auto __make_fdiagonal() {
-    constexpr size_t _Np = __uncvref_t<_Vp>::__size();
-    static_assert(__all<(_Np == __uncvref_t<_Vs>::__size())...>::value);
-    return __make_fdiagonal_impl<_Fp, _Vp, _Vs...>(make_index_sequence<_Np>{});
-  }
-
-  template <class _Fp, class... _Vs, size_t... _Is>
-  inline _LIBCPP_INLINE_VISIBILITY
-  static constexpr auto __make_fmatrix_impl(index_sequence<_Is...> __is) {
-    return __make_dispatch<_Fp, _Vs...>(__is);
-  }
-
-  template <class _Fp, class... _Vs, size_t... _Is, size_t... _Js, class... _Ls>
-  inline _LIBCPP_INLINE_VISIBILITY
-  static constexpr auto __make_fmatrix_impl(index_sequence<_Is...>,
-                                            index_sequence<_Js...>,
-                                            _Ls... __ls) {
-    return __base::__make_farray(__make_fmatrix_impl<_Fp, _Vs...>(
-        index_sequence<_Is..., _Js>{}, __ls...)...);
-  }
-
-  template <class _Fp, class... _Vs>
-  inline _LIBCPP_INLINE_VISIBILITY
-  static constexpr auto __make_fmatrix() {
-    return __make_fmatrix_impl<_Fp, _Vs...>(
-        index_sequence<>{}, make_index_sequence<__uncvref_t<_Vs>::__size()>{}...);
+    if constexpr (__multi::__size <= _LIBCPP_VARIANT_SWITCH_MAX) {
+      constexpr const auto& __itable =
+          __indices<__uncvref_t<_Vs>::__size()...>::__value;
+      switch (__multi::__index({__vs.index()...})) {
+        _LIBCPP_VARIANT_CASES(_LIBCPP_VARIANT_SWITCH_MAX, _LIBCPP_VARIANT_CASE)
+        default: __throw_bad_variant_access();
+      }
+    } else {
+      constexpr auto __vtable =
+          __make_vtable<__uncvref_t<_Vs>::__size()...>(__dispatch);
+      return __vtable[__multi::__index({__vs.index()...}) + 1](
+          _VSTD::forward<_Vis>(__vis), _VSTD::forward<_Vs>(__vs)...);
+    }
+#undef _LIBCPP_VARIANT_CASE
   }
 };
 
 struct __variant {
-  template <class _Visitor, class... _Vs>
+  template <class _Vis, class _Vp, class _Wp>
   inline _LIBCPP_INLINE_VISIBILITY
   static constexpr decltype(auto)
-  __visit_alt_at(size_t __index, _Visitor&& __visitor, _Vs&&... __vs) {
+  __visit_alt_at(size_t __index, _Vis&& __vis, _Vp&& __v, _Wp&& __w) {
     return __base::__visit_alt_at(__index,
-                                  _VSTD::forward<_Visitor>(__visitor),
-                                  _VSTD::forward<_Vs>(__vs).__impl...);
+                                  _VSTD::forward<_Vis>(__vis),
+                                  _VSTD::forward<_Vp>(__v).__impl,
+                                  _VSTD::forward<_Wp>(__w).__impl);
   }
 
-  template <class _Visitor, class... _Vs>
+  template <class _Vis, class... _Vs>
   inline _LIBCPP_INLINE_VISIBILITY
-  static constexpr decltype(auto) __visit_alt(_Visitor&& __visitor,
-                                              _Vs&&... __vs) {
-    return __base::__visit_alt(_VSTD::forward<_Visitor>(__visitor),
+  static constexpr decltype(auto) __visit_alt(_Vis&& __vis, _Vs&&... __vs) {
+    return __base::__visit_alt(_VSTD::forward<_Vis>(__vis),
                                _VSTD::forward<_Vs>(__vs).__impl...);
   }
 
-  template <class _Visitor, class... _Vs>
+  template <class _Vis, class _Vp, class _Wp>
   inline _LIBCPP_INLINE_VISIBILITY
   static constexpr decltype(auto)
-  __visit_value_at(size_t __index, _Visitor&& __visitor, _Vs&&... __vs) {
-    return __visit_alt_at(
-        __index,
-        __make_value_visitor(_VSTD::forward<_Visitor>(__visitor)),
-        _VSTD::forward<_Vs>(__vs)...);
+  __visit_value_at(size_t __index, _Vis&& __vis, _Vp&& __v, _Wp&& __w) {
+    return __visit_alt_at(__index,
+                          __make_value_visitor(_VSTD::forward<_Vis>(__vis)),
+                          _VSTD::forward<_Vp>(__v),
+                          _VSTD::forward<_Wp>(__w));
   }
 
-  template <class _Visitor, class... _Vs>
+  template <class _Vis, class... _Vs>
   inline _LIBCPP_INLINE_VISIBILITY
-  static constexpr decltype(auto) __visit_value(_Visitor&& __visitor,
-                                                _Vs&&... __vs) {
-    return __visit_alt(
-        __make_value_visitor(_VSTD::forward<_Visitor>(__visitor)),
-        _VSTD::forward<_Vs>(__vs)...);
+  static constexpr decltype(auto) __visit_value(_Vis&& __vis, _Vs&&... __vs) {
+    return __visit_alt(__make_value_visitor(_VSTD::forward<_Vis>(__vis)),
+                       _VSTD::forward<_Vs>(__vs)...);
   }
 
 private:
-  template <class _Visitor, class... _Values>
+  template <class _Vis, class... _Values>
+  inline _LIBCPP_INLINE_VISIBILITY
   static constexpr void __std_visit_exhaustive_visitor_check() {
-    static_assert(is_invocable_v<_Visitor, _Values...>,
+    static_assert(is_invocable_v<_Vis, _Values...>,
                   "`std::visit` requires the visitor to be exhaustive.");
   }
 
-  template <class _Visitor>
+  template <class _Vis>
   struct __value_visitor {
     template <class... _Alts>
     inline _LIBCPP_INLINE_VISIBILITY
     constexpr decltype(auto) operator()(_Alts&&... __alts) const {
       __std_visit_exhaustive_visitor_check<
-          _Visitor,
-          decltype((_VSTD::forward<_Alts>(__alts).__value))...>();
-      return __invoke_constexpr(_VSTD::forward<_Visitor>(__visitor),
+          _Vis, decltype((_VSTD::forward<_Alts>(__alts).__value))...>();
+      return __invoke_constexpr(_VSTD::forward<_Vis>(__vis),
                                 _VSTD::forward<_Alts>(__alts).__value...);
     }
-    _Visitor&& __visitor;
+    _Vis&& __vis;
   };
 
-  template <class _Visitor>
+  template <class _Vis>
   inline _LIBCPP_INLINE_VISIBILITY
-  static constexpr auto __make_value_visitor(_Visitor&& __visitor) {
-    return __value_visitor<_Visitor>{_VSTD::forward<_Visitor>(__visitor)};
+  static constexpr auto __make_value_visitor(_Vis&& __vis) {
+    return __value_visitor<_Vis>{_VSTD::forward<_Vis>(__vis)};
   }
 };
 
+#undef _LIBCPP_VARIANT_SWITCH_MAX
+#undef _LIBCPP_VARIANT_CASES
+#undef _LIBCPP_VARIANT_CASES_256
+#undef _LIBCPP_VARIANT_CASES_64
+#undef _LIBCPP_VARIANT_CASES_16
+#undef _LIBCPP_VARIANT_CASES_4
+
 } // namespace __visitation
 
 template <size_t _Index, class _Tp>
@@ -633,7 +720,7 @@
 
 #define _LIBCPP_VARIANT_UNION(destructible_trait, destructor)                  \
   template <size_t _Index, class _Tp, class... _Types>                         \
-  union _LIBCPP_TEMPLATE_VIS __union<destructible_trait,                      \
+  union _LIBCPP_TEMPLATE_VIS __union<destructible_trait,                       \
                                       _Index,                                  \
                                       _Tp,                                     \
                                       _Types...> {                             \
@@ -787,9 +874,9 @@
   template <size_t _Ip, class _Tp, class... _Args>
   inline _LIBCPP_INLINE_VISIBILITY
   static _Tp& __construct_alt(__alt<_Ip, _Tp>& __a, _Args&&... __args) {
-    ::new ((void*)_VSTD::addressof(__a))
+    auto* result = ::new ((void*)_VSTD::addressof(__a))
         __alt<_Ip, _Tp>(in_place, _VSTD::forward<_Args>(__args)...);
-    return __a.__value;
+    return result->__value;
   }
 
   template <class _Rhs>
@@ -816,7 +903,7 @@
 #define _LIBCPP_VARIANT_MOVE_CONSTRUCTOR(move_constructible_trait,             \
                                          move_constructor)                     \
   template <class... _Types>                                                   \
-  class _LIBCPP_TEMPLATE_VIS __move_constructor<__traits<_Types...>,          \
+  class _LIBCPP_TEMPLATE_VIS __move_constructor<__traits<_Types...>,           \
                                                  move_constructible_trait>     \
       : public __constructor<__traits<_Types...>> {                            \
     using __base_type = __constructor<__traits<_Types...>>;                    \
@@ -856,7 +943,7 @@
 #define _LIBCPP_VARIANT_COPY_CONSTRUCTOR(copy_constructible_trait,             \
                                          copy_constructor)                     \
   template <class... _Types>                                                   \
-  class _LIBCPP_TEMPLATE_VIS __copy_constructor<__traits<_Types...>,          \
+  class _LIBCPP_TEMPLATE_VIS __copy_constructor<__traits<_Types...>,           \
                                                  copy_constructible_trait>     \
       : public __move_constructor<__traits<_Types...>> {                       \
     using __base_type = __move_constructor<__traits<_Types...>>;               \
@@ -902,7 +989,7 @@
   auto& __emplace(_Args&&... __args) {
     this->__destroy();
     auto& __res = this->__construct_alt(__access::__base::__get_alt<_Ip>(*this),
-                          _VSTD::forward<_Args>(__args)...);
+                                        _VSTD::forward<_Args>(__args)...);
     this->__index = _Ip;
     return __res;
   }
@@ -955,7 +1042,7 @@
 #define _LIBCPP_VARIANT_MOVE_ASSIGNMENT(move_assignable_trait,                 \
                                         move_assignment)                       \
   template <class... _Types>                                                   \
-  class _LIBCPP_TEMPLATE_VIS __move_assignment<__traits<_Types...>,           \
+  class _LIBCPP_TEMPLATE_VIS __move_assignment<__traits<_Types...>,            \
                                                 move_assignable_trait>         \
       : public __assignment<__traits<_Types...>> {                             \
     using __base_type = __assignment<__traits<_Types...>>;                     \
@@ -996,7 +1083,7 @@
 #define _LIBCPP_VARIANT_COPY_ASSIGNMENT(copy_assignable_trait,                 \
                                         copy_assignment)                       \
   template <class... _Types>                                                   \
-  class _LIBCPP_TEMPLATE_VIS __copy_assignment<__traits<_Types...>,           \
+  class _LIBCPP_TEMPLATE_VIS __copy_assignment<__traits<_Types...>,            \
                                                 copy_assignable_trait>         \
       : public __move_assignment<__traits<_Types...>> {                        \
     using __base_type = __move_assignment<__traits<_Types...>>;                \
@@ -1590,18 +1677,12 @@
       __lhs.index(), __convert_to_bool<greater_equal<>>{}, __lhs, __rhs);
 }
 
-template <class _Visitor, class... _Vs>
+template <class _Vis, class... _Vs>
 inline _LIBCPP_INLINE_VISIBILITY
 _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS
-constexpr decltype(auto) visit(_Visitor&& __visitor, _Vs&&... __vs) {
+constexpr decltype(auto) visit(_Vis&& __vis, _Vs&&... __vs) {
   using __variant_detail::__visitation::__variant;
-  bool __results[] = {__vs.valueless_by_exception()...};
-  for (bool __result : __results) {
-    if (__result) {
-      __throw_bad_variant_access();
-    }
-  }
-  return __variant::__visit_value(_VSTD::forward<_Visitor>(__visitor),
+  return __variant::__visit_value(_VSTD::forward<_Vis>(__vis),
                                   _VSTD::forward<_Vs>(__vs)...);
 }
 
diff --git a/test/std/utilities/variant/variant.visit/visit.pass.cpp b/test/std/utilities/variant/variant.visit/visit.pass.cpp
index 41ce442..fc03407 100644
--- a/test/std/utilities/variant/variant.visit/visit.pass.cpp
+++ b/test/std/utilities/variant/variant.visit/visit.pass.cpp
@@ -140,6 +140,30 @@
     std::visit(std::move(cobj), v, v2);
     assert((Fn::check_call<long &, std::string &>(CT_Const | CT_RValue)));
   }
+  { // four variants with four alternatives each
+    using V = std::variant<int, long, double, std::string>;
+    V v1(42l), v2("hello"), v3(101), v4(1.1); // hold long, std::string, int, double
+    std::visit(obj, v1, v2, v3, v4);
+    assert((Fn::check_call<long &, std::string &, int &, double &>(CT_NonConst | CT_LValue)));
+    std::visit(cobj, v1, v2, v3, v4);
+    assert((Fn::check_call<long &, std::string &, int &, double &>(CT_Const | CT_LValue)));
+    std::visit(std::move(obj), v1, v2, v3, v4);
+    assert((Fn::check_call<long &, std::string &, int &, double &>(CT_NonConst | CT_RValue)));
+    std::visit(std::move(cobj), v1, v2, v3, v4);
+    assert((Fn::check_call<long &, std::string &, int &, double &>(CT_Const | CT_RValue)));
+  }
+  { // four variants with five alternatives each
+    using V = std::variant<int, long, double, int*, std::string>;
+    V v1(42l), v2("hello"), v3(nullptr), v4(1.1); // hold long, std::string, int*, double
+    std::visit(obj, v1, v2, v3, v4);
+    assert((Fn::check_call<long &, std::string &, int *&, double &>(CT_NonConst | CT_LValue)));
+    std::visit(cobj, v1, v2, v3, v4);
+    assert((Fn::check_call<long &, std::string &, int *&, double &>(CT_Const | CT_LValue)));
+    std::visit(std::move(obj), v1, v2, v3, v4);
+    assert((Fn::check_call<long &, std::string &, int *&, double &>(CT_NonConst | CT_RValue)));
+    std::visit(std::move(cobj), v1, v2, v3, v4);
+    assert((Fn::check_call<long &, std::string &, int *&, double &>(CT_Const | CT_RValue)));
+  }
 }
 
 void test_argument_forwarding() {
@@ -188,23 +212,31 @@
     std::visit(obj, std::move(cv));
     assert(Fn::check_call<int &&>(Val));
   }
-  { // multi argument - multi variant
-    using S = const std::string &;
-    using V = std::variant<int, S, long &&>;
-    const std::string str = "hello";
-    long l = 43;
-    V v1(42);
-    const V &cv1 = v1;
-    V v2(str);
-    const V &cv2 = v2;
-    V v3(std::move(l));
-    const V &cv3 = v3;
-    std::visit(obj, v1, v2, v3);
-    assert((Fn::check_call<int &, S, long &>(Val)));
-    std::visit(obj, cv1, cv2, std::move(v3));
-    assert((Fn::check_call<const int &, S, long &&>(Val)));
-  }
 #endif
+  { // multi argument - multi variant
+    using V = std::variant<int, std::string, long>;
+    V v1(42), v2("hello"), v3(43l); // hold int, std::string, long
+    std::visit(obj, v1, v2, v3); // all non-const lvalues
+    assert((Fn::check_call<int &, std::string &, long &>(Val)));
+    std::visit(obj, std::as_const(v1), std::as_const(v2), std::move(v3)); // two const lvalues, one rvalue
+    assert((Fn::check_call<const int &, const std::string &, long &&>(Val)));
+  }
+  { // four variants with four alternatives each
+    using V = std::variant<int, long, double, std::string>;
+    V v1(42l), v2("hello"), v3(101), v4(1.1); // hold long, std::string, int, double
+    std::visit(obj, v1, v2, v3, v4); // all non-const lvalues
+    assert((Fn::check_call<long &, std::string &, int &, double &>(Val)));
+    std::visit(obj, std::as_const(v1), std::as_const(v2), std::move(v3), std::move(v4)); // two const lvalues, two rvalues
+    assert((Fn::check_call<const long &, const std::string &, int &&, double &&>(Val)));
+  }
+  { // four variants with five alternatives each
+    using V = std::variant<int, long, double, int*, std::string>;
+    V v1(42l), v2("hello"), v3(nullptr), v4(1.1); // hold long, std::string, int*, double
+    std::visit(obj, v1, v2, v3, v4); // all non-const lvalues
+    assert((Fn::check_call<long &, std::string &, int *&, double &>(Val)));
+    std::visit(obj, std::as_const(v1), std::as_const(v2), std::move(v3), std::move(v4)); // two const lvalues, two rvalues
+    assert((Fn::check_call<const long &, const std::string &, int *&&, double &&>(Val)));
+  }
 }
 
 struct ReturnFirst {
@@ -250,6 +282,16 @@
     constexpr V3 v3;
     static_assert(std::visit(aobj, v1, v2, v3) == 3, "");
   }
+  { // four constexpr variants with four alternatives each
+    using V = std::variant<int, long, double, int *>;
+    constexpr V v1(42l), v2(101), v3(nullptr), v4(1.1); // hold long, int, int*, double
+    static_assert(std::visit(aobj, v1, v2, v3, v4) == 4, "");
+  }
+  { // four constexpr variants with five alternatives each
+    using V = std::variant<int, long, double, long long, int *>;
+    constexpr V v1(42l), v2(101), v3(nullptr), v4(1.1); // hold long, int, int*, double
+    static_assert(std::visit(aobj, v1, v2, v3, v4) == 4, "");
+  }
 }
 
 void test_exceptions() {
@@ -295,6 +337,21 @@
     makeEmpty(v2);
     assert(test(v, v2));
   }
+  { // four variants with four alternatives each; makeEmpty applied to the first only
+    using V = std::variant<int, long, double, MakeEmptyT>;
+    V v1(42l), v2(101), v3(202), v4(1.1); // hold long, int, int, double
+    makeEmpty(v1);
+    assert(test(v1, v2, v3, v4));
+  }
+  { // four variants with five alternatives each; makeEmpty applied to every one
+    using V = std::variant<int, long, double, long long, MakeEmptyT>;
+    V v1(42l), v2(101), v3(202), v4(1.1); // hold long, int, int, double
+    makeEmpty(v1);
+    makeEmpty(v2);
+    makeEmpty(v3);
+    makeEmpty(v4);
+    assert(test(v1, v2, v3, v4));
+  }
 #endif
 }