Substantially reduce instantiations and debug size of std::function

std::function uses a standard allocator to manage its memory, however
standard allocators are templates and using them correctly requires
a stupid amount of instantiations. This leads to a substantial increase
in debug info and object sizes.

This patch addresses the issue by dropping the allocator when possible
and using raw new and delete to get memory.

This change decreases the object file size for the test func.wrap.func.con/F.pass.cpp by 33% and the final binary by 29% (when compiled with -g -ggnu-pubnames -gpubnames).

It also roughly halfs the number of entries in the pubnames and pubtype
sections.

git-svn-id: https://llvm.org/svn/llvm-project/libcxx/trunk@362865 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/__config b/include/__config
index b63102c..6a1e0f5 100644
--- a/include/__config
+++ b/include/__config
@@ -1272,6 +1272,12 @@
 #  define _LIBCPP_FALLTHROUGH() ((void)0)
 #endif
 
+#if defined(_LIBCPP_COMPILER_CLANG) || defined(_LIBCPP_COMPILER_GCC)
+#define _LIBCPP_NODEBUG __attribute__((nodebug))
+#else
+#define _LIBCPP_NODEBUG
+#endif
+
 #if defined(_LIBCPP_ABI_MICROSOFT) && \
     (defined(_LIBCPP_COMPILER_MSVC) || __has_declspec_attribute(empty_bases))
 #  define _LIBCPP_DECLSPEC_EMPTY_BASES __declspec(empty_bases)
diff --git a/include/functional b/include/functional
index 2cec0ea..e2eac0e 100644
--- a/include/functional
+++ b/include/functional
@@ -1479,6 +1479,8 @@
 // __alloc_func holds a functor and an allocator.
 
 template <class _Fp, class _Ap, class _FB> class __alloc_func;
+template <class _Fp, class _FB>
+class __default_alloc_func;
 
 template <class _Fp, class _Ap, class _Rp, class... _ArgTypes>
 class __alloc_func<_Fp, _Ap, _Rp(_ArgTypes...)>
@@ -1548,6 +1550,56 @@
 
     _LIBCPP_INLINE_VISIBILITY
     void destroy() _NOEXCEPT { __f_.~__compressed_pair<_Target, _Alloc>(); }
+
+    static void __destroy_and_delete(__alloc_func* __f) {
+      typedef allocator_traits<_Alloc> __alloc_traits;
+      typedef typename __rebind_alloc_helper<__alloc_traits, __alloc_func>::type
+          _FunAlloc;
+      _FunAlloc __a(__f->__get_allocator());
+      __f->destroy();
+      __a.deallocate(__f, 1);
+    }
+};
+
+template <class _Fp, class _Rp, class... _ArgTypes>
+class __default_alloc_func<_Fp, _Rp(_ArgTypes...)> {
+  _Fp __f_;
+
+public:
+  typedef _Fp _Target;
+
+  _LIBCPP_INLINE_VISIBILITY
+  const _Target& __target() const { return __f_; }
+
+  _LIBCPP_INLINE_VISIBILITY
+  explicit __default_alloc_func(_Target&& __f) : __f_(std::move(__f)) {}
+
+  _LIBCPP_INLINE_VISIBILITY
+  explicit __default_alloc_func(const _Target& __f) : __f_(__f) {}
+
+  _LIBCPP_INLINE_VISIBILITY
+  _Rp operator()(_ArgTypes&&... __arg) {
+    typedef __invoke_void_return_wrapper<_Rp> _Invoker;
+    return _Invoker::__call(__f_, _VSTD::forward<_ArgTypes>(__arg)...);
+  }
+
+  _LIBCPP_INLINE_VISIBILITY
+  __default_alloc_func* __clone() const {
+      __builtin_new_allocator::__holder_t __hold =
+        __builtin_new_allocator::__allocate_type<__default_alloc_func>(1);
+    __default_alloc_func* __res =
+        ::new (__hold.get()) __default_alloc_func(__f_);
+    (void)__hold.release();
+    return __res;
+  }
+
+  _LIBCPP_INLINE_VISIBILITY
+  void destroy() _NOEXCEPT { __f_.~_Target(); }
+
+  static void __destroy_and_delete(__default_alloc_func* __f) {
+    __f->destroy();
+      __builtin_new_allocator::__deallocate_type<__default_alloc_func>(__f, 1);
+  }
 };
 
 // __base provides an abstract interface for copyable functors.
@@ -1696,7 +1748,7 @@
     __value_func() _NOEXCEPT : __f_(0) {}
 
     template <class _Fp, class _Alloc>
-    _LIBCPP_INLINE_VISIBILITY __value_func(_Fp&& __f, const _Alloc __a)
+    _LIBCPP_INLINE_VISIBILITY __value_func(_Fp&& __f, const _Alloc& __a)
         : __f_(0)
     {
         typedef allocator_traits<_Alloc> __alloc_traits;
@@ -1724,6 +1776,11 @@
         }
     }
 
+    template <class _Fp,
+        class = typename enable_if<!is_same<typename decay<_Fp>::type, __value_func>::value>::type>
+    _LIBCPP_INLINE_VISIBILITY explicit __value_func(_Fp&& __f)
+        : __value_func(std::forward<_Fp>(__f), allocator<_Fp>()) {}
+
     _LIBCPP_INLINE_VISIBILITY
     __value_func(const __value_func& __f)
     {
@@ -1923,29 +1980,22 @@
         return __f->__clone();
     }
 
-    template <typename _Fun> static void __large_destroy(void* __s)
-    {
-        typedef allocator_traits<typename _Fun::_Alloc> __alloc_traits;
-        typedef typename __rebind_alloc_helper<__alloc_traits, _Fun>::type
-            _FunAlloc;
-        _Fun* __f = static_cast<_Fun*>(__s);
-        _FunAlloc __a(__f->__get_allocator());
-        __f->destroy();
-        __a.deallocate(__f, 1);
+    template <typename _Fun>
+    static void __large_destroy(void* __s) {
+      _Fun::__destroy_and_delete(static_cast<_Fun*>(__s));
     }
 
     template <typename _Fun>
     _LIBCPP_INLINE_VISIBILITY static const __policy*
-        __choose_policy(/* is_small = */ false_type)
-    {
-        static const _LIBCPP_CONSTEXPR __policy __policy_ = {
-            &__large_clone<_Fun>, &__large_destroy<_Fun>, false,
+    __choose_policy(/* is_small = */ false_type) {
+      static const _LIBCPP_CONSTEXPR __policy __policy_ = {
+          &__large_clone<_Fun>, &__large_destroy<_Fun>, false,
 #ifndef _LIBCPP_NO_RTTI
-            &typeid(typename _Fun::_Target)
+          &typeid(typename _Fun::_Target)
 #else
-            nullptr
+          nullptr
 #endif
-        };
+      };
         return &__policy_;
     }
 
@@ -2070,6 +2120,25 @@
         }
     }
 
+    template <class _Fp, class = typename enable_if<!is_same<typename decay<_Fp>::type, __policy_func>::value>::type>
+    _LIBCPP_INLINE_VISIBILITY explicit __policy_func(_Fp&& __f)
+        : __policy_(__policy::__create_empty()) {
+      typedef __default_alloc_func<_Fp, _Rp(_ArgTypes...)> _Fun;
+
+      if (__function::__not_null(__f)) {
+        __invoker_ = __invoker::template __create<_Fun>();
+        __policy_ = __policy::__create<_Fun>();
+        if (__use_small_storage<_Fun>()) {
+          ::new ((void*)&__buf_.__small) _Fun(_VSTD::move(__f));
+        } else {
+          __builtin_new_allocator::__holder_t __hold =
+              __builtin_new_allocator::__allocate_type<_Fun>(1);
+          __buf_.__large = ::new (__hold.get()) _Fun(_VSTD::move(__f));
+          (void)__hold.release();
+        }
+      }
+    }
+
     _LIBCPP_INLINE_VISIBILITY
     __policy_func(const __policy_func& __f)
         : __buf_(__f.__buf_), __invoker_(__f.__invoker_),
@@ -2290,8 +2359,7 @@
 
 template <class _Rp, class... _ArgTypes>
 template <class _Fp, class>
-function<_Rp(_ArgTypes...)>::function(_Fp __f)
-    : __f_(_VSTD::move(__f), allocator<_Fp>()) {}
+function<_Rp(_ArgTypes...)>::function(_Fp __f) : __f_(_VSTD::move(__f)) {}
 
 #if _LIBCPP_STD_VER <= 14
 template <class _Rp, class... _ArgTypes>
diff --git a/include/memory b/include/memory
index 8d88617..6a8755e 100644
--- a/include/memory
+++ b/include/memory
@@ -5668,6 +5668,52 @@
      >
    : true_type {};
 
+// __builtin_new_allocator -- A non-templated helper for allocating and
+// deallocating memory using __builtin_operator_new and
+// __builtin_operator_delete. It should be used in preference to
+// `std::allocator<T>` to avoid additional instantiations.
+struct __builtin_new_allocator {
+  struct __builtin_new_deleter {
+    typedef void* pointer_type;
+
+    _LIBCPP_CONSTEXPR explicit __builtin_new_deleter(size_t __size, size_t __align)
+        : __size_(__size), __align_(__align) {}
+
+    void operator()(void* p) const _NOEXCEPT {
+        std::__libcpp_deallocate(p, __size_, __align_);
+    }
+
+   private:
+    size_t __size_;
+    size_t __align_;
+  };
+
+  typedef unique_ptr<void, __builtin_new_deleter> __holder_t;
+
+  static __holder_t __allocate_bytes(size_t __s, size_t __align) {
+      return __holder_t(std::__libcpp_allocate(__s, __align),
+                     __builtin_new_deleter(__s, __align));
+  }
+
+  static void __deallocate_bytes(void* __p, size_t __s,
+                                 size_t __align) _NOEXCEPT {
+      std::__libcpp_deallocate(__p, __s, __align);
+  }
+
+  template <class _Tp>
+  _LIBCPP_NODEBUG _LIBCPP_ALWAYS_INLINE
+  static __holder_t __allocate_type(size_t __n) {
+      return __allocate_bytes(__n * sizeof(_Tp), _LIBCPP_ALIGNOF(_Tp));
+  }
+
+  template <class _Tp>
+  _LIBCPP_NODEBUG _LIBCPP_ALWAYS_INLINE
+  static void __deallocate_type(void* __p, size_t __n) _NOEXCEPT {
+      __deallocate_bytes(__p, __n * sizeof(_Tp), _LIBCPP_ALIGNOF(_Tp));
+  }
+};
+
+
 _LIBCPP_END_NAMESPACE_STD
 
 _LIBCPP_POP_MACROS