[libcxx] Provide locale conversions to tests through lit substitution (#105651)

There are 2 problems today that this PR resolves:

libcxx tests assume the thousands separator for fr_FR locale is x00A0 on
Windows. This currently fails when run on newer versions of Windows (it
seems to have been updated to the new correct value of 0x202F around
windows 11. The exact windows version where it changed doesn't seem to
be documented anywhere). Depending the OS version, you need different
values.

There are several ifdefs to determine the environment/platform-specific
locale conversion values and it leads to maintenance as things change
over time.

This PR includes the following changes:

- Provide the environment's locale conversion values through a
  substitution. The test can opt in by placing the substitution value in a
  define flag.
- Remove the platform ifdefs (the swapping of values between Windows,
  Linux, Apple, AIX).

This is accomplished through a lit feature action that fetches the
environment's locale conversions (lconv) for members like
'thousands_sep' that we need to provide. This should ensure that we
don't lose the effectiveness of the test itself.

In addition, as a result of the above, this PR:

- Fixes a handful of locale tests which unexpectedly fail on newer
  Windows versions.
- Resolves 3 XFAIL FIX-MEs.

Originally submitted in https://github.com/llvm/llvm-project/pull/86649.

Co-authored-by: Rodrigo Salazar <4rodrigosalazar@gmail.com>
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp
index bbb67d6..f02241a 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp
@@ -13,6 +13,8 @@
 
 // REQUIRES: locale.fr_FR.UTF-8
 
+// ADDITIONAL_COMPILE_FLAGS: -DFR_MON_THOU_SEP=%{LOCALE_CONV_FR_FR_UTF_8_MON_THOUSANDS_SEP}
+
 // <locale>
 
 // class money_get<charT, InputIterator>
@@ -59,7 +61,8 @@
 };
 
 static std::wstring convert_thousands_sep(std::wstring const& in) {
-  return LocaleHelpers::convert_thousands_sep_fr_FR(in);
+  const wchar_t fr_sep = LocaleHelpers::mon_thousands_sep_or_default(FR_MON_THOU_SEP);
+  return LocaleHelpers::convert_thousands_sep(in, fr_sep);
 }
 #endif // TEST_HAS_NO_WIDE_CHARACTERS
 
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp
index e680f2e..371cf0e 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp
@@ -11,6 +11,8 @@
 
 // REQUIRES: locale.ru_RU.UTF-8
 
+// ADDITIONAL_COMPILE_FLAGS: -DRU_MON_THOU_SEP=%{LOCALE_CONV_RU_RU_UTF_8_MON_THOUSANDS_SEP}
+
 // XFAIL: glibc-old-ru_RU-decimal-point
 
 // <locale>
@@ -52,7 +54,8 @@
 };
 
 static std::wstring convert_thousands_sep(std::wstring const& in) {
-  return LocaleHelpers::convert_thousands_sep_ru_RU(in);
+  const wchar_t ru_sep = LocaleHelpers::mon_thousands_sep_or_default(RU_MON_THOU_SEP);
+  return LocaleHelpers::convert_thousands_sep(in, ru_sep);
 }
 #endif // TEST_HAS_NO_WIDE_CHARACTERS
 
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp
index 47a48de..9ac95cc5 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp
@@ -13,6 +13,8 @@
 
 // REQUIRES: locale.fr_FR.UTF-8
 
+// ADDITIONAL_COMPILE_FLAGS: -DFR_MON_THOU_SEP=%{LOCALE_CONV_FR_FR_UTF_8_MON_THOUSANDS_SEP}
+
 // <locale>
 
 // class money_put<charT, OutputIterator>
@@ -59,7 +61,8 @@
 };
 
 static std::wstring convert_thousands_sep(std::wstring const& in) {
-  return LocaleHelpers::convert_thousands_sep_fr_FR(in);
+  const wchar_t fr_sep = LocaleHelpers::mon_thousands_sep_or_default(FR_MON_THOU_SEP);
+  return LocaleHelpers::convert_thousands_sep(in, fr_sep);
 }
 #endif // TEST_HAS_NO_WIDE_CHARACTERS
 
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp
index 4aea101..be1e397 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp
@@ -11,6 +11,8 @@
 
 // REQUIRES: locale.ru_RU.UTF-8
 
+// ADDITIONAL_COMPILE_FLAGS: -DRU_MON_THOU_SEP=%{LOCALE_CONV_RU_RU_UTF_8_MON_THOUSANDS_SEP}
+
 // XFAIL: glibc-old-ru_RU-decimal-point
 
 // <locale>
@@ -52,7 +54,8 @@
 };
 
 static std::wstring convert_thousands_sep(std::wstring const& in) {
-  return LocaleHelpers::convert_thousands_sep_ru_RU(in);
+  const wchar_t ru_sep = LocaleHelpers::mon_thousands_sep_or_default(RU_MON_THOU_SEP);
+  return LocaleHelpers::convert_thousands_sep(in, ru_sep);
 }
 #endif // TEST_HAS_NO_WIDE_CHARACTERS
 
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp
index 2a70741..6b65705 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp
@@ -9,13 +9,14 @@
 // NetBSD does not support LC_MONETARY at the moment
 // XFAIL: netbsd
 
-// XFAIL: LIBCXX-FREEBSD-FIXME
-
 // REQUIRES: locale.en_US.UTF-8
 // REQUIRES: locale.fr_FR.UTF-8
 // REQUIRES: locale.ru_RU.UTF-8
 // REQUIRES: locale.zh_CN.UTF-8
 
+// ADDITIONAL_COMPILE_FLAGS: -DFR_MON_THOU_SEP=%{LOCALE_CONV_FR_FR_UTF_8_MON_THOUSANDS_SEP}
+// ADDITIONAL_COMPILE_FLAGS: -DRU_MON_THOU_SEP=%{LOCALE_CONV_RU_RU_UTF_8_MON_THOUSANDS_SEP}
+
 // <locale>
 
 // class moneypunct_byname<charT, International>
@@ -27,6 +28,7 @@
 #include <cassert>
 
 #include "test_macros.h"
+#include "locale_helpers.h"
 #include "platform_support.h" // locale name macros
 
 class Fnf
@@ -110,17 +112,10 @@
         Fnt f(LOCALE_fr_FR_UTF_8, 1);
         assert(f.thousands_sep() == ' ');
     }
-    // The below tests work around GLIBC's use of U202F as mon_thousands_sep.
+
 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
-#if defined(_CS_GNU_LIBC_VERSION)
-    const wchar_t fr_sep = glibc_version_less_than("2.27") ? L' ' : L'\u202F';
-#elif defined(_WIN32)
-    const wchar_t fr_sep = L'\u00A0';
-#elif defined(_AIX)
-    const wchar_t fr_sep = L'\u202F';
-#else
-    const wchar_t fr_sep = L' ';
-#endif
+    const wchar_t fr_sep = LocaleHelpers::mon_thousands_sep_or_default(FR_MON_THOU_SEP);
+
     {
         Fwf f(LOCALE_fr_FR_UTF_8, 1);
         assert(f.thousands_sep() == fr_sep);
@@ -140,19 +135,8 @@
         assert(f.thousands_sep() == sep);
     }
 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
-    // The below tests work around GLIBC's use of U00A0 as mon_thousands_sep
-    // and U002E as mon_decimal_point.
-    // TODO: Fix thousands_sep for 'char'.
-    // related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=16006
-#   if defined(_CS_GNU_LIBC_VERSION)
-    // FIXME libc++ specifically works around \u00A0 by translating it into
-    // a regular space.
-    const wchar_t wsep = glibc_version_less_than("2.27") ? L'\u00A0' : L'\u202F';
-#   elif defined(_WIN32) || defined(_AIX)
-    const wchar_t wsep = L'\u00A0';
-#   else
-    const wchar_t wsep = L' ';
-#   endif
+    const wchar_t wsep = LocaleHelpers::mon_thousands_sep_or_default(RU_MON_THOU_SEP);
+
     {
         Fwf f(LOCALE_ru_RU_UTF_8, 1);
         assert(f.thousands_sep() == wsep);
diff --git a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp
index 850352b..ccecd85 100644
--- a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp
@@ -14,6 +14,8 @@
 // REQUIRES: locale.en_US.UTF-8
 // REQUIRES: locale.fr_FR.UTF-8
 
+// ADDITIONAL_COMPILE_FLAGS: -DFR_THOU_SEP=%{LOCALE_CONV_FR_FR_UTF_8_THOUSANDS_SEP}
+
 // <locale>
 
 // template <class charT> class numpunct_byname;
@@ -25,6 +27,7 @@
 #include <cassert>
 
 #include "test_macros.h"
+#include "locale_helpers.h"
 #include "platform_support.h" // locale name macros
 
 int main(int, char**)
@@ -74,18 +77,11 @@
         }
 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
         {
-#if defined(_CS_GNU_LIBC_VERSION)
-            const wchar_t wsep = glibc_version_less_than("2.27") ? L' ' : L'\u202f';
-#  elif defined(_AIX)
-            const wchar_t wsep = L'\u202F';
-#  elif defined(_WIN32)
-            const wchar_t wsep = L'\u00A0';
-#  else
-            const wchar_t wsep = L',';
-#  endif
-            typedef wchar_t C;
-            const std::numpunct<C>& np = std::use_facet<std::numpunct<C> >(l);
-            assert(np.thousands_sep() == wsep);
+          const wchar_t wsep = LocaleHelpers::thousands_sep_or_default(FR_THOU_SEP);
+
+          typedef wchar_t C;
+          const std::numpunct<C>& np = std::use_facet<std::numpunct<C> >(l);
+          assert(np.thousands_sep() == wsep);
         }
 #endif // TEST_HAS_NO_WIDE_CHARACTERS
     }
diff --git a/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp b/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp
index aecb96b..ebf907a 100644
--- a/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp
+++ b/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp
@@ -16,6 +16,9 @@
 // REQUIRES: locale.fr_FR.UTF-8
 // REQUIRES: locale.ja_JP.UTF-8
 
+// ADDITIONAL_COMPILE_FLAGS: -DFR_THOU_SEP=%{LOCALE_CONV_FR_FR_UTF_8_THOUSANDS_SEP}
+// ADDITIONAL_COMPILE_FLAGS: -DFR_DEC_POINT=%{LOCALE_CONV_FR_FR_UTF_8_DECIMAL_POINT}
+
 // <chrono>
 
 // template<class Rep, class Period = ratio<1>> class duration;
@@ -33,6 +36,7 @@
 #include <sstream>
 
 #include "make_string.h"
+#include "locale_helpers.h"
 #include "platform_support.h" // locale name macros
 #include "test_macros.h"
 
@@ -88,21 +92,11 @@
     assert(stream_fr_FR_locale<CharT>(1'000.123456s) == SV("1 000,1235s"));
 #endif
   } else {
-#ifdef _WIN32
-    assert(stream_fr_FR_locale<CharT>(-1'000'000s) == SV("-1\u00A0000\u00A0000s"));
-    assert(stream_fr_FR_locale<CharT>(1'000'000s) == SV("1\u00A0000\u00A0000s"));
-    assert(stream_fr_FR_locale<CharT>(-1'000.123456s) == SV("-1\u00A0000,1235s"));
-    assert(stream_fr_FR_locale<CharT>(1'000.123456s) == SV("1\u00A0000,1235s"));
-#elif defined(__APPLE__)
-    assert(stream_fr_FR_locale<CharT>(-1'000'000s) == SV("-1000000s"));
-    assert(stream_fr_FR_locale<CharT>(1'000'000s) == SV("1000000s"));
-    assert(stream_fr_FR_locale<CharT>(-1'000.123456s) == SV("-1000,1235s"));
-    assert(stream_fr_FR_locale<CharT>(1'000.123456s) == SV("1000,1235s"));
-#else
-    assert(stream_fr_FR_locale<CharT>(-1'000'000s) == SV("-1\u202f000\u202f000s"));
-    assert(stream_fr_FR_locale<CharT>(1'000'000s) == SV("1\u202f000\u202f000s"));
-    assert(stream_fr_FR_locale<CharT>(-1'000.123456s) == SV("-1\u202f000,1235s"));
-    assert(stream_fr_FR_locale<CharT>(1'000.123456s) == SV("1\u202f000,1235s"));
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+    assert(stream_fr_FR_locale<CharT>(-1'000'000s) == L"-1" FR_THOU_SEP "000" FR_THOU_SEP "000s");
+    assert(stream_fr_FR_locale<CharT>(1'000'000s) == L"1" FR_THOU_SEP "000" FR_THOU_SEP "000s");
+    assert(stream_fr_FR_locale<CharT>(-1'000.123456s) == L"-1" FR_THOU_SEP "000" FR_DEC_POINT "1235s");
+    assert(stream_fr_FR_locale<CharT>(1'000.123456s) == L"1" FR_THOU_SEP "000" FR_DEC_POINT "1235s");
 #endif
   }
 
diff --git a/libcxx/test/support/locale_helpers.h b/libcxx/test/support/locale_helpers.h
index 3eb24eb..946c2fe 100644
--- a/libcxx/test/support/locale_helpers.h
+++ b/libcxx/test/support/locale_helpers.h
@@ -41,37 +41,6 @@
   return out;
 }
 
-// GLIBC 2.27 and newer use U+202F NARROW NO-BREAK SPACE as a thousands separator.
-// This function converts the spaces in string inputs to U+202F if need
-// be. FreeBSD's locale data also uses U+202F, since 2018.
-// Windows uses U+00A0 NO-BREAK SPACE.
-std::wstring convert_thousands_sep_fr_FR(std::wstring const& in) {
-#if defined(_CS_GNU_LIBC_VERSION)
-  if (glibc_version_less_than("2.27"))
-    return in;
-  else
-    return convert_thousands_sep(in, L'\u202F');
-#elif defined(__FreeBSD__)
-  return convert_thousands_sep(in, L'\u202F');
-#elif defined(_WIN32)
-  return convert_thousands_sep(in, L'\u00A0');
-#else
-  return in;
-#endif
-}
-
-// GLIBC 2.27 uses U+202F NARROW NO-BREAK SPACE as a thousands separator.
-// FreeBSD, AIX and Windows use U+00A0 NO-BREAK SPACE.
-std::wstring convert_thousands_sep_ru_RU(std::wstring const& in) {
-#if defined(TEST_HAS_GLIBC)
-  return convert_thousands_sep(in, L'\u202F');
-#  elif defined(__FreeBSD__) || defined(_WIN32) || defined(_AIX)
-  return convert_thousands_sep(in, L'\u00A0');
-#  else
-  return in;
-#  endif
-}
-
 std::wstring negate_en_US(std::wstring s) {
 #if defined(_WIN32)
   return L"(" + s + L")";
@@ -80,6 +49,12 @@
 #endif
 }
 
+wchar_t thousands_sep_or_default(std::wstring s) { return !s.empty() ? s[0] : L','; }
+
+wchar_t mon_thousands_sep_or_default(std::wstring s) { return thousands_sep_or_default(s); }
+
+wchar_t decimal_point_or_default(std::wstring s) { return !s.empty() ? s[0] : L'.'; }
+
 #endif // TEST_HAS_NO_WIDE_CHARACTERS
 
 std::string negate_en_US(std::string s) {
diff --git a/libcxx/utils/libcxx/test/features.py b/libcxx/utils/libcxx/test/features.py
index e4b413d..a83dcd1 100644
--- a/libcxx/utils/libcxx/test/features.py
+++ b/libcxx/utils/libcxx/test/features.py
@@ -425,6 +425,10 @@
     "fr_CA.ISO8859-1": ["fr_CA.ISO8859-1", "French_Canada.1252"],
     "cs_CZ.ISO8859-2": ["cs_CZ.ISO8859-2", "Czech_Czech Republic.1250"],
 }
+provide_locale_conversions = {
+    "fr_FR.UTF-8": ["decimal_point", "mon_thousands_sep", "thousands_sep"],
+    "ru_RU.UTF-8": ["mon_thousands_sep"],
+}
 for locale, alts in locales.items():
     # Note: Using alts directly in the lambda body here will bind it to the value at the
     # end of the loop. Assigning it to a default argument works around this issue.
@@ -432,10 +436,95 @@
         Feature(
             name="locale.{}".format(locale),
             when=lambda cfg, alts=alts: hasAnyLocale(cfg, alts),
-        )
+            actions=lambda cfg, locale=locale, alts=alts: _getLocaleFlagsAction(
+                cfg, locale, alts, provide_locale_conversions[locale]
+            )
+            if locale in provide_locale_conversions
+            and "_LIBCPP_HAS_NO_WIDE_CHARACTERS" not in compilerMacros(cfg)
+            else [],
+        ),
     )
 
 
+# Provide environment locale conversions through substitutions to avoid platform specific
+# maintenance.
+def _getLocaleFlagsAction(cfg, locale, alts, members):
+    alts_list = ",".join([f'"{l}"' for l in alts])
+    get_member_list = ",".join([f"lc->{m}" for m in members])
+
+    localeconv_info = programOutput(
+        cfg,
+        r"""
+        #if defined(_WIN32) && !defined(_CRT_SECURE_NO_WARNINGS)
+        #define _CRT_SECURE_NO_WARNINGS
+        #endif
+        #include <stdio.h>
+        #include <locale.h>
+        #include <stdlib.h>
+        #include <wchar.h>
+
+        // Print each requested locale conversion member on separate lines.
+        int main() {
+          const char* locales[] = { %s };
+          for (int loc_i = 0; loc_i < %d; ++loc_i) {
+            if (!setlocale(LC_ALL, locales[loc_i])) {
+              continue; // Choose first locale name that is recognized.
+            }
+
+            lconv* lc = localeconv();
+            const char* members[] = { %s };
+            for (size_t m_i = 0; m_i < %d; ++m_i) {
+              if (!members[m_i]) {
+                printf("\n"); // member value is an empty string
+                continue;
+              }
+
+              size_t len = mbstowcs(nullptr, members[m_i], 0);
+              if (len == static_cast<size_t>(-1)) {
+                fprintf(stderr, "mbstowcs failed unexpectedly\n");
+                return 1;
+              }
+              // Include room for null terminator. Use malloc as these features
+              // are also used by lit configs that don't use -lc++ (libunwind tests).
+              wchar_t* dst = (wchar_t*)malloc((len + 1) * sizeof(wchar_t));
+              size_t ret = mbstowcs(dst, members[m_i], len + 1);
+              if (ret == static_cast<size_t>(-1)) {
+                fprintf(stderr, "mbstowcs failed unexpectedly\n");
+                free(dst);
+                return 1;
+              }
+
+              for (size_t i = 0; i < len; ++i) {
+                if (dst[i] > 0x7F) {
+                  printf("\\u%%04x", dst[i]);
+                } else {
+                  // c++03 does not allow basic ascii-range characters in UCNs
+                  printf("%%c", (char)dst[i]);
+                }
+              }
+              printf("\n");
+              free(dst);
+            }
+            return 0;
+          }
+
+          return 1;
+        }
+        """
+        % (alts_list, len(alts), get_member_list, len(members)),
+    )
+    valid_define_name = re.sub(r"[.-]", "_", locale).upper()
+    return [
+        # Provide locale conversion through a substitution.
+        # Example: %{LOCALE_CONV_FR_FR_UTF_8_THOUSANDS_SEP} = L"\u202f"
+        AddSubstitution(
+            f"%{{LOCALE_CONV_{valid_define_name}_{member.upper()}}}",
+            lambda cfg, value=value: f"'L\"{value}\"'",
+        )
+        for member, value in zip(members, localeconv_info.split("\n"))
+    ]
+
+
 # Add features representing the target platform name: darwin, linux, windows, etc...
 DEFAULT_FEATURES += [
     Feature(name="darwin", when=lambda cfg: "__APPLE__" in compilerMacros(cfg)),