String Literal and Wide String Literal Encoding from the Preprocessor

Adds the __clang_literal_encoding__ and __clang_wide_literal_encoding__
predefined macros to expose the encoding used for string literals to
the preprocessor.

GitOrigin-RevId: 701d70d4c25c4e02b303ba6dee1495708496f615
diff --git a/docs/LanguageExtensions.rst b/docs/LanguageExtensions.rst
index a906dc7..0075911 100644
--- a/docs/LanguageExtensions.rst
+++ b/docs/LanguageExtensions.rst
@@ -383,6 +383,18 @@
   Defined to a string that captures the Clang marketing version, including the
   Subversion tag or revision number, e.g., "``1.5 (trunk 102332)``".
 
+``__clang_literal_encoding__``
+  Defined to a narrow string literal that names the encoding currently used
+  for narrow string literals (such as ``"hello"``). This macro typically
+  expands to "UTF-8" (but this may change in the future if the
+  ``-fexec-charset="Encoding-Name"`` option is implemented).
+
+``__clang_wide_literal_encoding__``
+  Defined to a narrow string literal that names the encoding currently used
+  for wide string literals (such as ``L"hello"``). This macro typically
+  expands to "UTF-16" or "UTF-32" (but this may change in the future if the
+  ``-fwide-exec-charset="Encoding-Name"`` option is implemented).
+
 .. _langext-vectors:
 
 Vectors and Extended Vectors
diff --git a/lib/Frontend/InitPreprocessor.cpp b/lib/Frontend/InitPreprocessor.cpp
index 3d69c59..7876a36 100644
--- a/lib/Frontend/InitPreprocessor.cpp
+++ b/lib/Frontend/InitPreprocessor.cpp
@@ -778,6 +778,21 @@
     }
   }
 
+  // Macros to help identify the narrow and wide character sets.
+  // FIXME: clang currently ignores -fexec-charset=. If this changes,
+  // then this may need to be updated.
+  Builder.defineMacro("__clang_literal_encoding__", "\"UTF-8\"");
+  if (TI.getTypeWidth(TI.getWCharType()) >= 32) {
+    // FIXME: 32-bit wchar_t signals UTF-32. This may change
+    // if -fwide-exec-charset= is ever supported.
+    Builder.defineMacro("__clang_wide_literal_encoding__", "\"UTF-32\"");
+  } else {
+    // FIXME: Less-than 32-bit wchar_t generally means UTF-16
+    // (e.g., Windows, 32-bit IBM). This may need to be
+    // updated if -fwide-exec-charset= is ever supported.
+    Builder.defineMacro("__clang_wide_literal_encoding__", "\"UTF-16\"");
+  }
+
   if (LangOpts.Optimize)
     Builder.defineMacro("__OPTIMIZE__");
   if (LangOpts.OptimizeSize)
diff --git a/test/Preprocessor/init-x86.c b/test/Preprocessor/init-x86.c
index d22a2ab..527cd39 100644
--- a/test/Preprocessor/init-x86.c
+++ b/test/Preprocessor/init-x86.c
@@ -1306,10 +1306,12 @@
 // X86_64-CLOUDABI:#define __amd64 1
 // X86_64-CLOUDABI:#define __amd64__ 1
 // X86_64-CLOUDABI:#define __clang__ 1
+// X86_64-CLOUDABI:#define __clang_literal_encoding__ {{.*}}
 // X86_64-CLOUDABI:#define __clang_major__ {{.*}}
 // X86_64-CLOUDABI:#define __clang_minor__ {{.*}}
 // X86_64-CLOUDABI:#define __clang_patchlevel__ {{.*}}
 // X86_64-CLOUDABI:#define __clang_version__ {{.*}}
+// X86_64-CLOUDABI:#define __clang_wide_literal_encoding__ {{.*}}
 // X86_64-CLOUDABI:#define __llvm__ 1
 // X86_64-CLOUDABI:#define __x86_64 1
 // X86_64-CLOUDABI:#define __x86_64__ 1
diff --git a/test/Preprocessor/init.c b/test/Preprocessor/init.c
index 136200a..0329453 100644
--- a/test/Preprocessor/init.c
+++ b/test/Preprocessor/init.c
@@ -115,10 +115,12 @@
 // COMMON:#define __STDC__ 1
 // COMMON:#define __VERSION__ {{.*}}
 // COMMON:#define __clang__ 1
+// COMMON:#define __clang_literal_encoding__ {{.*}}
 // COMMON:#define __clang_major__ {{[0-9]+}}
 // COMMON:#define __clang_minor__ {{[0-9]+}}
 // COMMON:#define __clang_patchlevel__ {{[0-9]+}}
 // COMMON:#define __clang_version__ {{.*}}
+// COMMON:#define __clang_wide_literal_encoding__ {{.*}}
 // COMMON:#define __llvm__ 1
 //
 // RUN: %clang_cc1 -E -dM -triple=x86_64-pc-win32 < /dev/null | FileCheck -match-full-lines -check-prefix C-DEFAULT %s
@@ -1844,10 +1846,12 @@
 // WEBASSEMBLY-NOT:#define __WINT_UNSIGNED__
 // WEBASSEMBLY-NEXT:#define __WINT_WIDTH__ 32
 // WEBASSEMBLY-NEXT:#define __clang__ 1
+// WEBASSEMBLY-NEXT:#define __clang_literal_encoding__ {{.*}}
 // WEBASSEMBLY-NEXT:#define __clang_major__ {{.*}}
 // WEBASSEMBLY-NEXT:#define __clang_minor__ {{.*}}
 // WEBASSEMBLY-NEXT:#define __clang_patchlevel__ {{.*}}
 // WEBASSEMBLY-NEXT:#define __clang_version__ "{{.*}}"
+// WEBASSEMBLY-NEXT:#define __clang_wide_literal_encoding__ {{.*}}
 // WEBASSEMBLY-NEXT:#define __llvm__ 1
 // WEBASSEMBLY-NOT:#define __unix
 // WEBASSEMBLY-NOT:#define __unix__