[libcxx] Implement the stat function family on top of native windows APIs

While the windows CRTs (the modern UCRT, and the legacy msvcrt.dll
that mingw still often defaults to) do provide stat functions, they're
a bit lacking - they only provide second precision on the modification
time, lack support for symlinks and a few other details.

Instead reimplement them using a couple windows native functions,
getting exactly the info we need. (Technically, the implementation
within the CRT calls these functions anyway.)

If we only need a few fields, we could also do with fewer calls, as a
later optimization.

Differential Revision: https://reviews.llvm.org/D91141

GitOrigin-RevId: 2ff8662b5d16129ec6d1ee60dcec4f6ff8f717e2
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 5beb81e..224299b 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -92,6 +92,7 @@
     filesystem/filesystem_common.h
     filesystem/operations.cpp
     filesystem/directory_iterator.cpp
+    filesystem/posix_compat.h
     )
   # Filesystem uses __int128_t, which requires a definition of __muloi4 when
   # compiled with UBSAN. This definition is not provided by libgcc_s, but is
diff --git a/src/filesystem/filesystem_common.h b/src/filesystem/filesystem_common.h
index e0fdbcc..8204e9a 100644
--- a/src/filesystem/filesystem_common.h
+++ b/src/filesystem/filesystem_common.h
@@ -224,9 +224,41 @@
 using chrono::duration;
 using chrono::duration_cast;
 
+#if defined(_LIBCPP_WIN32API)
+// Various C runtime versions (UCRT, or the legacy msvcrt.dll used by
+// some mingw toolchains) provide different stat function implementations,
+// with a number of limitations with respect to what we want from the
+// stat function. Instead provide our own (in the anonymous detail namespace
+// in posix_compat.h) which does exactly what we want, along with our own
+// stat structure and flag macros.
+
+struct TimeSpec {
+  int64_t tv_sec;
+  int64_t tv_nsec;
+};
+struct StatT {
+  unsigned st_mode;
+  TimeSpec st_atim;
+  TimeSpec st_mtim;
+  uint64_t st_dev; // FILE_ID_INFO::VolumeSerialNumber
+  struct FileIdStruct {
+    unsigned char id[16]; // FILE_ID_INFO::FileId
+    bool operator==(const FileIdStruct &other) const {
+      for (int i = 0; i < 16; i++)
+        if (id[i] != other.id[i])
+          return false;
+      return true;
+    }
+  } st_ino;
+  uint32_t st_nlink;
+  uintmax_t st_size;
+};
+
+#else
 using TimeSpec = struct timespec;
 using TimeVal = struct timeval;
 using StatT = struct stat;
+#endif
 
 template <class FileTimeT, class TimeT,
           bool IsFloat = is_floating_point<typename FileTimeT::rep>::value>
diff --git a/src/filesystem/operations.cpp b/src/filesystem/operations.cpp
index 50a895d..548a027 100644
--- a/src/filesystem/operations.cpp
+++ b/src/filesystem/operations.cpp
@@ -17,6 +17,8 @@
 
 #include "filesystem_common.h"
 
+#include "posix_compat.h"
+
 #if defined(_LIBCPP_WIN32API)
 # define WIN32_LEAN_AND_MEAN
 # define NOMINMAX
@@ -495,7 +497,7 @@
 
 file_status posix_stat(path const& p, StatT& path_stat, error_code* ec) {
   error_code m_ec;
-  if (::stat(p.c_str(), &path_stat) == -1)
+  if (detail::stat(p.c_str(), &path_stat) == -1)
     m_ec = detail::capture_errno();
   return create_file_status(m_ec, p, path_stat, ec);
 }
@@ -507,7 +509,7 @@
 
 file_status posix_lstat(path const& p, StatT& path_stat, error_code* ec) {
   error_code m_ec;
-  if (::lstat(p.c_str(), &path_stat) == -1)
+  if (detail::lstat(p.c_str(), &path_stat) == -1)
     m_ec = detail::capture_errno();
   return create_file_status(m_ec, p, path_stat, ec);
 }
@@ -545,7 +547,7 @@
   m_status = file_status{};
   m_stat = {};
   error_code m_ec;
-  if (::fstat(fd, &m_stat) == -1)
+  if (detail::fstat(fd, &m_stat) == -1)
     m_ec = capture_errno();
   m_status = create_file_status(m_ec, name, m_stat, &ec);
   return m_status;
@@ -1197,7 +1199,7 @@
   auto buff = std::unique_ptr<char[], NullDeleter>(stack_buff);
 #else
   StatT sb;
-  if (::lstat(p.c_str(), &sb) == -1) {
+  if (detail::lstat(p.c_str(), &sb) == -1) {
     return err.report(capture_errno());
   }
   const size_t size = sb.st_size + 1;
diff --git a/src/filesystem/posix_compat.h b/src/filesystem/posix_compat.h
new file mode 100644
index 0000000..3eec463
--- /dev/null
+++ b/src/filesystem/posix_compat.h
@@ -0,0 +1,187 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//
+// POSIX-like portability helper functions.
+//
+// These generally behave like the proper posix functions, with these
+// exceptions:
+// On Windows, they take paths in wchar_t* form, instead of char* form.
+//
+// These are provided within an anonymous namespace within the detail
+// namespace - callers need to include this header and call them as
+// detail::function(), regardless of platform.
+//
+
+#ifndef POSIX_COMPAT_H
+#define POSIX_COMPAT_H
+
+#include "filesystem"
+
+#include "filesystem_common.h"
+
+#if defined(_LIBCPP_WIN32API)
+# define WIN32_LEAN_AND_MEAN
+# define NOMINMAX
+# include <windows.h>
+# include <io.h>
+#else
+# include <unistd.h>
+# include <sys/stat.h>
+# include <sys/statvfs.h>
+#endif
+#include <time.h>
+
+_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
+
+namespace detail {
+namespace {
+
+#if defined(_LIBCPP_WIN32API)
+
+// Various C runtime header sets provide more or less of these. As we
+// provide our own implementation, undef all potential defines from the
+// C runtime headers and provide a complete set of macros of our own.
+
+#undef _S_IFMT
+#undef _S_IFDIR
+#undef _S_IFCHR
+#undef _S_IFIFO
+#undef _S_IFREG
+#undef _S_IFBLK
+#undef _S_IFLNK
+#undef _S_IFSOCK
+
+#define _S_IFMT   0xF000
+#define _S_IFDIR  0x4000
+#define _S_IFCHR  0x2000
+#define _S_IFIFO  0x1000
+#define _S_IFREG  0x8000
+#define _S_IFBLK  0x6000
+#define _S_IFLNK  0xA000
+#define _S_IFSOCK 0xC000
+
+#undef S_ISDIR
+#undef S_ISFIFO
+#undef S_ISCHR
+#undef S_ISREG
+#undef S_ISLNK
+#undef S_ISBLK
+#undef S_ISSOCK
+
+#define S_ISDIR(m)      (((m) & _S_IFMT) == _S_IFDIR)
+#define S_ISCHR(m)      (((m) & _S_IFMT) == _S_IFCHR)
+#define S_ISFIFO(m)     (((m) & _S_IFMT) == _S_IFIFO)
+#define S_ISREG(m)      (((m) & _S_IFMT) == _S_IFREG)
+#define S_ISBLK(m)      (((m) & _S_IFMT) == _S_IFBLK)
+#define S_ISLNK(m)      (((m) & _S_IFMT) == _S_IFLNK)
+#define S_ISSOCK(m)     (((m) & _S_IFMT) == _S_IFSOCK)
+
+
+// There were 369 years and 89 leap days from the Windows epoch
+// (1601) to the Unix epoch (1970).
+#define FILE_TIME_OFFSET_SECS (uint64_t(369 * 365 + 89) * (24 * 60 * 60))
+
+TimeSpec filetime_to_timespec(LARGE_INTEGER li) {
+  TimeSpec ret;
+  ret.tv_sec = li.QuadPart / 10000000 - FILE_TIME_OFFSET_SECS;
+  ret.tv_nsec = (li.QuadPart % 10000000) * 100;
+  return ret;
+}
+
+int set_errno(int e = GetLastError()) {
+  errno = static_cast<int>(__win_err_to_errc(e));
+  return -1;
+}
+
+class WinHandle {
+public:
+  WinHandle(const wchar_t *p, DWORD access, DWORD flags) {
+    h = CreateFileW(
+        p, access, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
+        nullptr, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS | flags, nullptr);
+  }
+  ~WinHandle() {
+    if (h != INVALID_HANDLE_VALUE)
+      CloseHandle(h);
+  }
+  operator HANDLE() const { return h; }
+  operator bool() const { return h != INVALID_HANDLE_VALUE; }
+
+private:
+  HANDLE h;
+};
+
+int stat_handle(HANDLE h, StatT *buf) {
+  FILE_BASIC_INFO basic;
+  if (!GetFileInformationByHandleEx(h, FileBasicInfo, &basic, sizeof(basic)))
+    return set_errno();
+  memset(buf, 0, sizeof(*buf));
+  buf->st_mtim = filetime_to_timespec(basic.LastWriteTime);
+  buf->st_atim = filetime_to_timespec(basic.LastAccessTime);
+  buf->st_mode = 0555; // Read-only
+  if (!(basic.FileAttributes & FILE_ATTRIBUTE_READONLY))
+    buf->st_mode |= 0222; // Write
+  if (basic.FileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
+    buf->st_mode |= _S_IFDIR;
+  } else {
+    buf->st_mode |= _S_IFREG;
+  }
+  if (basic.FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) {
+    FILE_ATTRIBUTE_TAG_INFO tag;
+    if (!GetFileInformationByHandleEx(h, FileAttributeTagInfo, &tag,
+                                      sizeof(tag)))
+      return set_errno();
+    if (tag.ReparseTag == IO_REPARSE_TAG_SYMLINK)
+      buf->st_mode = (buf->st_mode & ~_S_IFMT) | _S_IFLNK;
+  }
+  FILE_STANDARD_INFO standard;
+  if (!GetFileInformationByHandleEx(h, FileStandardInfo, &standard,
+                                    sizeof(standard)))
+    return set_errno();
+  buf->st_nlink = standard.NumberOfLinks;
+  buf->st_size = standard.EndOfFile.QuadPart;
+  BY_HANDLE_FILE_INFORMATION info;
+  if (!GetFileInformationByHandle(h, &info))
+    return set_errno();
+  buf->st_dev = info.dwVolumeSerialNumber;
+  memcpy(&buf->st_ino.id[0], &info.nFileIndexHigh, 4);
+  memcpy(&buf->st_ino.id[4], &info.nFileIndexLow, 4);
+  return 0;
+}
+
+int stat_file(const wchar_t *path, StatT *buf, DWORD flags) {
+  WinHandle h(path, FILE_READ_ATTRIBUTES, flags);
+  if (!h)
+    return set_errno();
+  int ret = stat_handle(h, buf);
+  return ret;
+}
+
+int stat(const wchar_t *path, StatT *buf) { return stat_file(path, buf, 0); }
+
+int lstat(const wchar_t *path, StatT *buf) {
+  return stat_file(path, buf, FILE_FLAG_OPEN_REPARSE_POINT);
+}
+
+int fstat(int fd, StatT *buf) {
+  HANDLE h = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
+  return stat_handle(h, buf);
+}
+#else
+using ::fstat;
+using ::lstat;
+using ::stat;
+#endif
+
+} // namespace
+} // end namespace detail
+
+_LIBCPP_END_NAMESPACE_FILESYSTEM
+
+#endif // POSIX_COMPAT_H