blob: 5951e59248e94fb99f0e65eb6f808c11b8b9895e [file] [log] [blame]
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// For information see https://libcxx.llvm.org/DesignDocs/TimeZone.html
#include <algorithm>
#include <chrono>
#include <filesystem>
#include <fstream>
#include <stdexcept>
#include <string>
#include "include/tzdb/time_zone_private.h"
#include "include/tzdb/types_private.h"
#include "include/tzdb/tzdb_list_private.h"
#include "include/tzdb/tzdb_private.h"
// Contains a parser for the IANA time zone data files.
//
// These files can be found at https://data.iana.org/time-zones/ and are in the
// public domain. Information regarding the input can be found at
// https://data.iana.org/time-zones/tz-how-to.html and
// https://man7.org/linux/man-pages/man8/zic.8.html.
//
// As indicated at https://howardhinnant.github.io/date/tz.html#Installation
// For Windows another file seems to be required
// https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml
// This file seems to contain the mapping of Windows time zone name to IANA
// time zone names.
//
// However this article mentions another way to do the mapping on Windows
// https://devblogs.microsoft.com/oldnewthing/20210527-00/?p=105255
// This requires Windows 10 Version 1903, which was released in May of 2019
// and considered end of life in December 2020
// https://learn.microsoft.com/en-us/lifecycle/announcements/windows-10-1903-end-of-servicing
//
// TODO TZDB Implement the Windows mapping in tzdb::current_zone
_LIBCPP_BEGIN_NAMESPACE_STD
namespace chrono {
// This function is weak so it can be overriden in the tests. The
// declaration is in the test header test/support/test_tzdb.h
_LIBCPP_WEAK string_view __libcpp_tzdb_directory() {
#if defined(__linux__)
return "/usr/share/zoneinfo/";
#else
# error "unknown path to the IANA Time Zone Database"
#endif
}
//===----------------------------------------------------------------------===//
// Details
//===----------------------------------------------------------------------===//
[[nodiscard]] static bool __is_whitespace(int __c) { return __c == ' ' || __c == '\t'; }
static void __skip_optional_whitespace(istream& __input) {
while (chrono::__is_whitespace(__input.peek()))
__input.get();
}
static void __skip_mandatory_whitespace(istream& __input) {
if (!chrono::__is_whitespace(__input.get()))
std::__throw_runtime_error("corrupt tzdb: expected whitespace");
chrono::__skip_optional_whitespace(__input);
}
[[nodiscard]] static bool __is_eol(int __c) { return __c == '\n' || __c == std::char_traits<char>::eof(); }
static void __skip_line(istream& __input) {
while (!chrono::__is_eol(__input.peek())) {
__input.get();
}
__input.get();
}
static void __skip(istream& __input, char __suffix) {
if (std::tolower(__input.peek()) == __suffix)
__input.get();
}
static void __skip(istream& __input, string_view __suffix) {
for (auto __c : __suffix)
if (std::tolower(__input.peek()) == __c)
__input.get();
}
static void __matches(istream& __input, char __expected) {
if (std::tolower(__input.get()) != __expected)
std::__throw_runtime_error((string("corrupt tzdb: expected character '") + __expected + '\'').c_str());
}
static void __matches(istream& __input, string_view __expected) {
for (auto __c : __expected)
if (std::tolower(__input.get()) != __c)
std::__throw_runtime_error((string("corrupt tzdb: expected string '") + string(__expected) + '\'').c_str());
}
[[nodiscard]] static string __parse_string(istream& __input) {
string __result;
while (true) {
int __c = __input.get();
switch (__c) {
case ' ':
case '\t':
case '\n':
__input.unget();
[[fallthrough]];
case istream::traits_type::eof():
if (__result.empty())
std::__throw_runtime_error("corrupt tzdb: expected a string");
return __result;
default:
__result.push_back(__c);
}
}
}
[[nodiscard]] static int64_t __parse_integral(istream& __input, bool __leading_zero_allowed) {
int64_t __result = __input.get();
if (__leading_zero_allowed) {
if (__result < '0' || __result > '9')
std::__throw_runtime_error("corrupt tzdb: expected a digit");
} else {
if (__result < '1' || __result > '9')
std::__throw_runtime_error("corrupt tzdb: expected a non-zero digit");
}
__result -= '0';
while (true) {
if (__input.peek() < '0' || __input.peek() > '9')
return __result;
// In order to avoid possible overflows we limit the accepted range.
// Most values parsed are expected to be very small:
// - 8784 hours in a year
// - 31 days in a month
// - year no real maximum, these values are expected to be less than
// the range of the year type.
//
// However the leapseconds use a seconds after epoch value. Using an
// int would run into an overflow in 2038. By using a 64-bit value
// the range is large enough for the bilions of years. Limiting that
// range slightly to make the code easier is not an issue.
if (__result > (std::numeric_limits<int64_t>::max() / 16))
std::__throw_runtime_error("corrupt tzdb: integral too large");
__result *= 10;
__result += __input.get() - '0';
}
}
//===----------------------------------------------------------------------===//
// Calendar
//===----------------------------------------------------------------------===//
[[nodiscard]] static day __parse_day(istream& __input) {
unsigned __result = chrono::__parse_integral(__input, false);
if (__result > 31)
std::__throw_runtime_error("corrupt tzdb day: value too large");
return day{__result};
}
[[nodiscard]] static weekday __parse_weekday(istream& __input) {
// TZDB allows the shortest unique name.
switch (std::tolower(__input.get())) {
case 'f':
chrono::__skip(__input, "riday");
return Friday;
case 'm':
chrono::__skip(__input, "onday");
return Monday;
case 's':
switch (std::tolower(__input.get())) {
case 'a':
chrono::__skip(__input, "turday");
return Saturday;
case 'u':
chrono::__skip(__input, "nday");
return Sunday;
}
break;
case 't':
switch (std::tolower(__input.get())) {
case 'h':
chrono::__skip(__input, "ursday");
return Thursday;
case 'u':
chrono::__skip(__input, "esday");
return Tuesday;
}
break;
case 'w':
chrono::__skip(__input, "ednesday");
return Wednesday;
}
std::__throw_runtime_error("corrupt tzdb weekday: invalid name");
}
[[nodiscard]] static month __parse_month(istream& __input) {
// TZDB allows the shortest unique name.
switch (std::tolower(__input.get())) {
case 'a':
switch (std::tolower(__input.get())) {
case 'p':
chrono::__skip(__input, "ril");
return April;
case 'u':
chrono::__skip(__input, "gust");
return August;
}
break;
case 'd':
chrono::__skip(__input, "ecember");
return December;
case 'f':
chrono::__skip(__input, "ebruary");
return February;
case 'j':
switch (std::tolower(__input.get())) {
case 'a':
chrono::__skip(__input, "nuary");
return January;
case 'u':
switch (std::tolower(__input.get())) {
case 'n':
chrono::__skip(__input, 'e');
return June;
case 'l':
chrono::__skip(__input, 'y');
return July;
}
}
break;
case 'm':
if (std::tolower(__input.get()) == 'a')
switch (std::tolower(__input.get())) {
case 'y':
return May;
case 'r':
chrono::__skip(__input, "ch");
return March;
}
break;
case 'n':
chrono::__skip(__input, "ovember");
return November;
case 'o':
chrono::__skip(__input, "ctober");
return October;
case 's':
chrono::__skip(__input, "eptember");
return September;
}
std::__throw_runtime_error("corrupt tzdb month: invalid name");
}
[[nodiscard]] static year __parse_year_value(istream& __input) {
bool __negative = __input.peek() == '-';
if (__negative) [[unlikely]]
__input.get();
int64_t __result = __parse_integral(__input, true);
if (__result > static_cast<int>(year::max())) {
if (__negative)
std::__throw_runtime_error("corrupt tzdb year: year is less than the minimum");
std::__throw_runtime_error("corrupt tzdb year: year is greater than the maximum");
}
return year{static_cast<int>(__negative ? -__result : __result)};
}
[[nodiscard]] static year __parse_year(istream& __input) {
if (std::tolower(__input.peek()) != 'm') [[likely]]
return chrono::__parse_year_value(__input);
__input.get();
switch (std::tolower(__input.peek())) {
case 'i':
__input.get();
chrono::__skip(__input, 'n');
[[fallthrough]];
case ' ':
// The m is minimum, even when that is ambiguous.
return year::min();
case 'a':
__input.get();
chrono::__skip(__input, 'x');
return year::max();
}
std::__throw_runtime_error("corrupt tzdb year: expected 'min' or 'max'");
}
//===----------------------------------------------------------------------===//
// TZDB fields
//===----------------------------------------------------------------------===//
[[nodiscard]] static year __parse_to(istream& __input, year __only) {
if (std::tolower(__input.peek()) != 'o')
return chrono::__parse_year(__input);
__input.get();
chrono::__skip(__input, "nly");
return __only;
}
[[nodiscard]] static __tz::__constrained_weekday::__comparison_t __parse_comparison(istream& __input) {
switch (__input.get()) {
case '>':
chrono::__matches(__input, '=');
return __tz::__constrained_weekday::__ge;
case '<':
chrono::__matches(__input, '=');
return __tz::__constrained_weekday::__le;
}
std::__throw_runtime_error("corrupt tzdb on: expected '>=' or '<='");
}
[[nodiscard]] static __tz::__on __parse_on(istream& __input) {
if (std::isdigit(__input.peek()))
return chrono::__parse_day(__input);
if (std::tolower(__input.peek()) == 'l') {
chrono::__matches(__input, "last");
return weekday_last(chrono::__parse_weekday(__input));
}
return __tz::__constrained_weekday{
chrono::__parse_weekday(__input), chrono::__parse_comparison(__input), chrono::__parse_day(__input)};
}
[[nodiscard]] static seconds __parse_duration(istream& __input) {
seconds __result{0};
int __c = __input.peek();
bool __negative = __c == '-';
if (__negative) {
__input.get();
// Negative is either a negative value or a single -.
// The latter means 0 and the parsing is complete.
if (!std::isdigit(__input.peek()))
return __result;
}
__result += hours(__parse_integral(__input, true));
if (__input.peek() != ':')
return __negative ? -__result : __result;
__input.get();
__result += minutes(__parse_integral(__input, true));
if (__input.peek() != ':')
return __negative ? -__result : __result;
__input.get();
__result += seconds(__parse_integral(__input, true));
if (__input.peek() != '.')
return __negative ? -__result : __result;
__input.get();
(void)__parse_integral(__input, true); // Truncate the digits.
return __negative ? -__result : __result;
}
[[nodiscard]] static __tz::__clock __parse_clock(istream& __input) {
switch (__input.get()) { // case sensitive
case 'w':
return __tz::__clock::__local;
case 's':
return __tz::__clock::__standard;
case 'u':
case 'g':
case 'z':
return __tz::__clock::__universal;
}
__input.unget();
return __tz::__clock::__local;
}
[[nodiscard]] static bool __parse_dst(istream& __input, seconds __offset) {
switch (__input.get()) { // case sensitive
case 's':
return false;
case 'd':
return true;
}
__input.unget();
return __offset != 0s;
}
[[nodiscard]] static __tz::__at __parse_at(istream& __input) {
return {__parse_duration(__input), __parse_clock(__input)};
}
[[nodiscard]] static __tz::__save __parse_save(istream& __input) {
seconds __time = chrono::__parse_duration(__input);
return {__time, chrono::__parse_dst(__input, __time)};
}
[[nodiscard]] static string __parse_letters(istream& __input) {
string __result = __parse_string(__input);
// Canonicalize "-" to "" since they are equivalent in the specification.
return __result != "-" ? __result : "";
}
[[nodiscard]] static __tz::__continuation::__rules_t __parse_rules(istream& __input) {
int __c = __input.peek();
// A single - is not a SAVE but a special case.
if (__c == '-') {
__input.get();
if (chrono::__is_whitespace(__input.peek()))
return monostate{};
__input.unget();
return chrono::__parse_save(__input);
}
if (std::isdigit(__c) || __c == '+')
return chrono::__parse_save(__input);
return chrono::__parse_string(__input);
}
[[nodiscard]] static __tz::__continuation __parse_continuation(__tz::__rules_storage_type& __rules, istream& __input) {
__tz::__continuation __result;
__result.__rule_database_ = std::addressof(__rules);
// Note STDOFF is specified as
// This field has the same format as the AT and SAVE fields of rule lines;
// These fields have different suffix letters, these letters seem
// not to be used so do not allow any of them.
__result.__stdoff = chrono::__parse_duration(__input);
chrono::__skip_mandatory_whitespace(__input);
__result.__rules = chrono::__parse_rules(__input);
chrono::__skip_mandatory_whitespace(__input);
__result.__format = chrono::__parse_string(__input);
chrono::__skip_optional_whitespace(__input);
if (chrono::__is_eol(__input.peek()))
return __result;
__result.__year = chrono::__parse_year(__input);
chrono::__skip_optional_whitespace(__input);
if (chrono::__is_eol(__input.peek()))
return __result;
__result.__in = chrono::__parse_month(__input);
chrono::__skip_optional_whitespace(__input);
if (chrono::__is_eol(__input.peek()))
return __result;
__result.__on = chrono::__parse_on(__input);
chrono::__skip_optional_whitespace(__input);
if (chrono::__is_eol(__input.peek()))
return __result;
__result.__at = __parse_at(__input);
return __result;
}
//===----------------------------------------------------------------------===//
// Time Zone Database entries
//===----------------------------------------------------------------------===//
static string __parse_version(istream& __input) {
// The first line in tzdata.zi contains
// # version YYYYw
// The parser expects this pattern
// #\s*version\s*\(.*)
// This part is not documented.
chrono::__matches(__input, '#');
chrono::__skip_optional_whitespace(__input);
chrono::__matches(__input, "version");
chrono::__skip_mandatory_whitespace(__input);
return chrono::__parse_string(__input);
}
[[nodiscard]]
static __tz::__rule& __create_entry(__tz::__rules_storage_type& __rules, const string& __name) {
auto __result = [&]() -> __tz::__rule& {
auto& __rule = __rules.emplace_back(__name, vector<__tz::__rule>{});
return __rule.second.emplace_back();
};
if (__rules.empty())
return __result();
// Typically rules are in contiguous order in the database.
// But there are exceptions, some rules are interleaved.
if (__rules.back().first == __name)
return __rules.back().second.emplace_back();
if (auto __it = ranges::find(__rules, __name, [](const auto& __r) { return __r.first; });
__it != ranges::end(__rules))
return __it->second.emplace_back();
return __result();
}
static void __parse_rule(tzdb& __tzdb, __tz::__rules_storage_type& __rules, istream& __input) {
chrono::__skip_mandatory_whitespace(__input);
string __name = chrono::__parse_string(__input);
__tz::__rule& __rule = __create_entry(__rules, __name);
chrono::__skip_mandatory_whitespace(__input);
__rule.__from = chrono::__parse_year(__input);
chrono::__skip_mandatory_whitespace(__input);
__rule.__to = chrono::__parse_to(__input, __rule.__from);
chrono::__skip_mandatory_whitespace(__input);
chrono::__matches(__input, '-');
chrono::__skip_mandatory_whitespace(__input);
__rule.__in = chrono::__parse_month(__input);
chrono::__skip_mandatory_whitespace(__input);
__rule.__on = chrono::__parse_on(__input);
chrono::__skip_mandatory_whitespace(__input);
__rule.__at = __parse_at(__input);
chrono::__skip_mandatory_whitespace(__input);
__rule.__save = __parse_save(__input);
chrono::__skip_mandatory_whitespace(__input);
__rule.__letters = chrono::__parse_letters(__input);
chrono::__skip_line(__input);
}
static void __parse_zone(tzdb& __tzdb, __tz::__rules_storage_type& __rules, istream& __input) {
chrono::__skip_mandatory_whitespace(__input);
auto __p = std::make_unique<time_zone::__impl>(chrono::__parse_string(__input), __rules);
vector<__tz::__continuation>& __continuations = __p->__continuations();
chrono::__skip_mandatory_whitespace(__input);
do {
// The first line must be valid, continuations are optional.
__continuations.emplace_back(__parse_continuation(__rules, __input));
chrono::__skip_line(__input);
chrono::__skip_optional_whitespace(__input);
} while (std::isdigit(__input.peek()) || __input.peek() == '-');
__tzdb.zones.emplace_back(time_zone::__create(std::move(__p)));
}
static void __parse_link(tzdb& __tzdb, istream& __input) {
chrono::__skip_mandatory_whitespace(__input);
string __target = chrono::__parse_string(__input);
chrono::__skip_mandatory_whitespace(__input);
string __name = chrono::__parse_string(__input);
chrono::__skip_line(__input);
__tzdb.links.emplace_back(std::__private_constructor_tag{}, std::move(__name), std::move(__target));
}
static void __parse_tzdata(tzdb& __db, __tz::__rules_storage_type& __rules, istream& __input) {
while (true) {
int __c = std::tolower(__input.get());
switch (__c) {
case istream::traits_type::eof():
return;
case ' ':
case '\t':
case '\n':
break;
case '#':
chrono::__skip_line(__input);
break;
case 'r':
chrono::__skip(__input, "ule");
chrono::__parse_rule(__db, __rules, __input);
break;
case 'z':
chrono::__skip(__input, "one");
chrono::__parse_zone(__db, __rules, __input);
break;
case 'l':
chrono::__skip(__input, "ink");
chrono::__parse_link(__db, __input);
break;
default:
std::__throw_runtime_error("corrupt tzdb: unexpected input");
}
}
}
static void __parse_leap_seconds(vector<leap_second>& __leap_seconds, istream&& __input) {
// The file stores dates since 1 January 1900, 00:00:00, we want
// seconds since 1 January 1970.
constexpr auto __offset = sys_days{1970y / January / 1} - sys_days{1900y / January / 1};
while (true) {
switch (__input.peek()) {
case istream::traits_type::eof():
return;
case ' ':
case '\t':
case '\n':
__input.get();
continue;
case '#':
chrono::__skip_line(__input);
continue;
}
sys_seconds __date = sys_seconds{seconds{chrono::__parse_integral(__input, false)}} - __offset;
chrono::__skip_mandatory_whitespace(__input);
seconds __value{chrono::__parse_integral(__input, false)};
chrono::__skip_line(__input);
__leap_seconds.emplace_back(std::__private_constructor_tag{}, __date, __value);
}
}
void __init_tzdb(tzdb& __tzdb, __tz::__rules_storage_type& __rules) {
filesystem::path __root = chrono::__libcpp_tzdb_directory();
ifstream __tzdata{__root / "tzdata.zi"};
__tzdb.version = chrono::__parse_version(__tzdata);
chrono::__parse_tzdata(__tzdb, __rules, __tzdata);
std::ranges::sort(__tzdb.zones);
std::ranges::sort(__tzdb.links);
std::ranges::sort(__rules, {}, [](const auto& p) { return p.first; });
// There are two files with the leap second information
// - leapseconds as specified by zic
// - leap-seconds.list the source data
// The latter is much easier to parse, it seems Howard shares that
// opinion.
chrono::__parse_leap_seconds(__tzdb.leap_seconds, ifstream{__root / "leap-seconds.list"});
// The Standard requires the leap seconds to be sorted. The file
// leap-seconds.list usually provides them in sorted order, but that is not
// guaranteed so we ensure it here.
std::ranges::sort(__tzdb.leap_seconds);
}
#ifdef _WIN32
[[nodiscard]] static const time_zone* __current_zone_windows(const tzdb& tzdb) {
// TODO TZDB Implement this on Windows.
std::__throw_runtime_error("unknown time zone");
}
#else // ifdef _WIN32
[[nodiscard]] static const time_zone* __current_zone_posix(const tzdb& tzdb) {
// On POSIX systems there are several ways to configure the time zone.
// In order of priority they are:
// - TZ environment variable
// https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08
// The documentation is unclear whether or not it's allowed to
// change time zone information. For example the TZ string
// MST7MDT
// this is an entry in tzdata.zi. The value
// MST
// is also an entry. Is it allowed to use the following?
// MST-3
// Even when this is valid there is no time_zone record in the
// database. Since the library would need to return a valid pointer,
// this means the library needs to allocate and leak a pointer.
//
// - The time zone name is the target of the symlink /etc/localtime
// relative to /usr/share/zoneinfo/
// The algorithm is like this:
// - If the environment variable TZ is set and points to a valid
// record use this value.
// - Else use the name based on the `/etc/localtime` symlink.
if (const char* __tz = getenv("TZ"))
if (const time_zone* __result = tzdb.__locate_zone(__tz))
return __result;
filesystem::path __path = "/etc/localtime";
if (!std::filesystem::exists(__path))
std::__throw_runtime_error("tzdb: the symlink '/etc/localtime' does not exist");
if (!std::filesystem::is_symlink(__path))
std::__throw_runtime_error("tzdb: the path '/etc/localtime' is not a symlink");
filesystem::path __tz = filesystem::read_symlink(__path);
// The path may be a relative path, in that case convert it to an absolute
// path based on the proper initial directory.
if (__tz.is_relative())
__tz = filesystem::canonical("/etc" / __tz);
string __name = filesystem::relative(__tz, "/usr/share/zoneinfo/");
if (const time_zone* __result = tzdb.__locate_zone(__name))
return __result;
std::__throw_runtime_error(("tzdb: the time zone '" + __name + "' is not found in the database").c_str());
}
#endif // ifdef _WIN32
//===----------------------------------------------------------------------===//
// Public API
//===----------------------------------------------------------------------===//
_LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI tzdb_list& get_tzdb_list() {
static tzdb_list __result{new tzdb_list::__impl()};
return __result;
}
[[nodiscard]] _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI const time_zone* tzdb::__current_zone() const {
#ifdef _WIN32
return chrono::__current_zone_windows(*this);
#else
return chrono::__current_zone_posix(*this);
#endif
}
_LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI const tzdb& reload_tzdb() {
if (chrono::remote_version() == chrono::get_tzdb().version)
return chrono::get_tzdb();
return chrono::get_tzdb_list().__implementation().__load();
}
_LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI string remote_version() {
filesystem::path __root = chrono::__libcpp_tzdb_directory();
ifstream __tzdata{__root / "tzdata.zi"};
return chrono::__parse_version(__tzdata);
}
} // namespace chrono
_LIBCPP_END_NAMESPACE_STD