// blob: 8fa7be5f2c075f716480f77dab993684accbc5df [file] [log] [blame]
//===-- include/flang/Parser/char-set.h -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef FORTRAN_PARSER_CHAR_SET_H_
#define FORTRAN_PARSER_CHAR_SET_H_
// Sets of distinct characters that are valid in Fortran programs outside
// character literals are encoded as 64-bit integers by mapping them to a 6-bit
// character set encoding in which the case of letters is lost (even if
// mixed case input reached the parser, which it does not). These sets
// need to be suitable for constexprs, so std::bitset<> was not eligible.
#include <cinttypes>
#include <string>
namespace Fortran::parser {
// A set of characters packed into a single 64-bit word, one bit per 6-bit
// character code.  Letter case is not distinguished: 'a' and 'A' occupy the
// same bit.  Every operation is constexpr except ToString().
struct SetOfChars {
  // Constructs the empty set.
  constexpr SetOfChars() {}

  // Constructs the singleton set that holds the 6-bit code of c.
  constexpr SetOfChars(char c) {
    // The 6-bit code is essentially the old DECSIX encoding, i.e. the 7-bit
    // ASCII codes [32..95] shifted down to [0..63].  Within that range only
    // '#', '&', '?', '\', and '^' are unused in Fortran after preprocessing
    // outside character literals.  The '^' and '?' slots are repurposed for
    // newline and unknown characters (resp.); the others are left alone in
    // case this code might be useful in preprocessing.
    const int ascii{c};
    const int folded{ascii == '\n'
            ? '^' // newline shares the '^' slot
            : (ascii < 32 || ascii > 126)
            ? '?' // other control characters, DEL, and 8-bit characters
            : ascii > 95
            ? ascii - 32 // [96..126], lower-case letters included, fold
                         // down onto [64..94]
            : ascii};
    // folded is now within [32..95]; rebase it to [0..63] for the shift.
    bits_ = std::uint64_t{1} << (folded - 32);
  }

  // Constructs the set of the codes of the first n characters of str.
  // Nothing is read from str when n is zero.
  constexpr SetOfChars(const char str[], std::size_t n) {
    std::uint64_t mask{0};
    for (std::size_t at{0}; at != n; ++at) {
      mask |= SetOfChars{str[at]}.bits_;
    }
    bits_ = mask;
  }

  constexpr SetOfChars(const SetOfChars &) = default;
  constexpr SetOfChars(SetOfChars &&) = default;
  constexpr SetOfChars &operator=(const SetOfChars &) = default;
  constexpr SetOfChars &operator=(SetOfChars &&) = default;

  // True when the set has no members.
  constexpr bool empty() const { return !bits_; }

  // True when every member of that is also a member of this set; an empty
  // that is therefore always contained.
  constexpr bool Has(SetOfChars that) const {
    return (bits_ & that.bits_) == that.bits_;
  }

  // Members of this set, of that, or of both.
  constexpr SetOfChars Union(SetOfChars that) const {
    that.bits_ |= bits_;
    return that;
  }

  // Members common to this set and that.
  constexpr SetOfChars Intersection(SetOfChars that) const {
    that.bits_ &= bits_;
    return that;
  }

  // Members of this set that are not members of that.
  constexpr SetOfChars Difference(SetOfChars that) const {
    that.bits_ = bits_ & ~that.bits_;
    return that;
  }

  // Declared here only; the definition is not part of this header.
  std::string ToString() const;

private:
  // Wraps an already-encoded bit mask.
  constexpr SetOfChars(std::uint64_t b) : bits_{b} {}

  // Bit k is set when the character with 6-bit code k is a member.
  std::uint64_t bits_{0};
};
} // namespace Fortran::parser
#endif // FORTRAN_PARSER_CHAR_SET_H_