lib/Parser/token-parsers.h - llvm-project/flang - Git at Google

 //===-- lib/Parser/token-parsers.h ------------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//

 #ifndef FORTRAN_PARSER_TOKEN_PARSERS_H_
 #define FORTRAN_PARSER_TOKEN_PARSERS_H_

 // These parsers are driven by the parsers of the Fortran grammar to consume
 // the prescanned character stream and recognize context-sensitive tokens.

 #include "basic-parsers.h"
 #include "type-parsers.h"
 #include "flang/Common/idioms.h"
 #include "flang/Parser/char-set.h"
 #include "flang/Parser/characters.h"
 #include "flang/Parser/instrumented-parser.h"
 #include "flang/Parser/provenance.h"
 #include <cstddef>
 #include <cstring>
 #include <functional>
 #include <limits>
 #include <list>
 #include <optional>
 #include <string>

 namespace Fortran::parser {

 // "xyz"_ch matches one instance of the characters x, y, or z without skipping
 // any spaces before or after.  The parser returns the location of the character
 // on success.
 class AnyOfChars {
 public:
   using resultType = const char *;
   constexpr AnyOfChars(const AnyOfChars &) = default;
   constexpr AnyOfChars(SetOfChars set) : set_{set} {}

   // Consumes the next character when it belongs to the set and yields its
   // location.  Otherwise nothing is consumed, a message naming the expected
   // set is emitted, and the parse fails.
   std::optional<const char *> Parse(ParseState &state) const {
     std::optional<const char *> here{state.PeekAtNextChar()};
     if (!here || !set_.Has(**here)) {
       state.Say(MessageExpectedText{set_});
       return std::nullopt;
     }
     state.UncheckedAdvance();
     state.set_anyTokenMatched();
     return here;
   }

 private:
   const SetOfChars set_;
 };

 // Literal suffix: "xyz"_ch yields an AnyOfChars parser whose set is built
 // from the n characters of the literal.
 constexpr AnyOfChars operator""_ch(const char str[], std::size_t n) {
   const SetOfChars members(str, n);
   return AnyOfChars{members};
 }

 constexpr auto letter{"abcdefghijklmnopqrstuvwxyz"_ch};
 constexpr auto digit{"0123456789"_ch};

 // Skips over optional spaces.  Always succeeds.
 struct Space {
   using resultType = Success;
   constexpr Space() {}
   // Advances past every blank at the current position; cannot fail.
   static std::optional<Success> Parse(ParseState &state) {
     for (std::optional<const char *> next{state.PeekAtNextChar()};
          next && **next == ' '; next = state.PeekAtNextChar()) {
       state.UncheckedAdvance();
     }
     return {Success{}};
   }
 };
 constexpr Space space;

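 // MissingSpace() emits the portability warning for a blank that free form
 // requires but the source omits.  SpaceCheck skips the blanks that follow a
 // token and calls MissingSpace() when a character that could begin an
 // identifier or keyword follows immediately.  SpaceCheck always succeeds.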
 inline void MissingSpace(ParseState &state) {
   // Only free form source elicits the warning.
   if (state.inFixedForm()) {
     return;
   }
   state.Nonstandard(
       LanguageFeature::OptionalFreeFormSpace, "missing space"_port_en_US);
 }

 // Skips blanks at the current position.  When there is no blank and the
 // next character is legal in an identifier, MissingSpace() is called
 // instead.  Always succeeds.
 struct SpaceCheck {
   using resultType = Success;
   constexpr SpaceCheck() {}
   static std::optional<Success> Parse(ParseState &state) {
     std::optional<const char *> next{state.PeekAtNextChar()};
     if (!next) {
       return {Success{}};
     }
     const char ch{**next};
     if (ch == ' ') {
       state.UncheckedAdvance();
       return space.Parse(state);
     }
     if (IsLegalInIdentifier(ch)) {
       MissingSpace(state);
     }
     return {Success{}};
   }
 };
 constexpr SpaceCheck spaceCheck;

 // Matches a token string.  Spaces in the token string denote where
 // spaces may appear in the source; they can be made mandatory for
 // some free form keyword sequences.  Missing mandatory spaces in free
 // form elicit a warning; they are not necessary for recognition.
 // Spaces before and after the token are also skipped.
 //
 // Token strings appear in the grammar as C++ user-defined literals
 // like "BIND ( C )"_tok and "SYNC ALL"_sptok.  The _tok suffix is implied
 // when a string literal appears before the sequencing operator >> or
 // after the sequencing operator /.  The literal "..."_id parses a
 // token that cannot be a prefix of a longer identifier.
 template <bool MandatoryFreeFormSpace = false, bool MustBeComplete = false>
 class TokenStringMatch {
 public:
   using resultType = Success;
   constexpr TokenStringMatch(const TokenStringMatch &) = default;
   // Token given as a pointer plus an explicit byte count.
   constexpr TokenStringMatch(const char *str, std::size_t n)
       : str_{str}, bytes_{n} {}
   // NUL-terminated token: bytes_ keeps its npos default, so the scan in
   // Parse() stops at the terminator instead.
   explicit constexpr TokenStringMatch(const char *str) : str_{str} {}

   // Matches the token at the current position, skipping blanks before and
   // after it.  Each source character is compared with
   // ToLowerCaseLetter() of the token character.  Yields std::nullopt on a
   // mismatch or when nextCh produces no character.
   std::optional<Success> Parse(ParseState &state) const {
     space.Parse(state);
     const char *start{state.GetLocation()};
     const char *p{str_};
     // Holds a source character that has been read but not yet matched.
     std::optional<const char *> at; // initially empty
     for (std::size_t j{0}; j < bytes_ && *p != '\0'; ++j, ++p) {
       bool spaceSkipping{*p == ' '};
       if (spaceSkipping) {
         // A blank that ends the token or precedes another blank adds
         // nothing; trailing blanks are skipped after the loop anyway.
         if (j + 1 == bytes_ || p[1] == ' ' || p[1] == '\0') {
           continue; // redundant; ignore
         }
       }
       if (!at) {
         at = nextCh.Parse(state);
         if (!at) {
           return std::nullopt;
         }
       }
       if (spaceSkipping) {
         if (**at == ' ') {
           // Step over the blank so that 'at' holds the character after it.
           at = nextCh.Parse(state);
           if (!at) {
             return std::nullopt;
           }
         } else if constexpr (MandatoryFreeFormSpace) {
           // The blank is absent: still a match, but worth a warning.
           MissingSpace(state);
         }
         // 'at' remains full for next iteration
       } else if (**at == ToLowerCaseLetter(*p)) {
         at.reset();
       } else {
         state.Say(start, MessageExpectedText{str_, bytes_});
         return std::nullopt;
       }
     }
     if constexpr (MustBeComplete) {
       // Reject the match when the token is a prefix of a longer identifier.
       if (auto after{state.PeekAtNextChar()}) {
         if (IsLegalInIdentifier(**after)) {
           state.Say(start, MessageExpectedText{str_, bytes_});
           return std::nullopt;
         }
       }
     }
     state.set_anyTokenMatched();
     // p[-1] is the last token character scanned.  An empty token string
     // leaves p == str_, in which case there is no such character and
     // reading p[-1] would be out of bounds.
     if (p != str_ && IsLegalInIdentifier(p[-1])) {
       return spaceCheck.Parse(state);
     } else {
       return space.Parse(state);
     }
   }

 private:
   const char *const str_;
   const std::size_t bytes_{std::string::npos};
 };

 // "..."_tok: plain token; blanks inside the token are optional.
 constexpr TokenStringMatch<> operator""_tok(const char str[], std::size_t n) {
   return TokenStringMatch<>{str, n};
 }

 // "..."_sptok: token whose interior blanks are mandatory in free form.
 constexpr TokenStringMatch<true> operator""_sptok(
     const char str[], std::size_t n) {
   return TokenStringMatch<true>{str, n};
 }

 // "..."_id: token that must not be a prefix of a longer identifier.
 constexpr TokenStringMatch<false, true> operator""_id(
     const char str[], std::size_t n) {
   return TokenStringMatch<false, true>{str, n};
 }

 template <class PA>
 inline constexpr std::enable_if_t<std::is_class_v<PA>,
     SequenceParser<TokenStringMatch<>, PA>>
 operator>>(const char *str, const PA &p) {
   return SequenceParser<TokenStringMatch<>, PA>{TokenStringMatch<>{str}, p};
 }

 template <class PA>
 inline constexpr std::enable_if_t<std::is_class_v<PA>,
     FollowParser<PA, TokenStringMatch<>>>
 operator/(const PA &p, const char *str) {
   return FollowParser<PA, TokenStringMatch<>>{p, TokenStringMatch<>{str}};
 }

 // Wraps parser p between "(" and ")" tokens.
 template <class PA> inline constexpr auto parenthesized(const PA &p) {
   return "(" >> (p / ")");
 }

 // Wraps parser p between "[" and "]" tokens.
 template <class PA> inline constexpr auto bracketed(const PA &p) {
   return "[" >> (p / "]");
 }

 // Quoted character literal constants.
 // Reads one character of a quoted literal.  The result pairs the character
 // with a flag that is true when it was introduced by a backslash.
 struct CharLiteralChar {
   using resultType = std::pair<char, bool /* was escaped */>;
   static std::optional<resultType> Parse(ParseState &state) {
     const auto begin{state.GetLocation()};
     std::optional<const char *> got{nextCh.Parse(state)};
     if (!got) {
       return std::nullopt;
     }
     const char ch{**got};
     if (ch == '\n') {
       state.Say(CharBlock{begin, state.GetLocation()},
           "Unclosed character constant"_err_en_US);
       return std::nullopt;
     }
     if (ch == '\\') {
       // Only \' \" and \\ are resolved here, so that an escaped quote is
       // not mistaken for the closing one; other escapes pass through.
       if (std::optional<const char *> peeked{state.PeekAtNextChar()}) {
         const char following{**peeked};
         switch (following) {
         case '\'':
         case '"':
         case '\\':
           state.UncheckedAdvance();
           return std::make_pair(following, true);
         default:
           break;
         }
       }
     }
     return std::make_pair(ch, false);
   }
 };

 // Parses the body of a character literal up to and including the closing
 // quote; the opening quote is not consumed here.  A doubled quote adds one
 // quote to the result, and an escaped character keeps its backslash.
 template <char quote> struct CharLiteral {
   using resultType = std::string;
   static std::optional<std::string> Parse(ParseState &state) {
     static constexpr auto oneChar{attempt(CharLiteralChar{})};
     static constexpr auto secondQuote{attempt(AnyOfChars{SetOfChars{quote}})};
     std::string text;
     for (auto item{oneChar.Parse(state)}; item; item = oneChar.Parse(state)) {
       const char ch{item->first};
       const bool escaped{item->second};
       if (escaped) {
         text += '\\';
       } else if (ch == quote && !secondQuote.Parse(state)) {
         return text; // an undoubled quote closes the literal
       }
       text += ch;
     }
     return std::nullopt;
   }
 };

 // Parse "BOZ" binary literal quoted constants.
 // As extensions, support X as an alternate hexadecimal marker, and allow
 // BOZX markers to appear as suffixes.
 struct BOZLiteral {
   using resultType = std::string;
   // On success the result is the base letter ('b', 'o', or 'z') followed by
   // the digits (each passed through ToLowerCaseLetter) in double quotes,
   // whichever quote character the source used.
   static std::optional<resultType> Parse(ParseState &state) {
     char base{'\0'};
     // Records the base for a marker character and reports whether the
     // character was a marker at all.  'x' is stored as 'z'.
     auto baseChar{[&base](char ch) -> bool {
       switch (ch) {
       case 'b':
       case 'o':
       case 'z':
         base = ch;
         return true;
       case 'x':
         base = 'z';
         return true;
       default:
         return false;
       }
     }};

     space.Parse(state);
     const char *start{state.GetLocation()};
     std::optional<const char *> at{nextCh.Parse(state)};
     if (!at) {
       return std::nullopt;
     }
     // An 'x' prefix is an extension and must be enabled to be accepted.
     if (**at == 'x' &&
         !state.IsNonstandardOk(LanguageFeature::BOZExtensions,
             "nonstandard BOZ literal"_port_en_US)) {
       return std::nullopt;
     }
     // With a prefix marker, the quote is the next character; without one,
     // 'at' already holds what must be the quote.
     if (baseChar(**at)) {
       at = nextCh.Parse(state);
       if (!at) {
         return std::nullopt;
       }
     }

     char quote = **at;
     if (quote != '\'' && quote != '"') {
       return std::nullopt;
     }

     // Collect digits up to the matching quote, ignoring blanks.  Digits are
     // checked only with IsHexadecimalDigit() here, whatever the base.
     // NOTE(review): per-base validation presumably happens later -- confirm.
     std::string content;
     while (true) {
       at = nextCh.Parse(state);
       if (!at) {
         return std::nullopt;
       }
       if (**at == quote) {
         break;
       }
       if (**at == ' ') {
         continue;
       }
       if (!IsHexadecimalDigit(**at)) {
         return std::nullopt;
       }
       content += ToLowerCaseLetter(**at);
     }

     if (!base) {
       // extension: base allowed to appear as suffix, too
       if (!(at = nextCh.Parse(state)) || !baseChar(**at) ||
           !state.IsNonstandardOk(LanguageFeature::BOZExtensions,
               "nonstandard BOZ literal"_port_en_US)) {
         return std::nullopt;
       }
       spaceCheck.Parse(state);
     }

     // The emptiness check comes last, after any suffix marker is consumed.
     if (content.empty()) {
       state.Say(start, "no digit in BOZ literal"_err_en_US);
       return std::nullopt;
     }
     return {std::string{base} + '"' + content + '"'};
   }
 };

 // R711 digit-string -> digit [digit]...
 // N.B. not a token -- no space is skipped
 // Yields the CharBlock that covers a maximal run of one or more decimal
 // digits starting at the current position.
 struct DigitString {
   using resultType = CharBlock;
   static std::optional<resultType> Parse(ParseState &state) {
     std::optional<const char *> first{state.PeekAtNextChar()};
     if (!first || !IsDecimalDigit(**first)) {
       return std::nullopt;
     }
     state.UncheckedAdvance(); // the leading digit
     for (std::optional<const char *> next{state.PeekAtNextChar()};
          next && IsDecimalDigit(**next); next = state.PeekAtNextChar()) {
       state.UncheckedAdvance();
     }
     return CharBlock{*first, state.GetLocation()};
   }
 };
 constexpr DigitString digitString;

 struct SignedIntLiteralConstantWithoutKind {
   using resultType = CharBlock;
   static std::optional<resultType> Parse(ParseState &state) {
     resultType result{state.GetLocation()};
     static constexpr auto sign{maybe("+-"_ch / space)};
     if (sign.Parse(state)) {
       if (auto digits{digitString.Parse(state)}) {
         result.ExtendToCover(*digits);
         return result;
       }
     }
     return std::nullopt;
   }
 };

 struct DigitString64 {
   using resultType = std::uint64_t;
   static std::optional<std::uint64_t> Parse(ParseState &state) {
     std::optional<const char *> firstDigit{digit.Parse(state)};
     if (!firstDigit) {
       return std::nullopt;
     }
     std::uint64_t value = **firstDigit - '0';
     bool overflow{false};
     static constexpr auto getDigit{attempt(digit)};
     while (auto nextDigit{getDigit.Parse(state)}) {
       if (value > std::numeric_limits<std::uint64_t>::max() / 10) {
         overflow = true;
       }
       value *= 10;
       int digitValue = **nextDigit - '0';
       if (value > std::numeric_limits<std::uint64_t>::max() - digitValue) {
         overflow = true;
       }
       value += digitValue;
     }
     if (overflow) {
       state.Say(*firstDigit, "overflow in decimal literal"_err_en_US);
     }
     return {value};
   }
 };
 constexpr DigitString64 digitString64;

 // R707 signed-int-literal-constant -> [sign] int-literal-constant
 // N.B. Spaces are consumed before and after the sign, since the sign
 // and the int-literal-constant are distinct tokens.  Does not
 // handle a trailing kind parameter.
 // SignedInteger() itself consumes no input.  It converts an already parsed
 // unsigned magnitude x into a signed 64-bit value, negated when 'negate' is
 // set.  A magnitude that does not fit (above 2**63-1, or above 2**63 when
 // negated) is reported at 'at'; the wrapped value is still returned.  An
 // absent x yields an absent result.
 inline std::optional<std::int64_t> SignedInteger(
     const std::optional<std::uint64_t> &x, Location at, bool negate,
     ParseState &state) {
   if (!x) {
     return std::nullopt;
   }
   // Largest acceptable magnitude: 2**63-1, or 2**63 for a negative value.
   std::uint64_t limit{std::numeric_limits<std::int64_t>::max()};
   if (negate) {
     ++limit;
   }
   if (*x > limit) {
     state.Say(at, "overflow in signed decimal literal"_err_en_US);
   }
   // Negate in unsigned arithmetic, where wraparound is well defined, so
   // that a magnitude of 2**63 becomes INT64_MIN without signed overflow.
   const std::uint64_t bits{negate ? std::uint64_t{0} - *x : *x};
   return std::make_optional<std::int64_t>(static_cast<std::int64_t>(bits));
 }

 // R710 signed-digit-string -> [sign] digit-string
 // N.B. Not a complete token -- no space is skipped.
 // Used only in the exponent parts of real literal constants.
 // An optional '+' or '-' immediately followed by a digit string, converted
 // to a signed 64-bit value.  The location of the first character is used
 // for any overflow message.
 struct SignedDigitString {
   using resultType = std::int64_t;
   static std::optional<std::int64_t> Parse(ParseState &state) {
     const std::optional<const char *> first{state.PeekAtNextChar()};
     if (!first) {
       return std::nullopt;
     }
     const char lead{**first};
     const bool isMinus{lead == '-'};
     if (isMinus || lead == '+') {
       state.UncheckedAdvance();
     }
     const std::optional<std::uint64_t> magnitude{digitString64.Parse(state)};
     return SignedInteger(magnitude, *first, isMinus, state);
   }
 };

 // Variants of the above for use in FORMAT specifications, where spaces
 // must be ignored.
 struct DigitStringIgnoreSpaces {
   using resultType = std::uint64_t;
   static std::optional<std::uint64_t> Parse(ParseState &state) {
     static constexpr auto getFirstDigit{space >> digit};
     std::optional<const char *> firstDigit{getFirstDigit.Parse(state)};
     if (!firstDigit) {
       return std::nullopt;
     }
     std::uint64_t value = **firstDigit - '0';
     bool overflow{false};
     static constexpr auto getDigit{space >> attempt(digit)};
     while (auto nextDigit{getDigit.Parse(state)}) {
       if (value > std::numeric_limits<std::uint64_t>::max() / 10) {
         overflow = true;
       }
       value *= 10;
       int digitValue = **nextDigit - '0';
       if (value > std::numeric_limits<std::uint64_t>::max() - digitValue) {
         overflow = true;
       }
       value += digitValue;
     }
     if (overflow) {
       state.Say(*firstDigit, "overflow in decimal literal"_err_en_US);
     }
     return value;
   }
 };

 struct PositiveDigitStringIgnoreSpaces {
   using resultType = std::int64_t;
   static std::optional<std::int64_t> Parse(ParseState &state) {
     Location at{state.GetLocation()};
     return SignedInteger(
         DigitStringIgnoreSpaces{}.Parse(state), at, false /*positive*/, state);
   }
 };

 // An optional sign and a digit string, with blanks ignored throughout.
 // Any overflow message is attached to the position just after the sign.
 struct SignedDigitStringIgnoreSpaces {
   using resultType = std::int64_t;
   static std::optional<std::int64_t> Parse(ParseState &state) {
     static constexpr auto optionalSign{space >> attempt("+-"_ch)};
     const std::optional<const char *> sign{optionalSign.Parse(state)};
     const bool negate{sign && **sign == '-'};
     const Location digitsStart{state.GetLocation()};
     const std::optional<std::uint64_t> magnitude{
         DigitStringIgnoreSpaces{}.Parse(state)};
     return SignedInteger(magnitude, digitsStart, negate, state);
   }
 };

 // Legacy feature: Hollerith literal constants
 struct HollerithLiteral {
   using resultType = std::string;
   static std::optional<std::string> Parse(ParseState &state) {
     space.Parse(state);
     const char *start{state.GetLocation()};
     std::optional<std::uint64_t> charCount{
         DigitStringIgnoreSpaces{}.Parse(state)};
     if (!charCount || *charCount < 1) {
       return std::nullopt;
     }
     static constexpr auto letterH{"h"_ch};
     std::optional<const char *> h{letterH.Parse(state)};
     if (!h) {
       return std::nullopt;
     }
     std::string content;
     for (auto j{*charCount}; j-- > 0;) {
       int chBytes{UTF_8CharacterBytes(state.GetLocation())};
       for (int bytes{chBytes}; bytes > 0; --bytes) {
         if (std::optional<const char *> at{nextCh.Parse(state)}) {
           if (chBytes == 1 && !IsPrintable(**at)) {
             state.Say(start, "Bad character in Hollerith"_err_en_US);
             return std::nullopt;
           }
           content += **at;
         } else {
           state.Say(start, "Insufficient characters in Hollerith"_err_en_US);
           return std::nullopt;
         }
       }
     }
     return content;
   }
 };

 struct ConsumedAllInputParser {
   using resultType = Success;
   constexpr ConsumedAllInputParser() {}
   static inline std::optional<Success> Parse(ParseState &state) {
     if (state.IsAtEnd()) {
       return {Success{}};
     }
     return std::nullopt;
   }
 };
 constexpr ConsumedAllInputParser consumedAllInput;

 template <char goal> struct SkipPast {
   using resultType = Success;
   constexpr SkipPast() {}
   constexpr SkipPast(const SkipPast &) {}
   static std::optional<Success> Parse(ParseState &state) {
     while (std::optional<const char *> p{state.GetNextChar()}) {
       if (**p == goal) {
         return {Success{}};
       }
     }
     return std::nullopt;
   }
 };

 // Consumes characters up to, but not including, the first occurrence of
 // 'goal'.  Fails when no character remains before 'goal' is seen.
 template <char goal> struct SkipTo {
   using resultType = Success;
   constexpr SkipTo() {}
   constexpr SkipTo(const SkipTo &) {}
   static std::optional<Success> Parse(ParseState &state) {
     for (std::optional<const char *> next{state.PeekAtNextChar()}; next;
          next = state.PeekAtNextChar()) {
       if (**next == goal) {
         return {Success{}};
       }
       state.UncheckedAdvance();
     }
     return std::nullopt;
   }
 };

 // A common idiom in the Fortran grammar is an optional item (usually
 // a nonempty comma-separated list) that, if present, must follow a comma
 // and precede a doubled colon.  When the item is absent, the comma must
 // not appear, and the doubled colons are optional.
 //   [[, xyz] ::]     is  optionalBeforeColons(xyz)
 //   [[, xyz]... ::]  is  optionalBeforeColons(nonemptyList(xyz))
 // First alternative: ", p ::" with the item present.  Second alternative:
 // either a bare "::" or no comma at all, with an empty result.
 template <typename PA> inline constexpr auto optionalBeforeColons(const PA &p) {
   using Item = std::optional<typename PA::resultType>;
   return ("," >> (construct<Item>(p) / "::")) ||
       (("::"_tok || !","_tok) >> pure<Item>());
 }
 // The same shape for a nonempty comma-separated list of p; an absent list
 // yields an empty std::list.
 template <typename PA>
 inline constexpr auto optionalListBeforeColons(const PA &p) {
   using Items = std::list<typename PA::resultType>;
   return ("," >> (nonemptyList(p) / "::")) ||
       (("::"_tok || !","_tok) >> pure<Items>());
 }

 // Skip over empty lines, leading spaces, and some compiler directives (viz.,
 // the ones that specify the source form) that might appear before the
 // next statement.  Skip over empty statements (bare semicolons) when
 // not in strict standard conformance mode.  Always succeeds.
 struct SkipStuffBeforeStatement {
   using resultType = Success;
   static std::optional<Success> Parse(ParseState &state) {
     if (UserState * ustate{state.userState()}) {
       if (ParsingLog * log{ustate->log()}) {
         // Save memory: vacate the parsing log before each statement unless
         // we're logging the whole parse for debugging.
         if (!ustate->instrumentedParse()) {
           log->clear();
         }
       }
     }
     while (std::optional<const char *> at{state.PeekAtNextChar()}) {
       if (**at == '\n' || **at == ' ') {
         state.UncheckedAdvance();
       } else if (**at == '!') {
         static const char fixed[] = "!dir$ fixed\n", free[] = "!dir$ free\n";
         static constexpr std::size_t fixedBytes{sizeof fixed - 1};
         static constexpr std::size_t freeBytes{sizeof free - 1};
         std::size_t remain{state.BytesRemaining()};
         if (remain >= fixedBytes && std::memcmp(*at, fixed, fixedBytes) == 0) {
           state.set_inFixedForm(true).UncheckedAdvance(fixedBytes);
         } else if (remain >= freeBytes &&
             std::memcmp(*at, free, freeBytes) == 0) {
           state.set_inFixedForm(false).UncheckedAdvance(freeBytes);
         } else {
           break;
         }
       } else if (**at == ';' &&
           state.IsNonstandardOk(
               LanguageFeature::EmptyStatement, "empty statement"_port_en_US)) {
         state.UncheckedAdvance();
       } else {
         break;
       }
     }
     return {Success{}};
   }
 };
 constexpr SkipStuffBeforeStatement skipStuffBeforeStatement;

 // R602 underscore -> _
 constexpr auto underscore{"_"_ch};

 // Characters besides letters and digits that may appear in names.
 // N.B. Don't accept an underscore if it is immediately followed by a
 // quotation mark, so that kindParam_"character literal" is parsed properly.
 // PGI and ifort accept '$' in identifiers, even as the initial character.
 // Cray and gfortran accept '$', but not as the first character.
 // Cray accepts '@' as well.
 constexpr auto otherIdChar{underscore / !"'\""_ch ||
     extension<LanguageFeature::PunctuationInNames>(
         "nonstandard usage: punctuation in name"_port_en_US, "$@"_ch)};

 // Logical literal constants.  Each parser accepts the full spelling, or
 // the abbreviated .T. / .F. spelling as an extension, and yields the
 // corresponding bool.
 constexpr auto logicalTRUE{
     (".TRUE."_tok ||
         extension<LanguageFeature::LogicalAbbreviations>(
             "nonstandard usage: .T. spelling of .TRUE."_port_en_US,
             ".T."_tok)) >>
     pure(true)};
 constexpr auto logicalFALSE{
     (".FALSE."_tok ||
         extension<LanguageFeature::LogicalAbbreviations>(
             "nonstandard usage: .F. spelling of .FALSE."_port_en_US,
             ".F."_tok)) >>
     pure(false)};

 // deprecated: Hollerith literals
 // HollerithLiteral wrapped in deprecated<> under LanguageFeature::Hollerith.
 constexpr auto rawHollerithLiteral{
     deprecated<LanguageFeature::Hollerith>(HollerithLiteral{})};

 // Wraps parser x so that its result constructs a Verbatim, and passes that
 // parser through sourced().
 // NOTE(review): sourced() presumably records the source range -- confirm.
 template <typename A> constexpr decltype(auto) verbatim(A x) {
   return sourced(construct<Verbatim>(x));
 }

 } // namespace Fortran::parser
 #endif // FORTRAN_PARSER_TOKEN_PARSERS_H_
	//===-- lib/Parser/token-parsers.h ------------------------------*- C++ -*-===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//

	#ifndef FORTRAN_PARSER_TOKEN_PARSERS_H_
	#define FORTRAN_PARSER_TOKEN_PARSERS_H_

	// These parsers are driven by the parsers of the Fortran grammar to consume
	// the prescanned character stream and recognize context-sensitive tokens.

	#include "basic-parsers.h"
	#include "type-parsers.h"
	#include "flang/Common/idioms.h"
	#include "flang/Parser/char-set.h"
	#include "flang/Parser/characters.h"
	#include "flang/Parser/instrumented-parser.h"
	#include "flang/Parser/provenance.h"
	#include <cstddef>
	#include <cstring>
	#include <functional>
	#include <limits>
	#include <list>
	#include <optional>
	#include <string>

	namespace Fortran::parser {

	// "xyz"_ch matches one instance of the characters x, y, or z without skipping
	// any spaces before or after. The parser returns the location of the character
	// on success.
	class AnyOfChars {
	public:
	  using resultType = const char *;
	  constexpr AnyOfChars(const AnyOfChars &) = default;
	  constexpr AnyOfChars(SetOfChars set) : set_{set} {}

	  // Consumes the next character when it belongs to the set and yields its
	  // location.  Otherwise nothing is consumed, a message naming the expected
	  // set is emitted, and the parse fails.
	  std::optional<const char *> Parse(ParseState &state) const {
	    std::optional<const char *> here{state.PeekAtNextChar()};
	    if (!here || !set_.Has(**here)) {
	      state.Say(MessageExpectedText{set_});
	      return std::nullopt;
	    }
	    state.UncheckedAdvance();
	    state.set_anyTokenMatched();
	    return here;
	  }

	private:
	  const SetOfChars set_;
	};

	// Literal suffix: "xyz"_ch yields an AnyOfChars parser whose set is built
	// from the n characters of the literal.
	constexpr AnyOfChars operator""_ch(const char str[], std::size_t n) {
	  const SetOfChars members(str, n);
	  return AnyOfChars{members};
	}

	constexpr auto letter{"abcdefghijklmnopqrstuvwxyz"_ch};
	constexpr auto digit{"0123456789"_ch};

	// Skips over optional spaces. Always succeeds.
	struct Space {
	  using resultType = Success;
	  constexpr Space() {}
	  // Advances past every blank at the current position; cannot fail.
	  static std::optional<Success> Parse(ParseState &state) {
	    for (std::optional<const char *> next{state.PeekAtNextChar()};
	         next && **next == ' '; next = state.PeekAtNextChar()) {
	      state.UncheckedAdvance();
	    }
	    return {Success{}};
	  }
	};
	constexpr Space space;

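	// MissingSpace() emits the portability warning for a blank that free form
	// requires but the source omits.  SpaceCheck skips the blanks that follow a
	// token and calls MissingSpace() when a character that could begin an
	// identifier or keyword follows immediately.  SpaceCheck always succeeds.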
	inline void MissingSpace(ParseState &state) {
	  // Only free form source elicits the warning.
	  if (state.inFixedForm()) {
	    return;
	  }
	  state.Nonstandard(
	      LanguageFeature::OptionalFreeFormSpace, "missing space"_port_en_US);
	}

	// Skips blanks at the current position.  When there is no blank and the
	// next character is legal in an identifier, MissingSpace() is called
	// instead.  Always succeeds.
	struct SpaceCheck {
	  using resultType = Success;
	  constexpr SpaceCheck() {}
	  static std::optional<Success> Parse(ParseState &state) {
	    std::optional<const char *> next{state.PeekAtNextChar()};
	    if (!next) {
	      return {Success{}};
	    }
	    const char ch{**next};
	    if (ch == ' ') {
	      state.UncheckedAdvance();
	      return space.Parse(state);
	    }
	    if (IsLegalInIdentifier(ch)) {
	      MissingSpace(state);
	    }
	    return {Success{}};
	  }
	};
	constexpr SpaceCheck spaceCheck;

	// Matches a token string. Spaces in the token string denote where
	// spaces may appear in the source; they can be made mandatory for
	// some free form keyword sequences. Missing mandatory spaces in free
	// form elicit a warning; they are not necessary for recognition.
	// Spaces before and after the token are also skipped.
	//
	// Token strings appear in the grammar as C++ user-defined literals
	// like "BIND ( C )"_tok and "SYNC ALL"_sptok. The _tok suffix is implied
	// when a string literal appears before the sequencing operator >> or
	// after the sequencing operator /. The literal "..."_id parses a
	// token that cannot be a prefix of a longer identifier.
	template <bool MandatoryFreeFormSpace = false, bool MustBeComplete = false>
	class TokenStringMatch {
	public:
	  using resultType = Success;
	  constexpr TokenStringMatch(const TokenStringMatch &) = default;
	  // Token given as a pointer plus an explicit byte count.
	  constexpr TokenStringMatch(const char *str, std::size_t n)
	      : str_{str}, bytes_{n} {}
	  // NUL-terminated token: bytes_ keeps its npos default, so the scan in
	  // Parse() stops at the terminator instead.
	  explicit constexpr TokenStringMatch(const char *str) : str_{str} {}

	  // Matches the token at the current position, skipping blanks before and
	  // after it.  Each source character is compared with
	  // ToLowerCaseLetter() of the token character.  Yields std::nullopt on a
	  // mismatch or when nextCh produces no character.
	  std::optional<Success> Parse(ParseState &state) const {
	    space.Parse(state);
	    const char *start{state.GetLocation()};
	    const char *p{str_};
	    // Holds a source character that has been read but not yet matched.
	    std::optional<const char *> at; // initially empty
	    for (std::size_t j{0}; j < bytes_ && *p != '\0'; ++j, ++p) {
	      bool spaceSkipping{*p == ' '};
	      if (spaceSkipping) {
	        // A blank that ends the token or precedes another blank adds
	        // nothing; trailing blanks are skipped after the loop anyway.
	        if (j + 1 == bytes_ || p[1] == ' ' || p[1] == '\0') {
	          continue; // redundant; ignore
	        }
	      }
	      if (!at) {
	        at = nextCh.Parse(state);
	        if (!at) {
	          return std::nullopt;
	        }
	      }
	      if (spaceSkipping) {
	        if (**at == ' ') {
	          // Step over the blank so that 'at' holds the character after it.
	          at = nextCh.Parse(state);
	          if (!at) {
	            return std::nullopt;
	          }
	        } else if constexpr (MandatoryFreeFormSpace) {
	          // The blank is absent: still a match, but worth a warning.
	          MissingSpace(state);
	        }
	        // 'at' remains full for next iteration
	      } else if (**at == ToLowerCaseLetter(*p)) {
	        at.reset();
	      } else {
	        state.Say(start, MessageExpectedText{str_, bytes_});
	        return std::nullopt;
	      }
	    }
	    if constexpr (MustBeComplete) {
	      // Reject the match when the token is a prefix of a longer identifier.
	      if (auto after{state.PeekAtNextChar()}) {
	        if (IsLegalInIdentifier(**after)) {
	          state.Say(start, MessageExpectedText{str_, bytes_});
	          return std::nullopt;
	        }
	      }
	    }
	    state.set_anyTokenMatched();
	    // p[-1] is the last token character scanned.  An empty token string
	    // leaves p == str_, in which case there is no such character and
	    // reading p[-1] would be out of bounds.
	    if (p != str_ && IsLegalInIdentifier(p[-1])) {
	      return spaceCheck.Parse(state);
	    } else {
	      return space.Parse(state);
	    }
	  }

	private:
	  const char *const str_;
	  const std::size_t bytes_{std::string::npos};
	};

	// "..."_tok: plain token; blanks inside the token are optional.
	constexpr TokenStringMatch<> operator""_tok(const char str[], std::size_t n) {
	  return TokenStringMatch<>{str, n};
	}

	// "..."_sptok: token whose interior blanks are mandatory in free form.
	constexpr TokenStringMatch<true> operator""_sptok(
	    const char str[], std::size_t n) {
	  return TokenStringMatch<true>{str, n};
	}

	// "..."_id: token that must not be a prefix of a longer identifier.
	constexpr TokenStringMatch<false, true> operator""_id(
	    const char str[], std::size_t n) {
	  return TokenStringMatch<false, true>{str, n};
	}

	template <class PA>
	inline constexpr std::enable_if_t<std::is_class_v<PA>,
	SequenceParser<TokenStringMatch<>, PA>>
	operator>>(const char *str, const PA &p) {
	return SequenceParser<TokenStringMatch<>, PA>{TokenStringMatch<>{str}, p};
	}

	template <class PA>
	inline constexpr std::enable_if_t<std::is_class_v<PA>,
	FollowParser<PA, TokenStringMatch<>>>
	operator/(const PA &p, const char *str) {
	return FollowParser<PA, TokenStringMatch<>>{p, TokenStringMatch<>{str}};
	}

	// Wraps parser p between "(" and ")" tokens.
	template <class PA> inline constexpr auto parenthesized(const PA &p) {
	  return "(" >> (p / ")");
	}

	// Wraps parser p between "[" and "]" tokens.
	template <class PA> inline constexpr auto bracketed(const PA &p) {
	  return "[" >> (p / "]");
	}

	// Quoted character literal constants.
	// Reads one character of a quoted literal.  The result pairs the character
	// with a flag that is true when it was introduced by a backslash.
	struct CharLiteralChar {
	  using resultType = std::pair<char, bool /* was escaped */>;
	  static std::optional<resultType> Parse(ParseState &state) {
	    const auto begin{state.GetLocation()};
	    std::optional<const char *> got{nextCh.Parse(state)};
	    if (!got) {
	      return std::nullopt;
	    }
	    const char ch{**got};
	    if (ch == '\n') {
	      state.Say(CharBlock{begin, state.GetLocation()},
	          "Unclosed character constant"_err_en_US);
	      return std::nullopt;
	    }
	    if (ch == '\\') {
	      // Only \' \" and \\ are resolved here, so that an escaped quote is
	      // not mistaken for the closing one; other escapes pass through.
	      if (std::optional<const char *> peeked{state.PeekAtNextChar()}) {
	        const char following{**peeked};
	        switch (following) {
	        case '\'':
	        case '"':
	        case '\\':
	          state.UncheckedAdvance();
	          return std::make_pair(following, true);
	        default:
	          break;
	        }
	      }
	    }
	    return std::make_pair(ch, false);
	  }
	};

	// Parses the body of a character literal up to and including the closing
	// quote; the opening quote is not consumed here.  A doubled quote adds one
	// quote to the result, and an escaped character keeps its backslash.
	template <char quote> struct CharLiteral {
	  using resultType = std::string;
	  static std::optional<std::string> Parse(ParseState &state) {
	    static constexpr auto oneChar{attempt(CharLiteralChar{})};
	    static constexpr auto secondQuote{attempt(AnyOfChars{SetOfChars{quote}})};
	    std::string text;
	    for (auto item{oneChar.Parse(state)}; item; item = oneChar.Parse(state)) {
	      const char ch{item->first};
	      const bool escaped{item->second};
	      if (escaped) {
	        text += '\\';
	      } else if (ch == quote && !secondQuote.Parse(state)) {
	        return text; // an undoubled quote closes the literal
	      }
	      text += ch;
	    }
	    return std::nullopt;
	  }
	};

	// Parse "BOZ" binary literal quoted constants.
	// As extensions, support X as an alternate hexadecimal marker, and allow
	// BOZX markers to appear as suffixes.
	struct BOZLiteral {
	  using resultType = std::string;
	  // On success the result is the base letter ('b', 'o', or 'z') followed by
	  // the digits (each passed through ToLowerCaseLetter) in double quotes,
	  // whichever quote character the source used.
	  static std::optional<resultType> Parse(ParseState &state) {
	    char base{'\0'};
	    // Records the base for a marker character and reports whether the
	    // character was a marker at all.  'x' is stored as 'z'.
	    auto baseChar{[&base](char ch) -> bool {
	      switch (ch) {
	      case 'b':
	      case 'o':
	      case 'z':
	        base = ch;
	        return true;
	      case 'x':
	        base = 'z';
	        return true;
	      default:
	        return false;
	      }
	    }};

	    space.Parse(state);
	    const char *start{state.GetLocation()};
	    std::optional<const char *> at{nextCh.Parse(state)};
	    if (!at) {
	      return std::nullopt;
	    }
	    // An 'x' prefix is an extension and must be enabled to be accepted.
	    if (**at == 'x' &&
	        !state.IsNonstandardOk(LanguageFeature::BOZExtensions,
	            "nonstandard BOZ literal"_port_en_US)) {
	      return std::nullopt;
	    }
	    // With a prefix marker, the quote is the next character; without one,
	    // 'at' already holds what must be the quote.
	    if (baseChar(**at)) {
	      at = nextCh.Parse(state);
	      if (!at) {
	        return std::nullopt;
	      }
	    }

	    char quote = **at;
	    if (quote != '\'' && quote != '"') {
	      return std::nullopt;
	    }

	    // Collect digits up to the matching quote, ignoring blanks.  Digits are
	    // checked only with IsHexadecimalDigit() here, whatever the base.
	    std::string content;
	    while (true) {
	      at = nextCh.Parse(state);
	      if (!at) {
	        return std::nullopt;
	      }
	      if (**at == quote) {
	        break;
	      }
	      if (**at == ' ') {
	        continue;
	      }
	      if (!IsHexadecimalDigit(**at)) {
	        return std::nullopt;
	      }
	      content += ToLowerCaseLetter(**at);
	    }

	    if (!base) {
	      // extension: base allowed to appear as suffix, too
	      if (!(at = nextCh.Parse(state)) || !baseChar(**at) ||
	          !state.IsNonstandardOk(LanguageFeature::BOZExtensions,
	              "nonstandard BOZ literal"_port_en_US)) {
	        return std::nullopt;
	      }
	      spaceCheck.Parse(state);
	    }

	    // The emptiness check comes last, after any suffix marker is consumed.
	    if (content.empty()) {
	      state.Say(start, "no digit in BOZ literal"_err_en_US);
	      return std::nullopt;
	    }
	    return {std::string{base} + '"' + content + '"'};
	  }
	};

	// R711 digit-string -> digit [digit]...
	// N.B. not a token -- no space is skipped
	// Yields the CharBlock that covers a maximal run of one or more decimal
	// digits starting at the current position.
	struct DigitString {
	  using resultType = CharBlock;
	  static std::optional<resultType> Parse(ParseState &state) {
	    std::optional<const char *> first{state.PeekAtNextChar()};
	    if (!first || !IsDecimalDigit(**first)) {
	      return std::nullopt;
	    }
	    state.UncheckedAdvance(); // the leading digit
	    for (std::optional<const char *> next{state.PeekAtNextChar()};
	         next && IsDecimalDigit(**next); next = state.PeekAtNextChar()) {
	      state.UncheckedAdvance();
	    }
	    return CharBlock{*first, state.GetLocation()};
	  }
	};
	constexpr DigitString digitString;

	struct SignedIntLiteralConstantWithoutKind {
	using resultType = CharBlock;
	static std::optional<resultType> Parse(ParseState &state) {
	resultType result{state.GetLocation()};
	static constexpr auto sign{maybe("+-"_ch / space)};
	if (sign.Parse(state)) {
	if (auto digits{digitString.Parse(state)}) {
	result.ExtendToCover(*digits);
	return result;
	}
	}
	return std::nullopt;
	}
	};

	struct DigitString64 {
	using resultType = std::uint64_t;
	static std::optional<std::uint64_t> Parse(ParseState &state) {
	std::optional<const char *> firstDigit{digit.Parse(state)};
	if (!firstDigit) {
	return std::nullopt;
	}
	std::uint64_t value = **firstDigit - '0';
	bool overflow{false};
	static constexpr auto getDigit{attempt(digit)};
	while (auto nextDigit{getDigit.Parse(state)}) {
	if (value > std::numeric_limits<std::uint64_t>::max() / 10) {
	overflow = true;
	}
	value *= 10;
	int digitValue = **nextDigit - '0';
	if (value > std::numeric_limits<std::uint64_t>::max() - digitValue) {
	overflow = true;
	}
	value += digitValue;
	}
	if (overflow) {
	state.Say(*firstDigit, "overflow in decimal literal"_err_en_US);
	}
	return {value};
	}
	};
	constexpr DigitString64 digitString64;

	// R707 signed-int-literal-constant -> [sign] int-literal-constant
	// N.B. Spaces are consumed before and after the sign, since the sign
	// and the int-literal-constant are distinct tokens. Does not
	// handle a trailing kind parameter.
	//
	// Helper shared by the signed digit-string parsers below: converts an
	// already-parsed unsigned magnitude "x" into a signed 64-bit value.
	//   x      - the parsed magnitude; an empty optional is propagated as failure
	//   at     - location used for the overflow message
	//   negate - true when the value was preceded by a minus sign
	//   state  - receives the overflow message, if any
	// An out-of-range magnitude is diagnosed but still yields a (wrapped) value.
	static std::optional<std::int64_t> SignedInteger(
	    const std::optional<std::uint64_t> &x, Location at, bool negate,
	    ParseState &state) {
	  if (!x) {
	    return std::nullopt;
	  }
	  // Largest acceptable magnitude: 2**63-1 when positive, 2**63 when negated.
	  std::uint64_t limit{std::numeric_limits<std::int64_t>::max()};
	  if (negate) {
	    limit += 1;
	  }
	  if (*x > limit) {
	    state.Say(at, "overflow in signed decimal literal"_err_en_US);
	  }
	  // Negate in unsigned arithmetic: negating the signed value would be
	  // undefined behavior for the most negative integer (magnitude 2**63),
	  // which is a valid input here.
	  const std::uint64_t bits{negate ? std::uint64_t{0} - *x : *x};
	  return static_cast<std::int64_t>(bits);
	}

	// R710 signed-digit-string -> [sign] digit-string
	// N.B. Not a complete token -- no space is skipped.
	// Used only in the exponent parts of real literal constants.
	// Fails at end of input or when no digit string follows the optional sign;
	// any overflow is reported at the position where parsing began.
	struct SignedDigitString {
	  using resultType = std::int64_t;
	  static std::optional<std::int64_t> Parse(ParseState &state) {
	    std::optional<const char *> sign{state.PeekAtNextChar()};
	    if (!sign) {
	      return std::nullopt;
	    }
	    bool negate{**sign == '-'};
	    // Consume the sign character only if one is actually present.
	    if (negate || **sign == '+') {
	      state.UncheckedAdvance();
	    }
	    return SignedInteger(digitString64.Parse(state), *sign, negate, state);
	  }
	};

	// Variants of the above for use in FORMAT specifications, where spaces
	// must be ignored.
	struct DigitStringIgnoreSpaces {
	using resultType = std::uint64_t;
	static std::optional<std::uint64_t> Parse(ParseState &state) {
	static constexpr auto getFirstDigit{space >> digit};
	std::optional<const char *> firstDigit{getFirstDigit.Parse(state)};
	if (!firstDigit) {
	return std::nullopt;
	}
	std::uint64_t value = **firstDigit - '0';
	bool overflow{false};
	static constexpr auto getDigit{space >> attempt(digit)};
	while (auto nextDigit{getDigit.Parse(state)}) {
	if (value > std::numeric_limits<std::uint64_t>::max() / 10) {
	overflow = true;
	}
	value *= 10;
	int digitValue = **nextDigit - '0';
	if (value > std::numeric_limits<std::uint64_t>::max() - digitValue) {
	overflow = true;
	}
	value += digitValue;
	}
	if (overflow) {
	state.Say(*firstDigit, "overflow in decimal literal"_err_en_US);
	}
	return value;
	}
	};

	// Parses an unsigned digit string, ignoring spaces, and passes it through
	// SignedInteger() without negation so that it is range-checked and
	// returned as a signed 64-bit value.
	struct PositiveDigitStringIgnoreSpaces {
	  using resultType = std::int64_t;
	  static std::optional<std::int64_t> Parse(ParseState &state) {
	    // The location is captured first so that an overflow message points at
	    // the position where this parser started.
	    Location at{state.GetLocation()};
	    constexpr bool negate{false}; // the value is always positive
	    return SignedInteger(
	        DigitStringIgnoreSpaces{}.Parse(state), at, negate, state);
	  }
	};

	// Parses an optionally signed digit string in which spaces are ignored,
	// both before the sign and between the digits.  Range checking and the
	// conversion to a signed value are delegated to SignedInteger(), with any
	// overflow reported at the location just after the sign attempt.
	struct SignedDigitStringIgnoreSpaces {
	  using resultType = std::int64_t;
	  static std::optional<std::int64_t> Parse(ParseState &state) {
	    static constexpr auto optionalSign{space >> attempt("+-"_ch)};
	    std::optional<const char *> sign{optionalSign.Parse(state)};
	    const bool negate{sign && **sign == '-'};
	    Location at{state.GetLocation()};
	    std::optional<std::uint64_t> magnitude{
	        DigitStringIgnoreSpaces{}.Parse(state)};
	    return SignedInteger(magnitude, at, negate, state);
	  }
	};

	// Legacy feature: Hollerith literal constants
	// Parses a character count (spaces ignored), the letter H, and then that
	// many characters, returning their bytes as the result.  A missing or zero
	// count, or a missing H, fails silently; errors inside the character data
	// are reported at the start of the literal.
	struct HollerithLiteral {
	  using resultType = std::string;
	  static std::optional<std::string> Parse(ParseState &state) {
	    space.Parse(state);
	    const char *start{state.GetLocation()};
	    std::optional<std::uint64_t> charCount{
	        DigitStringIgnoreSpaces{}.Parse(state)};
	    if (!charCount || *charCount < 1) {
	      return std::nullopt;
	    }
	    static constexpr auto letterH{"h"_ch};
	    if (!letterH.Parse(state)) {
	      return std::nullopt;
	    }
	    std::string content;
	    for (auto j{*charCount}; j-- > 0;) {
	      // One counted character may occupy several bytes; all of its bytes
	      // are copied into the result.
	      int chBytes{UTF_8CharacterBytes(state.GetLocation())};
	      for (int bytes{chBytes}; bytes > 0; --bytes) {
	        if (std::optional<const char *> at{nextCh.Parse(state)}) {
	          // Only single-byte characters are checked for printability.
	          if (chBytes == 1 && !IsPrintable(**at)) {
	            state.Say(start, "Bad character in Hollerith"_err_en_US);
	            return std::nullopt;
	          }
	          content += **at;
	        } else {
	          state.Say(start, "Insufficient characters in Hollerith"_err_en_US);
	          return std::nullopt;
	        }
	      }
	    }
	    return content;
	  }
	};

	struct ConsumedAllInputParser {
	using resultType = Success;
	constexpr ConsumedAllInputParser() {}
	static inline std::optional<Success> Parse(ParseState &state) {
	if (state.IsAtEnd()) {
	return {Success{}};
	}
	return std::nullopt;
	}
	};
	constexpr ConsumedAllInputParser consumedAllInput;

	template <char goal> struct SkipPast {
	using resultType = Success;
	constexpr SkipPast() {}
	constexpr SkipPast(const SkipPast &) {}
	static std::optional<Success> Parse(ParseState &state) {
	while (std::optional<const char *> p{state.GetNextChar()}) {
	if (**p == goal) {
	return {Success{}};
	}
	}
	return std::nullopt;
	}
	};

	// Advances over characters until the next character is "goal", which is
	// left unconsumed, and then succeeds.  Fails if the input runs out first.
	template <char goal> struct SkipTo {
	  using resultType = Success;
	  constexpr SkipTo() {}
	  constexpr SkipTo(const SkipTo &) {}
	  static std::optional<Success> Parse(ParseState &state) {
	    for (std::optional<const char *> next{state.PeekAtNextChar()}; next;
	         next = state.PeekAtNextChar()) {
	      if (**next == goal) {
	        return {Success{}};
	      }
	      state.UncheckedAdvance();
	    }
	    return std::nullopt;
	  }
	};

	// A common idiom in the Fortran grammar is an optional item (usually
	// a nonempty comma-separated list) that, if present, must follow a comma
	// and precede a doubled colon. When the item is absent, the comma must
	// not appear, and the doubled colons are optional.
	// [[, xyz] ::] is optionalBeforeColons(xyz)
	// [[, xyz]... ::] is optionalBeforeColons(nonemptyList(xyz))
	// The result is a std::optional of p's result type: it is constructed from
	// p's result when the item is present, and is empty otherwise.
	template <typename PA> inline constexpr auto optionalBeforeColons(const PA &p) {
	  using resultType = std::optional<typename PA::resultType>;
	  return "," >> construct<resultType>(p) / "::" ||
	      ("::"_tok || !","_tok) >> pure<resultType>();
	}
	// List form of the "[[, item]... ::]" idiom: a comma, a nonempty list of
	// items parsed by p, and a doubled colon.  When there is no leading comma,
	// the doubled colon is optional and the result is an empty list.
	template <typename PA>
	inline constexpr auto optionalListBeforeColons(const PA &p) {
	  using resultType = std::list<typename PA::resultType>;
	  return "," >> nonemptyList(p) / "::" ||
	      ("::"_tok || !","_tok) >> pure<resultType>();
	}

	// Skip over empty lines, leading spaces, and some compiler directives (viz.,
	// the ones that specify the source form) that might appear before the
	// next statement. Skip over empty statements (bare semicolons) when
	// not in strict standard conformance mode. Always succeeds.
	struct SkipStuffBeforeStatement {
	  using resultType = Success;
	  static std::optional<Success> Parse(ParseState &state) {
	    if (UserState *ustate{state.userState()}) {
	      if (ParsingLog *log{ustate->log()}) {
	        // Save memory: vacate the parsing log before each statement unless
	        // we're logging the whole parse for debugging.
	        if (!ustate->instrumentedParse()) {
	          log->clear();
	        }
	      }
	    }
	    while (std::optional<const char *> at{state.PeekAtNextChar()}) {
	      if (**at == '\n' || **at == ' ') {
	        state.UncheckedAdvance();
	      } else if (**at == '!') {
	        // Only the source form directives are consumed here; anything else
	        // that begins with '!' ends the skipping.
	        static constexpr char fixedDirective[]{"!dir$ fixed\n"};
	        static constexpr char freeDirective[]{"!dir$ free\n"};
	        static constexpr std::size_t fixedBytes{sizeof fixedDirective - 1};
	        static constexpr std::size_t freeBytes{sizeof freeDirective - 1};
	        // Check the remaining length first so that memcmp() cannot read
	        // past the end of the input.
	        std::size_t remain{state.BytesRemaining()};
	        if (remain >= fixedBytes &&
	            std::memcmp(*at, fixedDirective, fixedBytes) == 0) {
	          state.set_inFixedForm(true).UncheckedAdvance(fixedBytes);
	        } else if (remain >= freeBytes &&
	            std::memcmp(*at, freeDirective, freeBytes) == 0) {
	          state.set_inFixedForm(false).UncheckedAdvance(freeBytes);
	        } else {
	          break;
	        }
	      } else if (**at == ';' &&
	          state.IsNonstandardOk(
	              LanguageFeature::EmptyStatement, "empty statement"_port_en_US)) {
	        state.UncheckedAdvance();
	      } else {
	        break;
	      }
	    }
	    return {Success{}};
	  }
	};
	constexpr SkipStuffBeforeStatement skipStuffBeforeStatement;

	// R602 underscore -> _
	// Matches a single '_' character; as with any "..."_ch parser, no spaces
	// are skipped before or after it.
	constexpr auto underscore{"_"_ch};

	// Characters besides letters and digits that may appear in names.
	// N.B. Don't accept an underscore if it is immediately followed by a
	// quotation mark, so that kindParam_"character literal" is parsed properly.
	// PGI and ifort accept '$' in identifiers, even as the initial character.
	// Cray and gfortran accept '$', but not as the first character.
	// Cray accepts '@' as well.
	// Here '$' and '@' are accepted only via the PunctuationInNames extension.
	constexpr auto otherIdChar{underscore / !"'\""_ch ||
	    extension<LanguageFeature::PunctuationInNames>(
	        "nonstandard usage: punctuation in name"_port_en_US, "$@"_ch)};

	// Parses the logical literal .TRUE., or its .T. abbreviation via the
	// LogicalAbbreviations extension, and yields the value true.
	constexpr auto logicalTRUE{
	    (".TRUE."_tok ||
	        extension<LanguageFeature::LogicalAbbreviations>(
	            "nonstandard usage: .T. spelling of .TRUE."_port_en_US,
	            ".T."_tok)) >>
	    pure(true)};
	// Parses the logical literal .FALSE., or its .F. abbreviation via the
	// LogicalAbbreviations extension, and yields the value false.
	constexpr auto logicalFALSE{
	    (".FALSE."_tok ||
	        extension<LanguageFeature::LogicalAbbreviations>(
	            "nonstandard usage: .F. spelling of .FALSE."_port_en_US,
	            ".F."_tok)) >>
	    pure(false)};

	// deprecated: Hollerith literals
	// HollerithLiteral wrapped in deprecated<LanguageFeature::Hollerith>().
	// NOTE(review): the wrapper is defined elsewhere; presumably it gates or
	// flags use of the deprecated feature -- confirm the exact diagnostics there.
	constexpr auto rawHollerithLiteral{
	deprecated<LanguageFeature::Hollerith>(HollerithLiteral{})};

	// Builds a parser that constructs a Verbatim from the result of parser x
	// and wraps that parser with sourced().
	// NOTE(review): sourced() is defined elsewhere; presumably it records the
	// source range covered by the parse -- confirm.
	template <typename A> constexpr decltype(auto) verbatim(A x) {
	return sourced(construct<Verbatim>(x));
	}

	} // namespace Fortran::parser
	#endif // FORTRAN_PARSER_TOKEN_PARSERS_H_