[flang] Catch mismatched parentheses in prescanner Source lines with mismatched parentheses are hard cases for error recovery in parsing, and the best error message (viz., "here's an unmatched parenthesis") can be emitted from the prescanner. Differential Revision: https://reviews.llvm.org/D111254#3046173 GitOrigin-RevId: 094b380c210ac508a1ad5a7c47a760773b0cc6ea

commit: 0835f5a42d1194f58281c243e88dbfb1a7407570 [log] [tgz]
author: peter klausler <pklausler@nvidia.com> Thu Sep 30 15:33:19 2021 -0700
committer: Copybara-Service <copybara-worker@google.com> Wed Oct 06 14:04:45 2021 -0700
tree: 57c292a9bb2c210dd48b9c250a1c8b4456861f65
parent: ce394330c437f3921f611f33b9f2c35e4caced3f [diff]
diff --git a/lib/Parser/prescan.cpp b/lib/Parser/prescan.cpp
index 0531ffd..7d7ee36 100644
--- a/lib/Parser/prescan.cpp
+++ b/lib/Parser/prescan.cpp

@@ -169,7 +169,7 @@
     preprocessed->CloseToken();
     const char *ppd{preprocessed->ToCharBlock().begin()};
     LineClassification ppl{ClassifyLine(ppd)};
-    preprocessed->RemoveLastToken(); // remove the newline
+    preprocessed->pop_back(); // remove the newline
     switch (ppl.kind) {
     case LineClassification::Kind::Comment:
       break;
@@ -182,8 +182,10 @@
     case LineClassification::Kind::PreprocessorDirective:
       Say(preprocessed->GetProvenanceRange(),
           "Preprocessed line resembles a preprocessor directive"_en_US);
-      preprocessed->ToLowerCase().CheckBadFortranCharacters(messages_).Emit(
-          cooked_);
+      preprocessed->ToLowerCase()
+          .CheckBadFortranCharacters(messages_)
+          .CheckBadParentheses(messages_)
+          .Emit(cooked_);
       break;
     case LineClassification::Kind::CompilerDirective:
       if (preprocessed->HasRedundantBlanks()) {
@@ -194,6 +196,7 @@
       SourceFormChange(preprocessed->ToString());
       preprocessed->ClipComment(true /* skip first ! */)
           .CheckBadFortranCharacters(messages_)
+          .CheckBadParentheses(messages_)
           .Emit(cooked_);
       break;
     case LineClassification::Kind::Source:
@@ -209,6 +212,7 @@
       preprocessed->ToLowerCase()
           .ClipComment()
           .CheckBadFortranCharacters(messages_)
+          .CheckBadParentheses(messages_)
           .Emit(cooked_);
       break;
     }
@@ -220,7 +224,9 @@
     if (inFixedForm_ && line.kind == LineClassification::Kind::Source) {
       EnforceStupidEndStatementRules(tokens);
     }
-    tokens.CheckBadFortranCharacters(messages_).Emit(cooked_);
+    tokens.CheckBadFortranCharacters(messages_)
+        .CheckBadParentheses(messages_)
+        .Emit(cooked_);
   }
   if (omitNewline_) {
     omitNewline_ = false;
@@ -255,6 +261,7 @@
 void Prescanner::LabelField(TokenSequence &token) {
   const char *bad{nullptr};
   int outCol{1};
+  const char *start{at_};
   for (; *at_ != '\n' && column_ <= 6; ++at_) {
     if (*at_ == '\t') {
       ++at_;
@@ -271,17 +278,19 @@
     }
     ++column_;
   }
+  if (bad && !preprocessor_.IsNameDefined(token.CurrentOpenToken())) {
+    Say(GetProvenance(bad),
+        "Character in fixed-form label field must be a digit"_en_US);
+    token.clear();
+    at_ = start;
+    return;
+  }
   if (outCol == 1) { // empty label field
     // Emit a space so that, if the line is rescanned after preprocessing,
     // a leading 'C' or 'D' won't be left-justified and then accidentally
     // misinterpreted as a comment card.
     EmitChar(token, ' ');
     ++outCol;
-  } else {
-    if (bad && !preprocessor_.IsNameDefined(token.CurrentOpenToken())) {
-      Say(GetProvenance(bad),
-          "Character in fixed-form label field must be a digit"_en_US);
-    }
   }
   token.CloseToken();
   SkipToNextSignificantCharacter();

diff --git a/lib/Parser/token-sequence.cpp b/lib/Parser/token-sequence.cpp
index 3ed9d05..55217a9 100644
--- a/lib/Parser/token-sequence.cpp
+++ b/lib/Parser/token-sequence.cpp

@@ -27,6 +27,8 @@
 }
 
 void TokenSequence::pop_back() {
+  CHECK(!start_.empty());
+  CHECK(nextStart_ > start_.back());
   std::size_t bytes{nextStart_ - start_.back()};
   nextStart_ = start_.back();
   start_.pop_back();
@@ -82,16 +84,6 @@
   return false;
 }
 
-void TokenSequence::RemoveLastToken() {
-  CHECK(!start_.empty());
-  CHECK(nextStart_ > start_.back());
-  std::size_t bytes{nextStart_ - start_.back()};
-  nextStart_ = start_.back();
-  start_.pop_back();
-  char_.erase(char_.begin() + nextStart_, char_.end());
-  provenances_.RemoveLastBytes(bytes);
-}
-
 void TokenSequence::Put(const TokenSequence &that) {
   if (nextStart_ < char_.size()) {
     start_.push_back(nextStart_);
@@ -338,4 +330,41 @@
   }
   return *this;
 }
+
+const TokenSequence &TokenSequence::CheckBadParentheses(
+    Messages &messages) const {
+  // First, a quick pass with no allocation for the common case
+  int nesting{0};
+  std::size_t tokens{SizeInTokens()};
+  for (std::size_t j{0}; j < tokens; ++j) {
+    CharBlock token{TokenAt(j)};
+    char ch{token.FirstNonBlank()};
+    if (ch == '(') {
+      ++nesting;
+    } else if (ch == ')') {
+      --nesting;
+    }
+  }
+  if (nesting != 0) {
+    // There's an error; diagnose it
+    std::vector<std::size_t> stack;
+    for (std::size_t j{0}; j < tokens; ++j) {
+      CharBlock token{TokenAt(j)};
+      char ch{token.FirstNonBlank()};
+      if (ch == '(') {
+        stack.push_back(j);
+      } else if (ch == ')') {
+        if (stack.empty()) {
+          messages.Say(GetTokenProvenanceRange(j), "Unmatched ')'"_err_en_US);
+          return *this;
+        }
+        stack.pop_back();
+      }
+    }
+    CHECK(!stack.empty());
+    messages.Say(
+        GetTokenProvenanceRange(stack.back()), "Unmatched '('"_err_en_US);
+  }
+  return *this;
+}
 } // namespace Fortran::parser

diff --git a/lib/Parser/token-sequence.h b/lib/Parser/token-sequence.h
index 6da6229..c039126 100644
--- a/lib/Parser/token-sequence.h
+++ b/lib/Parser/token-sequence.h

@@ -92,8 +92,6 @@
     start_.pop_back();
   }
 
-  void RemoveLastToken();
-
   void Put(const TokenSequence &);
   void Put(const TokenSequence &, ProvenanceRange);
   void Put(const TokenSequence &, std::size_t at, std::size_t tokens = 1);
@@ -119,6 +117,7 @@
   TokenSequence &RemoveRedundantBlanks(std::size_t firstChar = 0);
   TokenSequence &ClipComment(bool skipFirst = false);
   const TokenSequence &CheckBadFortranCharacters(Messages &) const;
+  const TokenSequence &CheckBadParentheses(Messages &) const;
   void Emit(CookedSource &) const;
   llvm::raw_ostream &Dump(llvm::raw_ostream &) const;
 

diff --git a/test/Parser/unmatched-parens.f90 b/test/Parser/unmatched-parens.f90
new file mode 100644
index 0000000..c0a3e84
--- /dev/null
+++ b/test/Parser/unmatched-parens.f90

@@ -0,0 +1,8 @@
+! RUN: not %flang_fc1 -E %s 2>&1 | FileCheck %s
+do i = 1,10
+  ! CHECK: Unmatched '('
+  if (i != 0) then
+    exit
+  endif
+enddo
+end
commit	0835f5a42d1194f58281c243e88dbfb1a7407570	[log] [tgz]
author	peter klausler <pklausler@nvidia.com>	Thu Sep 30 15:33:19 2021 -0700
committer	Copybara-Service <copybara-worker@google.com>	Wed Oct 06 14:04:45 2021 -0700
tree	57c292a9bb2c210dd48b9c250a1c8b4456861f65
parent	ce394330c437f3921f611f33b9f2c35e4caced3f [diff]