[flang] Enforce fixed form rules about END continuation

From subclause 6.3.3.5: a program unit END statement cannot be
continued in fixed form, and other statements cannot have initial
lines that look like program unit END statements.  I think this
is to avoid violating assumptions that are important to legacy
compilers' statement classification routines.

Differential Revision: https://reviews.llvm.org/D109933

GitOrigin-RevId: f6ddfac401b8a5dfbf7623ebd8800df5184fa244
diff --git a/lib/Parser/prescan.cpp b/lib/Parser/prescan.cpp
index 18d69d1..1408fdf 100644
--- a/lib/Parser/prescan.cpp
+++ b/lib/Parser/prescan.cpp
@@ -217,6 +217,9 @@
     if (line.kind == LineClassification::Kind::CompilerDirective) {
       SourceFormChange(tokens.ToString());
     }
+    if (inFixedForm_ && line.kind == LineClassification::Kind::Source) {
+      EnforceStupidEndStatementRules(tokens);
+    }
     tokens.CheckBadFortranCharacters(messages_).Emit(cooked_);
   }
   if (omitNewline_) {
@@ -288,6 +291,67 @@
   }
 }
 
+// 6.3.3.5: In fixed form source, a program unit END statement shall not be
+// continued, and no other statement may have an initial line that looks like
+// one.  Emits an error for an offending statement; otherwise does nothing.
+void Prescanner::EnforceStupidEndStatementRules(const TokenSequence &tokens) {
+  CharBlock cBlock{tokens.ToCharBlock()};
+  const char *str{cBlock.begin()};
+  std::size_t n{cBlock.size()};
+  if (n < 3) {
+    return;
+  }
+  std::size_t j{0};
+  for (; j < n && (str[j] == ' ' || (str[j] >= '0' && str[j] <= '9')); ++j) {
+  }
+  if (j + 3 > n || std::memcmp(str + j, "end", 3) != 0) {
+    return;
+  }
+  // It starts with END, possibly after blanks and label digits.
+  auto start{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))};
+  auto end{allSources_.GetSourcePosition(tokens.GetCharProvenance(n - 1))};
+  if (!start || !end) {
+    return;
+  }
+  if (&start->file == &end->file && start->line == end->line) {
+    return; // no continuation
+  }
+  j += 3;
+  static const char *const prefixes[]{"program", "subroutine", "function",
+      "blockdata", "module", "submodule"};
+  // endOfPrefix indexes the last character of END or of END plus its keyword.
+  bool isPrefix{j == n || !IsLegalInIdentifier(str[j])}; // prefix is END
+  std::size_t endOfPrefix{j - 1};
+  for (const char *p : prefixes) {
+    std::size_t pLen{std::strlen(p)};
+    if (j + pLen <= n && std::memcmp(str + j, p, pLen) == 0) {
+      isPrefix = true; // END thing as prefix
+      j += pLen;
+      endOfPrefix = j - 1;
+      for (; j < n && IsLegalInIdentifier(str[j]); ++j) { // skip any name
+      }
+      break;
+    }
+  }
+  if (isPrefix) {
+    auto range{tokens.GetTokenProvenanceRange(1)};
+    if (j == n) { // END or END thing [name]
+      Say(range,
+          "Program unit END statement may not be continued in fixed form source"_err_en_US);
+    } else { // complain only if the initial line stops right after the prefix
+      auto endOfPrefixPos{
+          allSources_.GetSourcePosition(tokens.GetCharProvenance(endOfPrefix))};
+      auto next{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))};
+      if (endOfPrefixPos && next && &endOfPrefixPos->file == &start->file &&
+          endOfPrefixPos->line == start->line &&
+          (&next->file != &start->file || next->line != start->line)) {
+        Say(range,
+            "Initial line of continued statement must not appear to be a program unit END in fixed form source"_err_en_US);
+      }
+    }
+  }
+}
+
 void Prescanner::SkipToEndOfLine() {
   while (*at_ != '\n') {
     ++at_, ++column_;
diff --git a/lib/Parser/prescan.h b/lib/Parser/prescan.h
index 2f1b83e..4d002ad 100644
--- a/lib/Parser/prescan.h
+++ b/lib/Parser/prescan.h
@@ -150,6 +150,7 @@
   }
 
   void LabelField(TokenSequence &);
+  void EnforceStupidEndStatementRules(const TokenSequence &);
   void SkipToEndOfLine();
   bool MustSkipToEndOfLine() const;
   void NextChar();
diff --git a/lib/Parser/token-sequence.cpp b/lib/Parser/token-sequence.cpp
index 0a959f2..3ed9d05 100644
--- a/lib/Parser/token-sequence.cpp
+++ b/lib/Parser/token-sequence.cpp
@@ -286,10 +286,14 @@
   return o;
 }
 
+// Returns the start of the provenance range that `offset` is mapped to.
+Provenance TokenSequence::GetCharProvenance(std::size_t offset) const {
+  return provenances_.Map(offset).start();
+}
+
 Provenance TokenSequence::GetTokenProvenance(
     std::size_t token, std::size_t offset) const {
-  ProvenanceRange range{provenances_.Map(start_[token] + offset)};
-  return range.start();
+  return GetCharProvenance(start_[token] + offset);
 }
 
 ProvenanceRange TokenSequence::GetTokenProvenanceRange(
diff --git a/lib/Parser/token-sequence.h b/lib/Parser/token-sequence.h
index 16cef37..6da6229 100644
--- a/lib/Parser/token-sequence.h
+++ b/lib/Parser/token-sequence.h
@@ -102,6 +102,7 @@
   void Put(const std::string &, Provenance);
   void Put(llvm::raw_string_ostream &, Provenance);
 
+  Provenance GetCharProvenance(std::size_t) const;
   Provenance GetTokenProvenance(
       std::size_t token, std::size_t offset = 0) const;
   ProvenanceRange GetTokenProvenanceRange(
diff --git a/test/Parser/end.f b/test/Parser/end.f
new file mode 100644
index 0000000..e0225e6
--- /dev/null
+++ b/test/Parser/end.f
@@ -0,0 +1,29 @@
+! RUN: not %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s
+! CHECK: end.f:3:7: error: Program unit END statement may not be continued in fixed form source
+      e
+     + nd
+! CHECK: end.f:6:7: error: Program unit END statement may not be continued in fixed form source
+      end prog
+     +        ram
+! CHECK: end.f:9:7: error: Program unit END statement may not be continued in fixed form source
+      end
+     +       program
+! CHECK: end.f:12:7: error: Program unit END statement may not be continued in fixed form source
+      end
+     +       program
+     1                main
+! CHECK: end.f:16:7: error: Program unit END statement may not be continued in fixed form source
+      end program
+     1            main
+! CHECK: end.f:19:7: error: Initial line of continued statement must not appear to be a program unit END in fixed form source
+      end
+     +    = end + 1
+! CHECK: end.f:22:7: error: Initial line of continued statement must not appear to be a program unit END in fixed form source
+      end module
+     +    = end module + 1
+! CHECK-NOT: end.f:25:7: error: Initial line of continued statement must not appear to be a program unit END in fixed form source
+      end =
+     +      end + 1
+! CHECK-NOT: end.f:28:7: error: Initial line of continued statement must not appear to be a program unit END in fixed form source
+      end block data (
+     +      1) = 666