Make positionToOffset return llvm::Expected<size_t>

Summary:

To implement incremental document syncing, we want to verify that the
ranges provided by the front-end are valid.  Currently, positionToOffset
deals with invalid Positions by returning 0 or Code.size(), which are
two valid offsets.  Instead, return an llvm:Expected<size_t> with an
error if the position is invalid.

According to the LSP, if the character value exceeds the number of
characters of the given line, it should default back to the end of the
line.  It makes sense in some contexts to have this behavior, and does
not in other contexts.  The AllowColumnsBeyondLineLength parameter
allows to decide what to do in that case, default back to the end of the
line, or return an error.

Reviewers: ilya-biryukov

Subscribers: klimek, ilya-biryukov, jkorous-apple, ioeric, cfe-commits

Differential Revision: https://reviews.llvm.org/D44673

git-svn-id: https://llvm.org/svn/llvm-project/clang-tools-extra/trunk@328100 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/clangd/ClangdServer.cpp b/clangd/ClangdServer.cpp
index 4d49072..dbb3ce4 100644
--- a/clangd/ClangdServer.cpp
+++ b/clangd/ClangdServer.cpp
@@ -190,9 +190,13 @@
 
 llvm::Expected<tooling::Replacements>
 ClangdServer::formatRange(StringRef Code, PathRef File, Range Rng) {
-  size_t Begin = positionToOffset(Code, Rng.start);
-  size_t Len = positionToOffset(Code, Rng.end) - Begin;
-  return formatCode(Code, File, {tooling::Range(Begin, Len)});
+  llvm::Expected<size_t> Begin = positionToOffset(Code, Rng.start);
+  if (!Begin)
+    return Begin.takeError();
+  llvm::Expected<size_t> End = positionToOffset(Code, Rng.end);
+  if (!End)
+    return End.takeError();
+  return formatCode(Code, File, {tooling::Range(*Begin, *End - *Begin)});
 }
 
 llvm::Expected<tooling::Replacements> ClangdServer::formatFile(StringRef Code,
@@ -205,11 +209,13 @@
 ClangdServer::formatOnType(StringRef Code, PathRef File, Position Pos) {
   // Look for the previous opening brace from the character position and
   // format starting from there.
-  size_t CursorPos = positionToOffset(Code, Pos);
-  size_t PreviousLBracePos = StringRef(Code).find_last_of('{', CursorPos);
+  llvm::Expected<size_t> CursorPos = positionToOffset(Code, Pos);
+  if (!CursorPos)
+    return CursorPos.takeError();
+  size_t PreviousLBracePos = StringRef(Code).find_last_of('{', *CursorPos);
   if (PreviousLBracePos == StringRef::npos)
-    PreviousLBracePos = CursorPos;
-  size_t Len = CursorPos - PreviousLBracePos;
+    PreviousLBracePos = *CursorPos;
+  size_t Len = *CursorPos - PreviousLBracePos;
 
   return formatCode(Code, File, {tooling::Range(PreviousLBracePos, Len)});
 }
diff --git a/clangd/SourceCode.cpp b/clangd/SourceCode.cpp
index 191f3c2..8e1db83 100644
--- a/clangd/SourceCode.cpp
+++ b/clangd/SourceCode.cpp
@@ -9,23 +9,43 @@
 #include "SourceCode.h"
 
 #include "clang/Basic/SourceManager.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
 
 namespace clang {
 namespace clangd {
 using namespace llvm;
 
-size_t positionToOffset(StringRef Code, Position P) {
+llvm::Expected<size_t> positionToOffset(StringRef Code, Position P,
+                                        bool AllowColumnsBeyondLineLength) {
   if (P.line < 0)
-    return 0;
+    return llvm::make_error<llvm::StringError>(
+        llvm::formatv("Line value can't be negative ({0})", P.line),
+        llvm::errc::invalid_argument);
+  if (P.character < 0)
+    return llvm::make_error<llvm::StringError>(
+        llvm::formatv("Character value can't be negative ({0})", P.character),
+        llvm::errc::invalid_argument);
   size_t StartOfLine = 0;
   for (int I = 0; I != P.line; ++I) {
     size_t NextNL = Code.find('\n', StartOfLine);
     if (NextNL == StringRef::npos)
-      return Code.size();
+      return llvm::make_error<llvm::StringError>(
+          llvm::formatv("Line value is out of range ({0})", P.line),
+          llvm::errc::invalid_argument);
     StartOfLine = NextNL + 1;
   }
+
+  size_t NextNL = Code.find('\n', StartOfLine);
+  if (NextNL == StringRef::npos)
+    NextNL = Code.size();
+
+  if (StartOfLine + P.character > NextNL && !AllowColumnsBeyondLineLength)
+    return llvm::make_error<llvm::StringError>(
+        llvm::formatv("Character value is out of range ({0})", P.character),
+        llvm::errc::invalid_argument);
   // FIXME: officially P.character counts UTF-16 code units, not UTF-8 bytes!
-  return std::min(Code.size(), StartOfLine + std::max(0, P.character));
+  return std::min(NextNL, StartOfLine + P.character);
 }
 
 Position offsetToPosition(StringRef Code, size_t Offset) {
diff --git a/clangd/SourceCode.h b/clangd/SourceCode.h
index 2bf5d0e..6a2efd9 100644
--- a/clangd/SourceCode.h
+++ b/clangd/SourceCode.h
@@ -22,12 +22,27 @@
 namespace clangd {
 
 /// Turn a [line, column] pair into an offset in Code.
-size_t positionToOffset(llvm::StringRef Code, Position P);
+///
+/// If the character value is greater than the line length, the behavior depends
+/// on AllowColumnsBeyondLineLength:
+///
+/// - if true: default back to the end of the line
+/// - if false: return an error
+///
+/// If the line number is greater than the number of lines in the document,
+/// always return an error.
+///
+/// The returned value is in the range [0, Code.size()].
+llvm::Expected<size_t>
+positionToOffset(llvm::StringRef Code, Position P,
+                 bool AllowColumnsBeyondLineLength = true);
 
 /// Turn an offset in Code into a [line, column] pair.
+/// FIXME: This should return an error if the offset is invalid.
 Position offsetToPosition(llvm::StringRef Code, size_t Offset);
 
 /// Turn a SourceLocation into a [line, column] pair.
+/// FIXME: This should return an error if the location is invalid.
 Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc);
 
 // Converts a half-open clang source range to an LSP range.
diff --git a/unittests/clangd/Annotations.cpp b/unittests/clangd/Annotations.cpp
index 92d1cb1..fc9c4cb 100644
--- a/unittests/clangd/Annotations.cpp
+++ b/unittests/clangd/Annotations.cpp
@@ -86,7 +86,11 @@
 std::pair<std::size_t, std::size_t>
 Annotations::offsetRange(llvm::StringRef Name) const {
   auto R = range(Name);
-  return {positionToOffset(Code, R.start), positionToOffset(Code, R.end)};
+  llvm::Expected<size_t> Start = positionToOffset(Code, R.start);
+  llvm::Expected<size_t> End = positionToOffset(Code, R.end);
+  assert(Start);
+  assert(End);
+  return {*Start, *End};
 }
 
 } // namespace clangd
diff --git a/unittests/clangd/CMakeLists.txt b/unittests/clangd/CMakeLists.txt
index 6017868..214866a 100644
--- a/unittests/clangd/CMakeLists.txt
+++ b/unittests/clangd/CMakeLists.txt
@@ -43,4 +43,5 @@
   clangTooling
   clangToolingCore
   LLVMSupport
+  LLVMTestingSupport
   )
diff --git a/unittests/clangd/SourceCodeTests.cpp b/unittests/clangd/SourceCodeTests.cpp
index 86c8a09..d787641 100644
--- a/unittests/clangd/SourceCodeTests.cpp
+++ b/unittests/clangd/SourceCodeTests.cpp
@@ -7,7 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 #include "SourceCode.h"
+#include "llvm/Support/Error.h"
 #include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Testing/Support/Error.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
@@ -15,6 +17,9 @@
 namespace clangd {
 namespace {
 
+using llvm::Failed;
+using llvm::HasValue;
+
 MATCHER_P2(Pos, Line, Col, "") {
   return arg.line == Line && arg.character == Col;
 }
@@ -33,30 +38,57 @@
 
 TEST(SourceCodeTests, PositionToOffset) {
   // line out of bounds
-  EXPECT_EQ(0u, positionToOffset(File, position(-1, 2)));
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(-1, 2)), Failed());
   // first line
-  EXPECT_EQ(0u, positionToOffset(File, position(0, -1))); // out of range
-  EXPECT_EQ(0u, positionToOffset(File, position(0, 0)));  // first character
-  EXPECT_EQ(3u, positionToOffset(File, position(0, 3)));  // middle character
-  EXPECT_EQ(6u, positionToOffset(File, position(0, 6)));  // last character
-  EXPECT_EQ(7u, positionToOffset(File, position(0, 7)));  // the newline itself
-  EXPECT_EQ(8u, positionToOffset(File, position(0, 8)));  // out of range
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, -1)),
+                       Failed()); // out of range
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 0)),
+                       HasValue(0)); // first character
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 3)),
+                       HasValue(3)); // middle character
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 6)),
+                       HasValue(6)); // last character
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 7)),
+                       HasValue(7)); // the newline itself
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 7), false),
+                       HasValue(7));
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 8)),
+                       HasValue(7)); // out of range
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 8), false),
+                       Failed()); // out of range
   // middle line
-  EXPECT_EQ(8u, positionToOffset(File, position(1, -1))); // out of range
-  EXPECT_EQ(8u, positionToOffset(File, position(1, 0)));  // first character
-  EXPECT_EQ(11u, positionToOffset(File, position(1, 3))); // middle character
-  EXPECT_EQ(14u, positionToOffset(File, position(1, 6))); // last character
-  EXPECT_EQ(15u, positionToOffset(File, position(1, 7))); // the newline itself
-  EXPECT_EQ(16u, positionToOffset(File, position(1, 8))); // out of range
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, -1)),
+                       Failed()); // out of range
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 0)),
+                       HasValue(8)); // first character
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 3)),
+                       HasValue(11)); // middle character
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 3), false),
+                       HasValue(11));
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 6)),
+                       HasValue(14)); // last character
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 7)),
+                       HasValue(15)); // the newline itself
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 8)),
+                       HasValue(15)); // out of range
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 8), false),
+                       Failed()); // out of range
   // last line
-  EXPECT_EQ(16u, positionToOffset(File, position(2, -1))); // out of range
-  EXPECT_EQ(16u, positionToOffset(File, position(2, 0)));  // first character
-  EXPECT_EQ(19u, positionToOffset(File, position(2, 3)));  // middle character
-  EXPECT_EQ(23u, positionToOffset(File, position(2, 7)));  // last character
-  EXPECT_EQ(24u, positionToOffset(File, position(2, 8)));  // EOF
-  EXPECT_EQ(24u, positionToOffset(File, position(2, 9)));  // out of range
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, -1)),
+                       Failed()); // out of range
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 0)),
+                       HasValue(16)); // first character
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 3)),
+                       HasValue(19)); // middle character
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 7)),
+                       HasValue(23)); // last character
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 8)),
+                       HasValue(24)); // EOF
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 9), false),
+                       Failed()); // out of range
   // line out of bounds
-  EXPECT_EQ(24u, positionToOffset(File, position(3, 1)));
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(3, 0)), Failed());
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(3, 1)), Failed());
 }
 
 TEST(SourceCodeTests, OffsetToPosition) {