| //===-- lib/Parser/source.cpp ---------------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "flang/Parser/source.h" |
| #include "flang/Common/idioms.h" |
| #include "flang/Parser/char-buffer.h" |
| #include "llvm/Support/Errno.h" |
| #include "llvm/Support/FileSystem.h" |
| #include "llvm/Support/Path.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include <algorithm> |
| #include <memory> |
| #include <vector> |
| |
| namespace Fortran::parser { |
| |
| SourceFile::~SourceFile() { Close(); } |
| |
| static std::vector<std::size_t> FindLineStarts(llvm::StringRef source) { |
| std::vector<std::size_t> result; |
| if (source.size() > 0) { |
| CHECK(source.back() == '\n' && "missing ultimate newline"); |
| std::size_t at{0}; |
| do { |
| result.push_back(at); |
| at = source.find('\n', at) + 1; |
| } while (at < source.size()); |
| result.shrink_to_fit(); |
| } |
| return result; |
| } |
| |
| void SourceFile::RecordLineStarts() { |
| lineStart_ = FindLineStarts({content().data(), bytes()}); |
| } |
| |
| // Check for a Unicode byte order mark (BOM). |
| // Module files all have one; so can source files. |
| void SourceFile::IdentifyPayload() { |
| llvm::StringRef content{buf_->getBufferStart(), buf_->getBufferSize()}; |
| constexpr llvm::StringLiteral UTF8_BOM{"\xef\xbb\xbf"}; |
| if (content.startswith(UTF8_BOM)) { |
| bom_end_ = UTF8_BOM.size(); |
| encoding_ = Encoding::UTF_8; |
| } |
| } |
| |
| std::string DirectoryName(std::string path) { |
| llvm::SmallString<128> pathBuf{path}; |
| llvm::sys::path::remove_filename(pathBuf); |
| return pathBuf.str().str(); |
| } |
| |
| std::optional<std::string> LocateSourceFile( |
| std::string name, const std::list<std::string> &searchPath) { |
| if (name == "-" || llvm::sys::path::is_absolute(name)) { |
| return name; |
| } |
| for (const std::string &dir : searchPath) { |
| llvm::SmallString<128> path{dir}; |
| llvm::sys::path::append(path, name); |
| bool isDir{false}; |
| auto er = llvm::sys::fs::is_directory(path, isDir); |
| if (!er && !isDir) { |
| return path.str().str(); |
| } |
| } |
| return std::nullopt; |
| } |
| |
| std::size_t RemoveCarriageReturns(llvm::MutableArrayRef<char> buf) { |
| std::size_t wrote{0}; |
| char *buffer{buf.data()}; |
| char *p{buf.data()}; |
| std::size_t bytes = buf.size(); |
| while (bytes > 0) { |
| void *vp{static_cast<void *>(p)}; |
| void *crvp{std::memchr(vp, '\r', bytes)}; |
| char *crcp{static_cast<char *>(crvp)}; |
| if (!crcp) { |
| std::memmove(buffer + wrote, p, bytes); |
| wrote += bytes; |
| break; |
| } |
| std::size_t chunk = crcp - p; |
| auto advance{chunk + 1}; |
| if (chunk + 1 >= bytes || crcp[1] == '\n') { |
| // CR followed by LF or EOF: omit |
| } else if ((chunk == 0 && p == buf.data()) || crcp[-1] == '\n') { |
| // CR preceded by LF or BOF: omit |
| } else { |
| // CR in line: retain |
| ++chunk; |
| } |
| std::memmove(buffer + wrote, p, chunk); |
| wrote += chunk; |
| p += advance; |
| bytes -= advance; |
| } |
| return wrote; |
| } |
| |
| bool SourceFile::Open(std::string path, llvm::raw_ostream &error) { |
| Close(); |
| path_ = path; |
| std::string errorPath{"'"s + path_ + "'"}; |
| auto bufOr{llvm::WritableMemoryBuffer::getFile(path)}; |
| if (!bufOr) { |
| auto err = bufOr.getError(); |
| error << "Could not open " << errorPath << ": " << err.message(); |
| return false; |
| } |
| buf_ = std::move(bufOr.get()); |
| ReadFile(); |
| return true; |
| } |
| |
| bool SourceFile::ReadStandardInput(llvm::raw_ostream &error) { |
| Close(); |
| path_ = "standard input"; |
| auto buf_or = llvm::MemoryBuffer::getSTDIN(); |
| if (!buf_or) { |
| auto err = buf_or.getError(); |
| error << err.message(); |
| return false; |
| } |
| auto inbuf = std::move(buf_or.get()); |
| buf_ = |
| llvm::WritableMemoryBuffer::getNewUninitMemBuffer(inbuf->getBufferSize()); |
| llvm::copy(inbuf->getBuffer(), buf_->getBufferStart()); |
| ReadFile(); |
| return true; |
| } |
| |
| void SourceFile::ReadFile() { |
| buf_end_ = RemoveCarriageReturns(buf_->getBuffer()); |
| if (content().size() == 0 || content().back() != '\n') { |
| // Don't bother to copy if we have spare memory |
| if (content().size() >= buf_->getBufferSize()) { |
| auto tmp_buf{llvm::WritableMemoryBuffer::getNewUninitMemBuffer( |
| content().size() + 1)}; |
| llvm::copy(content(), tmp_buf->getBufferStart()); |
| buf_ = std::move(tmp_buf); |
| } |
| buf_end_++; |
| buf_->getBuffer()[buf_end_ - 1] = '\n'; |
| } |
| IdentifyPayload(); |
| RecordLineStarts(); |
| } |
| |
| void SourceFile::Close() { |
| path_.clear(); |
| buf_.reset(); |
| } |
| |
| SourcePosition SourceFile::FindOffsetLineAndColumn(std::size_t at) const { |
| CHECK(at < bytes()); |
| |
| auto it = llvm::upper_bound(lineStart_, at); |
| auto low = std::distance(lineStart_.begin(), it - 1); |
| return {*this, static_cast<int>(low + 1), |
| static_cast<int>(at - lineStart_[low] + 1)}; |
| } |
| } // namespace Fortran::parser |