//===-- GoLexer.cpp ---------------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include <string.h>

#include "GoLexer.h"

using namespace lldb_private;

// Spelling -> token type table for Go keywords and operators.  It starts out
// null (static storage) and is created on first use by LookupKeyword() /
// LookupToken() through InitKeywords().
llvm::StringMap<GoLexer::TokenType> *GoLexer::m_keywords;

// Creates a lexer over the NUL-terminated text `src`.
//
// The text is not copied: m_src is the read cursor (initially the first
// character) and m_end points at the terminating NUL found by strlen().  The
// "last token" starts out as TOK_INVALID with empty text.
GoLexer::GoLexer(const char *src)
    : m_src(src),
      m_end(src + strlen(src)),
      m_last_token(TOK_INVALID, "")
{
}

// Advances m_src past whitespace and comments, stopping on the first character
// that can start a token (or at m_end).
//
// Returns true if a newline was crossed while skipping.  A block comment that
// spans several lines counts as a newline, as the Go specification requires
// for automatic semicolon insertion.
bool
GoLexer::SkipWhitespace()
{
    bool saw_newline = false;
    for (; m_src < m_end; ++m_src)
    {
        if (*m_src == '\n')
            saw_newline = true;
        if (*m_src == '/')
        {
            const char *comment_start = m_src;
            // Not a comment (or an unterminated block comment): the '/' is the
            // start of a token.
            if (!SkipComment())
                return saw_newline;
            // SkipComment() reports success without moving the cursor only
            // for a line comment that runs to the end of the input; nothing
            // but comment text remains, so consume all of it.
            if (m_src == comment_start)
            {
                m_src = m_end;
                return saw_newline;
            }
            // The cursor now rests on the last character of the comment.
            if (memchr(comment_start, '\n', m_src - comment_start) != nullptr)
                saw_newline = true;
            // Keep scanning; the loop increment steps past the comment.  The
            // last comment character must not be tested as a token start.
            continue;
        }
        if (!IsWhitespace(*m_src))
            return saw_newline;
    }
    return saw_newline;
}

// Tries to skip a comment beginning at m_src.
//
// Returns true when m_src starts a comment that could be skipped:
//   * line comment ("//"): m_src is left on the character just before the
//     terminating '\n'.  If the input ends before any '\n', m_src is left
//     unchanged.
//   * block comment: m_src is left on the '/' of the closing "*/".
// Returns false, without moving m_src, when there is no comment here or a
// block comment is never closed.
bool
GoLexer::SkipComment()
{
    if (m_src[0] != '/')
        return false;

    const char second = m_src[1];
    if (second == '/')
    {
        const char *cursor = m_src + 2;
        while (cursor < m_end && *cursor != '\n')
            ++cursor;
        // Only reposition when a newline was actually found.
        if (cursor < m_end)
            m_src = cursor - 1;
        return true;
    }
    if (second == '*')
    {
        const char *cursor = m_src + 2;
        while (cursor < m_end)
        {
            if (cursor[0] == '*' && cursor[1] == '/')
            {
                m_src = cursor + 1;
                return true;
            }
            ++cursor;
        }
    }
    return false;
}

// Scans the next token and returns a reference to it.
//
// The returned reference is to m_last_token, which is overwritten by every
// call.  The token text is the slice of the input consumed by InternalLex();
// it is empty when InternalLex() returns without advancing (e.g. an inserted
// semicolon or end of input).
const GoLexer::Token &
GoLexer::Lex()
{
    const bool crossed_newline = SkipWhitespace();
    const char *const token_begin = m_src;
    // InternalLex() inspects the previous token's type, so classify first and
    // only then overwrite m_last_token.
    const TokenType type = InternalLex(crossed_newline);
    m_last_token.m_type = type;
    m_last_token.m_value = llvm::StringRef(token_begin, m_src - token_begin);
    return m_last_token;
}

// Classifies the token starting at m_src and advances m_src past it.
//
// `newline` says whether a newline was skipped since the previous token.  If
// so, and the previous token is one of the kinds listed below, OP_SEMICOLON is
// returned without consuming any input (automatic semicolon insertion).
// Returns TOK_EOF at the end of input, and TOK_INVALID (consuming one
// character) when the character cannot start any token.
GoLexer::TokenType
GoLexer::InternalLex(bool newline)
{
    if (m_src >= m_end)
        return TOK_EOF;

    if (newline)
    {
        bool insert_semicolon;
        switch (m_last_token.m_type)
        {
            case TOK_IDENTIFIER:
            case LIT_INTEGER:
            case LIT_FLOAT:
            case LIT_IMAGINARY:
            case LIT_RUNE:
            case LIT_STRING:
            case KEYWORD_BREAK:
            case KEYWORD_CONTINUE:
            case KEYWORD_FALLTHROUGH:
            case KEYWORD_RETURN:
            case OP_PLUS_PLUS:
            case OP_MINUS_MINUS:
            case OP_RPAREN:
            case OP_RBRACK:
            case OP_RBRACE:
                insert_semicolon = true;
                break;
            default:
                insert_semicolon = false;
                break;
        }
        if (insert_semicolon)
            return OP_SEMICOLON;
    }

    // Dispatch on the first character of the token.
    const char first = *m_src;
    switch (first)
    {
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            return DoNumber();
        case '.':
            // ".5" is a number; any other '.' is an operator.
            return IsDecimal(m_src[1]) ? DoNumber() : DoOperator();
        case '$':
            // For lldb persistent vars.
            return DoIdent();
        case '"':
        case '`':
            return DoString();
        case '\'':
            return DoRune();
        case '+': case '-': case '*': case '/': case '%':
        case '&': case '|': case '^':
        case '<': case '>': case '!': case '=':
        case ':': case ';': case ',':
        case '(': case ')':
        case '[': case ']':
        case '{': case '}':
            return DoOperator();
        default:
            break;
    }

    if (!IsLetterOrDigit(first))
    {
        // Unknown character: consume it so the lexer keeps making progress.
        ++m_src;
        return TOK_INVALID;
    }
    return DoIdent();
}

// Lexes an operator or punctuation token starting at m_src using longest
// match: a three-character spelling is tried first, then two characters, then
// one.
//
// Always consumes at least one character.  Returns TOK_INVALID when not even
// the single character is present in the keyword/operator table.
GoLexer::TokenType
GoLexer::DoOperator()
{
    for (int length = 3; length >= 2; --length)
    {
        // Not enough input left for a spelling of this length.
        if (m_end - m_src < length)
            continue;
        const TokenType match = LookupKeyword(llvm::StringRef(m_src, length));
        if (match != TOK_INVALID)
        {
            m_src += length;
            return match;
        }
    }
    const TokenType single = LookupKeyword(llvm::StringRef(m_src, 1));
    ++m_src;
    return single;
}

// Lexes an identifier or keyword.
//
// The first character is accepted unconditionally (the caller has already
// vetted it; it may be '$'), then every following IsLetterOrDigit() character
// is consumed.  Returns the keyword's token type when the text is in the
// keyword table, otherwise TOK_IDENTIFIER.
GoLexer::TokenType
GoLexer::DoIdent()
{
    const char *const begin = m_src;
    for (++m_src; m_src < m_end; ++m_src)
    {
        if (!IsLetterOrDigit(*m_src))
            break;
    }
    const TokenType keyword = LookupKeyword(llvm::StringRef(begin, m_src - begin));
    return keyword == TOK_INVALID ? TOK_IDENTIFIER : keyword;
}

// Lexes a numeric literal starting at m_src.
//
// Returns LIT_INTEGER for "0x"/"0X" hexadecimal and plain decimal digit runs,
// LIT_FLOAT once a '.' or an exponent has been consumed, and LIT_IMAGINARY
// when the literal ends in 'i'.  Scanning is not bounded by m_end.
// NOTE(review): this presumably relies on the NUL terminator at m_end failing
// IsDecimal()/IsHexChar() - confirm against those helpers.
GoLexer::TokenType
GoLexer::DoNumber()
{
    if (m_src[0] == '0' && (m_src[1] == 'x' || m_src[1] == 'X'))
    {
        for (m_src += 2; IsHexChar(*m_src); ++m_src)
        {
        }
        return LIT_INTEGER;
    }

    // Once a '.' or an exponent has been seen the literal is a float and no
    // further '.' may be consumed; only a single exponent is allowed.
    bool is_float = false;
    bool seen_exponent = false;
    for (;;)
    {
        while (IsDecimal(*m_src))
            ++m_src;

        const char c = *m_src;
        if (c == 'i')
        {
            ++m_src;
            return LIT_IMAGINARY;
        }
        if (c == '.')
        {
            if (is_float)
                return LIT_FLOAT;
            is_float = true;
            ++m_src;
            continue;
        }
        if (c == 'e' || c == 'E')
        {
            if (seen_exponent)
                return LIT_FLOAT;
            seen_exponent = true;
            is_float = true;
            ++m_src;
            // Optional exponent sign.
            if (*m_src == '+' || *m_src == '-')
                ++m_src;
            continue;
        }
        return is_float ? LIT_FLOAT : LIT_INTEGER;
    }
}

// Lexes a rune literal; m_src is on the opening single quote.
//
// Returns LIT_RUNE with m_src just past the closing quote.  Returns
// TOK_INVALID when a newline (bare or backslash-escaped) or the end of input
// is reached first; in the newline case m_src is left before the newline.
// The contents between the quotes are not validated.
GoLexer::TokenType
GoLexer::DoRune()
{
    while (++m_src < m_end)
    {
        switch (*m_src)
        {
            case '\'':
                ++m_src;
                return LIT_RUNE;
            case '\n':
                return TOK_INVALID;
            case '\\':
                if (m_src[1] == '\n')
                    return TOK_INVALID;
                // Skip the escaped character so an escaped quote does not end
                // the literal.  Only do so when such a character exists:
                // otherwise a trailing backslash would push m_src past m_end
                // and the token text would include the NUL terminator.
                if (m_src + 1 < m_end)
                    ++m_src;
                break;
            default:
                break;
        }
    }
    return TOK_INVALID;
}

// Lexes a string literal; m_src is on the opening '"' or '`'.
//
// Raw strings (back-quoted) run to the next back quote and may contain any
// character, including newlines.  Interpreted strings (double-quoted) end at
// the next unescaped '"'.  Returns LIT_STRING with m_src just past the closing
// delimiter.  Returns TOK_INVALID when the literal is unterminated, or when an
// interpreted string reaches a newline (bare or backslash-escaped) first.
GoLexer::TokenType
GoLexer::DoString()
{
    if (*m_src == '`')
    {
        while (++m_src < m_end)
        {
            if (*m_src == '`')
            {
                ++m_src;
                return LIT_STRING;
            }
        }
        return TOK_INVALID;
    }
    while (++m_src < m_end)
    {
        switch (*m_src)
        {
            case '"':
                ++m_src;
                return LIT_STRING;
            case '\n':
                return TOK_INVALID;
            case '\\':
                if (m_src[1] == '\n')
                    return TOK_INVALID;
                // Skip the escaped character so an escaped quote does not end
                // the literal.  Only do so when such a character exists:
                // otherwise a trailing backslash would push m_src past m_end
                // and the token text would include the NUL terminator.
                if (m_src + 1 < m_end)
                    ++m_src;
                break;
            default:
                break;
        }
    }
    return TOK_INVALID;
}

// Maps the exact spelling `id` to its keyword/operator token type, or
// TOK_INVALID when the spelling is not in the table.  The shared table is
// built on first use.
GoLexer::TokenType
GoLexer::LookupKeyword(llvm::StringRef id)
{
    if (!m_keywords)
        m_keywords = InitKeywords();
    auto entry = m_keywords->find(id);
    return entry != m_keywords->end() ? entry->second : TOK_INVALID;
}

// Reverse lookup: returns the spelling of keyword/operator token type `t`, or
// an empty string when no table entry has that type.  This is a linear scan
// of the shared table, which is built on first use.
llvm::StringRef
GoLexer::LookupToken(TokenType t)
{
    if (!m_keywords)
        m_keywords = InitKeywords();
    for (auto it = m_keywords->begin(), last = m_keywords->end(); it != last; ++it)
    {
        if (it->getValue() == t)
            return it->getKey();
    }
    return "";
}

// Builds the spelling -> token type table covering Go's keywords, operators
// and punctuation.
//
// Returns a newly heap-allocated map; the caller receives the raw pointer
// (LookupKeyword()/LookupToken() store it in m_keywords).
llvm::StringMap<GoLexer::TokenType> *
GoLexer::InitKeywords()
{
    struct Spelling
    {
        const char *text;
        TokenType type;
    };
    static const Spelling spellings[] = {
        // Keywords.
        {"break", KEYWORD_BREAK},
        {"default", KEYWORD_DEFAULT},
        {"func", KEYWORD_FUNC},
        {"interface", KEYWORD_INTERFACE},
        {"select", KEYWORD_SELECT},
        {"case", KEYWORD_CASE},
        {"defer", KEYWORD_DEFER},
        {"go", KEYWORD_GO},
        {"map", KEYWORD_MAP},
        {"struct", KEYWORD_STRUCT},
        {"chan", KEYWORD_CHAN},
        {"else", KEYWORD_ELSE},
        {"goto", KEYWORD_GOTO},
        {"package", KEYWORD_PACKAGE},
        {"switch", KEYWORD_SWITCH},
        {"const", KEYWORD_CONST},
        {"fallthrough", KEYWORD_FALLTHROUGH},
        {"if", KEYWORD_IF},
        {"range", KEYWORD_RANGE},
        {"type", KEYWORD_TYPE},
        {"continue", KEYWORD_CONTINUE},
        {"for", KEYWORD_FOR},
        {"import", KEYWORD_IMPORT},
        {"return", KEYWORD_RETURN},
        {"var", KEYWORD_VAR},
        // Arithmetic and bitwise operators.
        {"+", OP_PLUS},
        {"-", OP_MINUS},
        {"*", OP_STAR},
        {"/", OP_SLASH},
        {"%", OP_PERCENT},
        {"&", OP_AMP},
        {"|", OP_PIPE},
        {"^", OP_CARET},
        {"<<", OP_LSHIFT},
        {">>", OP_RSHIFT},
        {"&^", OP_AMP_CARET},
        // Compound assignment.
        {"+=", OP_PLUS_EQ},
        {"-=", OP_MINUS_EQ},
        {"*=", OP_STAR_EQ},
        {"/=", OP_SLASH_EQ},
        {"%=", OP_PERCENT_EQ},
        {"&=", OP_AMP_EQ},
        {"|=", OP_PIPE_EQ},
        {"^=", OP_CARET_EQ},
        {"<<=", OP_LSHIFT_EQ},
        {">>=", OP_RSHIFT_EQ},
        {"&^=", OP_AMP_CARET_EQ},
        // Logical, channel, increment/decrement, comparison, assignment.
        {"&&", OP_AMP_AMP},
        {"||", OP_PIPE_PIPE},
        {"<-", OP_LT_MINUS},
        {"++", OP_PLUS_PLUS},
        {"--", OP_MINUS_MINUS},
        {"==", OP_EQ_EQ},
        {"<", OP_LT},
        {">", OP_GT},
        {"=", OP_EQ},
        {"!", OP_BANG},
        {"!=", OP_BANG_EQ},
        {"<=", OP_LT_EQ},
        {">=", OP_GT_EQ},
        {":=", OP_COLON_EQ},
        // Punctuation.
        {"...", OP_DOTS},
        {"(", OP_LPAREN},
        {"[", OP_LBRACK},
        {"{", OP_LBRACE},
        {",", OP_COMMA},
        {".", OP_DOT},
        {")", OP_RPAREN},
        {"]", OP_RBRACK},
        {"}", OP_RBRACE},
        {";", OP_SEMICOLON},
        {":", OP_COLON},
    };

    auto *table = new llvm::StringMap<TokenType>(128);
    for (const Spelling &entry : spellings)
        (*table)[entry.text] = entry.type;
    return table;
}
