//===-- GoLexer.cpp ---------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include #include "GoLexer.h" using namespace lldb_private; llvm::StringMap *GoLexer::m_keywords; GoLexer::GoLexer(const char *src) : m_src(src), m_end(src + strlen(src)), m_last_token(TOK_INVALID, "") {} bool GoLexer::SkipWhitespace() { bool saw_newline = false; for (; m_src < m_end; ++m_src) { if (*m_src == '\n') saw_newline = true; if (*m_src == '/' && !SkipComment()) return saw_newline; else if (!IsWhitespace(*m_src)) return saw_newline; } return saw_newline; } bool GoLexer::SkipComment() { if (m_src[0] == '/' && m_src[1] == '/') { for (const char *c = m_src + 2; c < m_end; ++c) { if (*c == '\n') { m_src = c - 1; return true; } } return true; } else if (m_src[0] == '/' && m_src[1] == '*') { for (const char *c = m_src + 2; c < m_end; ++c) { if (c[0] == '*' && c[1] == '/') { m_src = c + 1; return true; } } } return false; } const GoLexer::Token &GoLexer::Lex() { bool newline = SkipWhitespace(); const char *start = m_src; m_last_token.m_type = InternalLex(newline); m_last_token.m_value = llvm::StringRef(start, m_src - start); return m_last_token; } GoLexer::TokenType GoLexer::InternalLex(bool newline) { if (m_src >= m_end) { return TOK_EOF; } if (newline) { switch (m_last_token.m_type) { case TOK_IDENTIFIER: case LIT_FLOAT: case LIT_IMAGINARY: case LIT_INTEGER: case LIT_RUNE: case LIT_STRING: case KEYWORD_BREAK: case KEYWORD_CONTINUE: case KEYWORD_FALLTHROUGH: case KEYWORD_RETURN: case OP_PLUS_PLUS: case OP_MINUS_MINUS: case OP_RPAREN: case OP_RBRACK: case OP_RBRACE: return OP_SEMICOLON; default: break; } } char c = *m_src; switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return DoNumber(); case '+': case '-': case '*': case '/': case '%': case '&': case '|': case '^': case '<': case '>': case '!': case ':': case ';': case '(': case ')': case '[': case ']': case '{': case '}': case ',': case '=': return DoOperator(); case '.': if (IsDecimal(m_src[1])) return DoNumber(); return DoOperator(); case '$': // For lldb persistent vars. return DoIdent(); case '"': case '`': return DoString(); case '\'': return DoRune(); default: break; } if (IsLetterOrDigit(c)) return DoIdent(); ++m_src; return TOK_INVALID; } GoLexer::TokenType GoLexer::DoOperator() { TokenType t = TOK_INVALID; if (m_end - m_src > 2) { t = LookupKeyword(llvm::StringRef(m_src, 3)); if (t != TOK_INVALID) m_src += 3; } if (t == TOK_INVALID && m_end - m_src > 1) { t = LookupKeyword(llvm::StringRef(m_src, 2)); if (t != TOK_INVALID) m_src += 2; } if (t == TOK_INVALID) { t = LookupKeyword(llvm::StringRef(m_src, 1)); ++m_src; } return t; } GoLexer::TokenType GoLexer::DoIdent() { const char *start = m_src++; while (m_src < m_end && IsLetterOrDigit(*m_src)) { ++m_src; } TokenType kw = LookupKeyword(llvm::StringRef(start, m_src - start)); if (kw != TOK_INVALID) return kw; return TOK_IDENTIFIER; } GoLexer::TokenType GoLexer::DoNumber() { if (m_src[0] == '0' && (m_src[1] == 'x' || m_src[1] == 'X')) { m_src += 2; while (IsHexChar(*m_src)) ++m_src; return LIT_INTEGER; } bool dot_ok = true; bool e_ok = true; while (true) { while (IsDecimal(*m_src)) ++m_src; switch (*m_src) { case 'i': ++m_src; return LIT_IMAGINARY; case '.': if (!dot_ok) return LIT_FLOAT; ++m_src; dot_ok = false; break; case 'e': case 'E': if (!e_ok) return LIT_FLOAT; dot_ok = e_ok = false; ++m_src; if (*m_src == '+' || *m_src == '-') ++m_src; break; default: if (dot_ok) return LIT_INTEGER; return LIT_FLOAT; } } } GoLexer::TokenType GoLexer::DoRune() { while (++m_src < m_end) { switch (*m_src) { case '\'': ++m_src; return LIT_RUNE; case '\n': return TOK_INVALID; case '\\': if (m_src[1] == '\n') return TOK_INVALID; ++m_src; } } return TOK_INVALID; } GoLexer::TokenType GoLexer::DoString() { if (*m_src == '`') { while (++m_src < m_end) { if (*m_src == '`') { ++m_src; return LIT_STRING; } } return TOK_INVALID; } while (++m_src < m_end) { switch (*m_src) { case '"': ++m_src; return LIT_STRING; case '\n': return TOK_INVALID; case '\\': if (m_src[1] == '\n') return TOK_INVALID; ++m_src; } } return TOK_INVALID; } GoLexer::TokenType GoLexer::LookupKeyword(llvm::StringRef id) { if (m_keywords == nullptr) m_keywords = InitKeywords(); const auto &it = m_keywords->find(id); if (it == m_keywords->end()) return TOK_INVALID; return it->second; } llvm::StringRef GoLexer::LookupToken(TokenType t) { if (m_keywords == nullptr) m_keywords = InitKeywords(); for (const auto &e : *m_keywords) { if (e.getValue() == t) return e.getKey(); } return ""; } llvm::StringMap *GoLexer::InitKeywords() { auto &result = *new llvm::StringMap(128); result["break"] = KEYWORD_BREAK; result["default"] = KEYWORD_DEFAULT; result["func"] = KEYWORD_FUNC; result["interface"] = KEYWORD_INTERFACE; result["select"] = KEYWORD_SELECT; result["case"] = KEYWORD_CASE; result["defer"] = KEYWORD_DEFER; result["go"] = KEYWORD_GO; result["map"] = KEYWORD_MAP; result["struct"] = KEYWORD_STRUCT; result["chan"] = KEYWORD_CHAN; result["else"] = KEYWORD_ELSE; result["goto"] = KEYWORD_GOTO; result["package"] = KEYWORD_PACKAGE; result["switch"] = KEYWORD_SWITCH; result["const"] = KEYWORD_CONST; result["fallthrough"] = KEYWORD_FALLTHROUGH; result["if"] = KEYWORD_IF; result["range"] = KEYWORD_RANGE; result["type"] = KEYWORD_TYPE; result["continue"] = KEYWORD_CONTINUE; result["for"] = KEYWORD_FOR; result["import"] = KEYWORD_IMPORT; result["return"] = KEYWORD_RETURN; result["var"] = KEYWORD_VAR; result["+"] = OP_PLUS; result["-"] = OP_MINUS; result["*"] = OP_STAR; result["/"] = OP_SLASH; result["%"] = OP_PERCENT; result["&"] = OP_AMP; result["|"] = OP_PIPE; result["^"] = OP_CARET; result["<<"] = OP_LSHIFT; result[">>"] = OP_RSHIFT; result["&^"] = OP_AMP_CARET; result["+="] = OP_PLUS_EQ; result["-="] = OP_MINUS_EQ; result["*="] = OP_STAR_EQ; result["/="] = OP_SLASH_EQ; result["%="] = OP_PERCENT_EQ; result["&="] = OP_AMP_EQ; result["|="] = OP_PIPE_EQ; result["^="] = OP_CARET_EQ; result["<<="] = OP_LSHIFT_EQ; result[">>="] = OP_RSHIFT_EQ; result["&^="] = OP_AMP_CARET_EQ; result["&&"] = OP_AMP_AMP; result["||"] = OP_PIPE_PIPE; result["<-"] = OP_LT_MINUS; result["++"] = OP_PLUS_PLUS; result["--"] = OP_MINUS_MINUS; result["=="] = OP_EQ_EQ; result["<"] = OP_LT; result[">"] = OP_GT; result["="] = OP_EQ; result["!"] = OP_BANG; result["!="] = OP_BANG_EQ; result["<="] = OP_LT_EQ; result[">="] = OP_GT_EQ; result[":="] = OP_COLON_EQ; result["..."] = OP_DOTS; result["("] = OP_LPAREN; result["["] = OP_LBRACK; result["{"] = OP_LBRACE; result[","] = OP_COMMA; result["."] = OP_DOT; result[")"] = OP_RPAREN; result["]"] = OP_RBRACK; result["}"] = OP_RBRACE; result[";"] = OP_SEMICOLON; result[":"] = OP_COLON; return &result; }