Imported Upstream version 6.10.0.49

Former-commit-id: 1d6753294b2993e1fbf92de9366bb9544db4189b
This commit is contained in:
Xamarin Public Jenkins (auto-signing)
2020-01-16 16:38:04 +00:00
parent d94e79959b
commit 468663ddbb
48518 changed files with 2789335 additions and 61176 deletions

View File

@@ -0,0 +1,150 @@
//===--- AffectedRangeManager.cpp - Format C++ code -----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file implements the AffectedRangeManager class.
///
//===----------------------------------------------------------------------===//
#include "AffectedRangeManager.h"
#include "FormatToken.h"
#include "TokenAnnotator.h"
namespace clang {
namespace format {
/// Walks the lines in [I, E) and records on each one whether it is touched by
/// any of the input ranges.
///
/// For every visited line, `LeadingEmptyLinesAffected` is refreshed first.
/// Lines inside a preprocessor directive are handled as a group: the group
/// extends up to (not including) the next line whose first token has an
/// unescaped newline, and if any part of the group is hit, all of its lines
/// (and their children) are marked. All other lines are delegated to
/// nonPPLineAffected().
///
/// \returns true if at least one line in the range, or one of their children,
/// is affected.
bool AffectedRangeManager::computeAffectedLines(
    SmallVectorImpl<AnnotatedLine *>::iterator I,
    SmallVectorImpl<AnnotatedLine *>::iterator E) {
  bool AnyAffected = false;
  // Last non-preprocessor line visited; preprocessor groups do not update it.
  const AnnotatedLine *Prev = nullptr;

  while (I != E) {
    AnnotatedLine *Current = *I;
    Current->LeadingEmptyLinesAffected =
        affectsLeadingEmptyLines(*Current->First);

    if (!Current->InPPDirective) {
      // Ordinary line: evaluate it on its own (plus its children).
      if (nonPPLineAffected(Current, Prev))
        AnyAffected = true;
      Prev = Current;
      ++I;
      continue;
    }

    // Preprocessor directive: gather every continuation line so that the
    // whole directive is formatted if any token within it is affected.
    FormatToken *DirectiveLast = Current->Last;
    SmallVectorImpl<AnnotatedLine *>::iterator DirectiveEnd = I + 1;
    for (; DirectiveEnd != E && !(*DirectiveEnd)->First->HasUnescapedNewline;
         ++DirectiveEnd)
      DirectiveLast = (*DirectiveEnd)->Last;

    const bool DirectiveHit =
        affectsTokenRange(*Current->First, *DirectiveLast,
                          /*IncludeLeadingNewlines=*/false);
    if (DirectiveHit) {
      AnyAffected = true;
      markAllAsAffected(I, DirectiveEnd);
    }
    I = DirectiveEnd;
  }

  return AnyAffected;
}
bool AffectedRangeManager::affectsCharSourceRange(
const CharSourceRange &Range) {
for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
E = Ranges.end();
I != E; ++I) {
if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
!SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
return true;
}
return false;
}
bool AffectedRangeManager::affectsTokenRange(const FormatToken &First,
const FormatToken &Last,
bool IncludeLeadingNewlines) {
SourceLocation Start = First.WhitespaceRange.getBegin();
if (!IncludeLeadingNewlines)
Start = Start.getLocWithOffset(First.LastNewlineOffset);
SourceLocation End = Last.getStartOfNonWhitespace();
End = End.getLocWithOffset(Last.TokenText.size());
CharSourceRange Range = CharSourceRange::getCharRange(Start, End);
return affectsCharSourceRange(Range);
}
bool AffectedRangeManager::affectsLeadingEmptyLines(const FormatToken &Tok) {
CharSourceRange EmptyLineRange = CharSourceRange::getCharRange(
Tok.WhitespaceRange.getBegin(),
Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset));
return affectsCharSourceRange(EmptyLineRange);
}
/// Sets `Affected` on every line in [I, E) and, recursively, on all of their
/// child lines.
void AffectedRangeManager::markAllAsAffected(
    SmallVectorImpl<AnnotatedLine *>::iterator I,
    SmallVectorImpl<AnnotatedLine *>::iterator E) {
  for (; I != E; ++I) {
    AnnotatedLine *Line = *I;
    Line->Affected = true;
    markAllAsAffected(Line->Children.begin(), Line->Children.end());
  }
}
/// Decides whether the non-preprocessor line \p Line is affected by the input
/// ranges and updates `Line->ChildrenAffected` and `Line->Affected`.
///
/// The line itself counts as affected when one of its tokens is hit, when the
/// first child line of one of its tokens is affected, when it was moved (it
/// starts without a preceding newline right after an affected line), or when
/// it is a lone comment continuing an affected line that ends in a comment.
///
/// \param PreviousLine the line visited before \p Line, or nullptr.
/// \returns true if \p Line or one of its children is affected.
bool AffectedRangeManager::nonPPLineAffected(
    AnnotatedLine *Line, const AnnotatedLine *PreviousLine) {
  // Children are processed first so their `Affected` flags are up to date when
  // the tokens of this line are inspected below.
  const bool AnyChildAffected =
      computeAffectedLines(Line->Children.begin(), Line->Children.end());
  Line->ChildrenAffected = AnyChildAffected;

  // Whether one of the line's tokens is directly affected.
  bool TokenHit = false;
  // Whether the first child line of any of this line's tokens is affected.
  bool FirstChildHit = false;
  // Whether the leading newlines of the token under inspection must be taken
  // into account; decided by the token inspected just before it.
  bool WithLeadingNewlines = false;
  for (FormatToken *Tok = Line->First; Tok != nullptr; Tok = Tok->Next) {
    if (affectsTokenRange(*Tok, *Tok, WithLeadingNewlines))
      TokenHit = true;

    const bool HasChildren = !Tok->Children.empty();
    if (HasChildren && Tok->Children.front()->Affected)
      FirstChildHit = true;

    WithLeadingNewlines = !HasChildren;
  }

  const FormatToken &FirstTok = *Line->First;
  const bool AfterAffectedLine = PreviousLine && PreviousLine->Affected;

  // Was this line moved, i.e. has it previously been on the same line as an
  // affected line?
  const bool WasMoved = AfterAffectedLine && FirstTok.NewlinesBefore == 0;

  // A line consisting of a single comment, at most one newline away from an
  // affected line that itself ends in a comment.
  const bool ContinuesComment =
      FirstTok.is(tok::comment) && FirstTok.Next == nullptr &&
      FirstTok.NewlinesBefore < 2 && AfterAffectedLine &&
      PreviousLine->Last->is(tok::comment);

  const bool SelfAffected =
      TokenHit || FirstChildHit || WasMoved || ContinuesComment;
  if (SelfAffected)
    Line->Affected = true;

  return AnyChildAffected || SelfAffected;
}
} // namespace format
} // namespace clang

View File

@@ -0,0 +1,67 @@
//===--- AffectedRangeManager.h - Format C++ code ---------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief AffectedRangeManager class manages affected ranges in the code.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_FORMAT_AFFECTEDRANGEMANAGER_H
#define LLVM_CLANG_LIB_FORMAT_AFFECTEDRANGEMANAGER_H
#include "clang/Basic/SourceManager.h"
namespace clang {
namespace format {
struct FormatToken;
class AnnotatedLine;
// Determines which annotated lines are touched by a fixed set of input source
// ranges and records the result on the lines themselves.
class AffectedRangeManager {
public:
// Keeps a reference to 'SourceMgr' and stores a copy of 'Ranges'; the source
// manager must therefore outlive this object.
AffectedRangeManager(const SourceManager &SourceMgr,
const ArrayRef<CharSourceRange> Ranges)
: SourceMgr(SourceMgr), Ranges(Ranges.begin(), Ranges.end()) {}
// Determines which lines are affected by the SourceRanges given as input.
// Returns \c true if at least one line between I and E or one of their
// children is affected.
bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I,
SmallVectorImpl<AnnotatedLine *>::iterator E);
// Returns true if 'Range' intersects with one of the input ranges.
bool affectsCharSourceRange(const CharSourceRange &Range);
private:
// Returns true if the range from 'First' to 'Last' intersects with one of the
// input ranges. 'IncludeLeadingNewlines' controls whether the newlines in the
// whitespace before 'First' are part of the tested range.
bool affectsTokenRange(const FormatToken &First, const FormatToken &Last,
bool IncludeLeadingNewlines);
// Returns true if one of the input ranges intersects the leading empty lines
// before 'Tok'.
bool affectsLeadingEmptyLines(const FormatToken &Tok);
// Marks all lines between I and E as well as all their children as affected.
void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I,
SmallVectorImpl<AnnotatedLine *>::iterator E);
// Determines whether 'Line' is affected by the SourceRanges given as input.
// 'PreviousLine' is the line preceding 'Line' and may be null.
// Returns \c true if the line or one of its children is affected.
bool nonPPLineAffected(AnnotatedLine *Line,
const AnnotatedLine *PreviousLine);
// Used to order source locations when testing ranges for intersection.
const SourceManager &SourceMgr;
// The input ranges, copied at construction time.
const SmallVector<CharSourceRange, 8> Ranges;
};
} // namespace format
} // namespace clang
#endif // LLVM_CLANG_LIB_FORMAT_AFFECTEDRANGEMANAGER_H

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,468 @@
//===--- BreakableToken.h - Format C++ code -------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Declares BreakableToken, BreakableStringLiteral, BreakableComment,
/// BreakableBlockComment and BreakableLineCommentSection classes, that contain
/// token type-specific logic to break long lines in tokens and reflow content
/// between tokens.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_FORMAT_BREAKABLETOKEN_H
#define LLVM_CLANG_LIB_FORMAT_BREAKABLETOKEN_H
#include "Encoding.h"
#include "TokenAnnotator.h"
#include "WhitespaceManager.h"
#include "llvm/Support/Regex.h"
#include <utility>
namespace clang {
namespace format {
/// \brief Checks if \p Token switches formatting, like /* clang-format off */.
/// \p Token must be a comment.
bool switchesFormatting(const FormatToken &Token);
struct FormatStyle;
/// \brief Base class for tokens / ranges of tokens that can allow breaking
/// within the tokens - for example, to avoid whitespace beyond the column
/// limit, or to reflow text.
///
/// Generally, a breakable token consists of logical lines, addressed by a line
/// index. For example, in a sequence of line comments, each line comment is its
/// own logical line; similarly, for a block comment, each line in the block
/// comment is on its own logical line.
///
/// There are two methods to compute the layout of the token:
/// - getRangeLength measures the number of columns needed for a range of text
/// within a logical line, and
/// - getContentStartColumn returns the start column at which we want the
/// content of a logical line to start (potentially after introducing a line
/// break).
///
/// The mechanism to adapt the layout of the breakable token is organised
/// around the concept of a \c Split, which is a whitespace range that signifies
/// a position of the content of a token where a reformatting might be done.
///
/// Operating with splits is divided into two operations:
/// - getSplit, for finding a split starting at a position,
/// - insertBreak, for executing the split using a whitespace manager.
///
/// There is a pair of operations that are used to compress a long whitespace
/// range with a single space if that will bring the line length under the
/// column limit:
/// - getLengthAfterCompression, for calculating the size in columns of the
/// line after a whitespace range has been compressed, and
/// - compressWhitespace, for executing the whitespace compression using a
/// whitespace manager; note that the compressed whitespace may be in the
/// middle of the original line and of the reformatted line.
///
/// For tokens where the whitespace before each line needs to be also
/// reformatted, for example for tokens supporting reflow, there are analogous
/// operations that might be executed before the main line breaking occurs:
/// - getReflowSplit, for finding a split such that the content preceding it
/// needs to be specially reflown,
/// - reflow, for executing the split using a whitespace manager,
/// - introducesBreakBeforeToken, for checking if reformatting the beginning
/// of the content introduces a line break before it,
/// - adaptStartOfLine, for executing the reflow using a whitespace
/// manager.
///
/// For tokens that require the whitespace after the last line to be
/// reformatted, for example in multiline jsdoc comments that require the
/// trailing '*/' to be on a line of itself, there are analogous operations
/// that might be executed after the last line has been reformatted:
/// - getSplitAfterLastLine, for finding a split after the last line that needs
/// to be reflown,
/// - replaceWhitespaceAfterLastLine, for executing the reflow using a
/// whitespace manager.
///
class BreakableToken {
public:
/// \brief Contains starting character index and length of split.
typedef std::pair<StringRef::size_type, unsigned> Split;
/// \brief Virtual so that derived tokens can be destroyed through a
/// \c BreakableToken pointer.
virtual ~BreakableToken() {}
/// \brief Returns the number of lines in this token in the original code.
virtual unsigned getLineCount() const = 0;
/// \brief Returns the number of columns required to format the text in the
/// byte range [\p Offset, \p Offset \c + \p Length).
///
/// \p Offset is the byte offset from the start of the content of the line
/// at \p LineIndex.
///
/// \p StartColumn is the column at which the text starts in the formatted
/// file, needed to compute tab stops correctly.
virtual unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
StringRef::size_type Length,
unsigned StartColumn) const = 0;
/// \brief Returns the number of columns required to format the text following
/// the byte \p Offset in the line \p LineIndex, including potentially
/// unbreakable sequences of tokens following after the end of the token.
///
/// \p Offset is the byte offset from the start of the content of the line
/// at \p LineIndex.
///
/// \p StartColumn is the column at which the text starts in the formatted
/// file, needed to compute tab stops correctly.
///
/// For breakable tokens that never use extra space at the end of a line, this
/// is equivalent to getRangeLength with a Length of StringRef::npos; that is
/// what this default implementation does.
virtual unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,
unsigned StartColumn) const {
return getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn);
}
/// \brief Returns the column at which content in line \p LineIndex starts,
/// assuming no reflow.
///
/// If \p Break is true, returns the column at which the line should start
/// after the line break.
/// If \p Break is false, returns the column at which the line itself will
/// start.
virtual unsigned getContentStartColumn(unsigned LineIndex,
bool Break) const = 0;
/// \brief Returns a range (offset, length) at which to break the line at
/// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
/// violate \p ColumnLimit, assuming the text starting at \p TailOffset in
/// the token is formatted starting at ContentStartColumn in the reformatted
/// file.
virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
unsigned ColumnLimit, unsigned ContentStartColumn,
llvm::Regex &CommentPragmasRegex) const = 0;
/// \brief Emits the previously retrieved \p Split via \p Whitespaces.
virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
WhitespaceManager &Whitespaces) const = 0;
/// \brief Returns the number of columns needed to format
/// \p RemainingTokenColumns, assuming that Split is within the range measured
/// by \p RemainingTokenColumns, and that the whitespace in Split is reduced
/// to a single space.
///
/// Unlike most of this interface, this member is not virtual.
unsigned getLengthAfterCompression(unsigned RemainingTokenColumns,
Split Split) const;
/// \brief Replaces the whitespace range described by \p Split with a single
/// space.
virtual void compressWhitespace(unsigned LineIndex, unsigned TailOffset,
Split Split,
WhitespaceManager &Whitespaces) const = 0;
/// \brief Returns whether the token supports reflowing text.
///
/// The default is \c false; tokens that support reflow override this.
virtual bool supportsReflow() const { return false; }
/// \brief Returns a whitespace range (offset, length) of the content at \p
/// LineIndex such that the content of that line is reflown to the end of the
/// previous one.
///
/// Returning (StringRef::npos, 0) indicates reflowing is not possible; this
/// is what the default implementation returns.
///
/// The range will include any whitespace preceding the specified line's
/// content.
///
/// If the split is not contained within one token, for example when reflowing
/// line comments, returns (0, <length>).
virtual Split getReflowSplit(unsigned LineIndex,
llvm::Regex &CommentPragmasRegex) const {
return Split(StringRef::npos, 0);
}
/// \brief Reflows the current line into the end of the previous one.
///
/// The default implementation does nothing.
virtual void reflow(unsigned LineIndex,
WhitespaceManager &Whitespaces) const {}
/// \brief Returns whether there will be a line break at the start of the
/// token. The default is \c false.
virtual bool introducesBreakBeforeToken() const {
return false;
}
/// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
///
/// The default implementation does nothing.
virtual void adaptStartOfLine(unsigned LineIndex,
WhitespaceManager &Whitespaces) const {}
/// \brief Returns a whitespace range (offset, length) of the content at
/// the last line that needs to be reformatted after the last line has been
/// reformatted.
///
/// A result having offset == StringRef::npos means that no reformat is
/// necessary; this is what the default implementation returns.
virtual Split getSplitAfterLastLine(unsigned TailOffset) const {
return Split(StringRef::npos, 0);
}
/// \brief Replaces the whitespace from \p SplitAfterLastLine on the last line
/// after the last line has been formatted by performing a reformatting.
///
/// Implemented by calling insertBreak on the last line, i.e. the line with
/// index getLineCount() - 1.
void replaceWhitespaceAfterLastLine(unsigned TailOffset,
Split SplitAfterLastLine,
WhitespaceManager &Whitespaces) const {
insertBreak(getLineCount() - 1, TailOffset, SplitAfterLastLine,
Whitespaces);
}
/// \brief Updates the next token of \p State to the next token after this
/// one. This can be used when this token manages a set of underlying tokens
/// as a unit and is responsible for the formatting of them.
///
/// The default implementation does nothing.
virtual void updateNextToken(LineState &State) const {}
protected:
/// \brief Only derived classes may construct a \c BreakableToken.
///
/// \p Tok and \p Style are stored by reference, so both must outlive this
/// object.
BreakableToken(const FormatToken &Tok, bool InPPDirective,
encoding::Encoding Encoding, const FormatStyle &Style)
: Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding),
Style(Style) {}
// The token passed at construction (held by reference).
const FormatToken &Tok;
// The InPPDirective flag passed at construction.
const bool InPPDirective;
// The text encoding passed at construction.
const encoding::Encoding Encoding;
// The format style passed at construction (held by reference).
const FormatStyle &Style;
};
/// \brief A \c BreakableToken for a single line string literal.
class BreakableStringLiteral : public BreakableToken {
public:
/// \brief Creates a breakable token for a single line string literal.
///
/// \p StartColumn specifies the column in which the token will start
/// after formatting.
///
/// \p Prefix and \p Postfix are the text a line needs after, respectively
/// before, a break introduced in the token.
BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn,
StringRef Prefix, StringRef Postfix,
bool InPPDirective, encoding::Encoding Encoding,
const FormatStyle &Style);
// Note: the fourth parameter is named ContentStartColumn in the base class
// declaration of getSplit.
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
unsigned ReflowColumn,
llvm::Regex &CommentPragmasRegex) const override;
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
WhitespaceManager &Whitespaces) const override;
// Intentionally a no-op for string literals.
void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
WhitespaceManager &Whitespaces) const override {}
unsigned getLineCount() const override;
unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
StringRef::size_type Length,
unsigned StartColumn) const override;
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,
unsigned StartColumn) const override;
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;
protected:
// The column in which the token starts.
unsigned StartColumn;
// The prefix a line needs after a break in the token.
StringRef Prefix;
// The postfix a line needs before introducing a break.
StringRef Postfix;
// The token text excluding the prefix and postfix.
StringRef Line;
// Length of the sequence of tokens after this string literal that cannot
// contain line breaks.
unsigned UnbreakableTailLength;
};
/// \brief Common base class of the breakable comment tokens
/// (\c BreakableBlockComment and \c BreakableLineCommentSection). Comments
/// support reflow.
class BreakableComment : public BreakableToken {
protected:
/// \brief Creates a breakable token for a comment.
///
/// \p StartColumn specifies the column in which the comment will start after
/// formatting.
///
/// Protected: only derived classes may construct a \c BreakableComment.
BreakableComment(const FormatToken &Token, unsigned StartColumn,
bool InPPDirective, encoding::Encoding Encoding,
const FormatStyle &Style);
public:
bool supportsReflow() const override { return true; }
unsigned getLineCount() const override;
// Note: the fourth parameter is named ContentStartColumn in the base class
// declaration of getSplit.
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
unsigned ReflowColumn,
llvm::Regex &CommentPragmasRegex) const override;
void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
WhitespaceManager &Whitespaces) const override;
protected:
// Returns the token containing the line at LineIndex.
const FormatToken &tokenAt(unsigned LineIndex) const;
// Checks if the content of line LineIndex may be reflown with the previous
// line.
virtual bool mayReflow(unsigned LineIndex,
llvm::Regex &CommentPragmasRegex) const = 0;
// Contains the original text of the lines of the block comment.
//
// In case of block comments, excludes the leading /* in the first line and
// trailing */ in the last line. In case of line comments, excludes the
// leading // and spaces.
SmallVector<StringRef, 16> Lines;
// Contains the text of the lines excluding all leading and trailing
// whitespace between the lines. Note that the decoration (if present) is also
// not considered part of the text.
SmallVector<StringRef, 16> Content;
// Tokens[i] contains a reference to the token containing Lines[i] if the
// whitespace range before that token is managed by this block.
// Otherwise, Tokens[i] is a null pointer.
SmallVector<FormatToken *, 16> Tokens;
// ContentColumn[i] is the target column at which Content[i] should be.
// Note that this excludes a leading "* " or "*" in case of block comments
// where all lines have a "*" prefix, or the leading "// " or "//" in case of
// line comments.
//
// In block comments, the first line's target column is always positive. The
// remaining lines' target columns are relative to the first line to allow
// correct indentation of comments in \c WhitespaceManager. Thus they can be
// negative as well (in case the first line needs to be unindented more than
// there's actual whitespace in another line).
SmallVector<int, 16> ContentColumn;
// The intended start column of the first line of text from this section.
unsigned StartColumn;
// The prefix to use in front of a line that has been reflown up.
// For example, when reflowing the second line after the first here:
// // comment 1
// // comment 2
// we expect:
// // comment 1 comment 2
// and not:
// // comment 1comment 2
StringRef ReflowPrefix = " ";
};
/// \brief A \c BreakableComment for a block comment.
class BreakableBlockComment : public BreakableComment {
public:
/// \brief Creates a breakable token for a block comment.
///
/// \p StartColumn specifies the column in which the comment will start after
/// formatting.
// NOTE(review): OriginalStartColumn and FirstInLine are not documented in this
// header; presumably the comment's column in the original source and whether
// the comment is the first token on its line -- confirm in BreakableToken.cpp.
BreakableBlockComment(const FormatToken &Token, unsigned StartColumn,
unsigned OriginalStartColumn, bool FirstInLine,
bool InPPDirective, encoding::Encoding Encoding,
const FormatStyle &Style);
unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
StringRef::size_type Length,
unsigned StartColumn) const override;
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,
unsigned StartColumn) const override;
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
WhitespaceManager &Whitespaces) const override;
Split getReflowSplit(unsigned LineIndex,
llvm::Regex &CommentPragmasRegex) const override;
void reflow(unsigned LineIndex,
WhitespaceManager &Whitespaces) const override;
bool introducesBreakBeforeToken() const override;
void adaptStartOfLine(unsigned LineIndex,
WhitespaceManager &Whitespaces) const override;
Split getSplitAfterLastLine(unsigned TailOffset) const override;
bool mayReflow(unsigned LineIndex,
llvm::Regex &CommentPragmasRegex) const override;
private:
// Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex].
//
// Updates Content[LineIndex-1] and Content[LineIndex] by stripping off
// leading and trailing whitespace.
//
// Sets ContentColumn to the intended column in which the text at
// Lines[LineIndex] starts (note that the decoration, if present, is not
// considered part of the text).
void adjustWhitespace(unsigned LineIndex, int IndentDelta);
// The column at which the text of a broken line should start.
// Note that an optional decoration would go before that column.
// IndentAtLineBreak is a uniform position for all lines in a block comment,
// regardless of their relative position.
// FIXME: Revisit the decision to do this; the main reason was to support
// patterns like
// /**************//**
// * Comment
// We could also support such patterns by special casing the first line
// instead.
unsigned IndentAtLineBreak;
// This is to distinguish between the case when the last line was empty and
// the case when it started with a decoration ("*" or "* ").
bool LastLineNeedsDecoration;
// Either "* " if all lines begin with a "*", or empty.
StringRef Decoration;
// If this block comment has decorations, this is the column of the start of
// the decorations.
unsigned DecorationColumn;
// If true, make sure that the opening '/**' and the closing '*/' ends on a
// line of itself. Styles like jsdoc require this for multiline comments.
bool DelimitersOnNewline;
// Length of the sequence of tokens after this block comment that cannot
// contain line breaks.
unsigned UnbreakableTailLength;
};
/// \brief A \c BreakableComment for a section of line comments. It may manage
/// several underlying tokens as one unit (see \c LastLineTok and
/// \c updateNextToken).
class BreakableLineCommentSection : public BreakableComment {
public:
/// \brief Creates a breakable token for a section of line comments.
///
/// \p StartColumn specifies the column in which the comment will start after
/// formatting.
// NOTE(review): OriginalStartColumn and FirstInLine are not documented in this
// header; presumably the comment's column in the original source and whether
// the comment is the first token on its line -- confirm in BreakableToken.cpp.
BreakableLineCommentSection(const FormatToken &Token, unsigned StartColumn,
unsigned OriginalStartColumn, bool FirstInLine,
bool InPPDirective, encoding::Encoding Encoding,
const FormatStyle &Style);
unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
StringRef::size_type Length,
unsigned StartColumn) const override;
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
WhitespaceManager &Whitespaces) const override;
Split getReflowSplit(unsigned LineIndex,
llvm::Regex &CommentPragmasRegex) const override;
void reflow(unsigned LineIndex,
WhitespaceManager &Whitespaces) const override;
void adaptStartOfLine(unsigned LineIndex,
WhitespaceManager &Whitespaces) const override;
void updateNextToken(LineState &State) const override;
bool mayReflow(unsigned LineIndex,
llvm::Regex &CommentPragmasRegex) const override;
private:
// OriginalPrefix[i] contains the original prefix of line i, including
// trailing whitespace before the start of the content. The indentation
// preceding the prefix is not included.
// For example, if the line is:
// // content
// then the original prefix is "// ".
SmallVector<StringRef, 16> OriginalPrefix;
// Prefix[i] contains the intended leading "//" with trailing spaces to
// account for the indentation of content within the comment at line i after
// formatting. It can be different than the original prefix when the original
// line starts like this:
// //content
// Then the original prefix is "//", but the prefix is "// ".
SmallVector<StringRef, 16> Prefix;
// NOTE(review): undocumented here; presumably OriginalContentColumn[i] is the
// column at which the content of line i starts in the original source --
// confirm against BreakableToken.cpp.
SmallVector<unsigned, 16> OriginalContentColumn;
/// \brief The token to which the last line of this breakable token belongs;
/// nullptr if that token is the initial token.
///
/// The distinction is because if the token of the last line of this breakable
/// token is distinct from the initial token, this breakable token owns the
/// whitespace before the token of the last line, and the whitespace manager
/// must be able to modify it.
FormatToken *LastLineTok = nullptr;
};
} // namespace format
} // namespace clang
#endif

View File

@@ -0,0 +1,23 @@
# Build definition for the clangFormat library.

# LLVM component(s) this library is linked against.
set(LLVM_LINK_COMPONENTS support)
# Declares the clangFormat library: first the source files it is built from,
# then, after LINK_LIBS, the Clang libraries it links against.
add_clang_library(clangFormat
AffectedRangeManager.cpp
BreakableToken.cpp
ContinuationIndenter.cpp
Format.cpp
FormatToken.cpp
FormatTokenLexer.cpp
NamespaceEndCommentsFixer.cpp
SortJavaScriptImports.cpp
TokenAnalyzer.cpp
TokenAnnotator.cpp
UnwrappedLineFormatter.cpp
UnwrappedLineParser.cpp
UsingDeclarationsSorter.cpp
WhitespaceManager.cpp
LINK_LIBS
clangBasic
clangLex
clangToolingCore
)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,437 @@
//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file implements an indenter that manages the indentation of
/// continuations.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
#define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
#include "Encoding.h"
#include "FormatToken.h"
#include "clang/Format/Format.h"
#include "llvm/Support/Regex.h"
#include <map>
#include <tuple>
namespace clang {
class SourceManager;
namespace format {
class AnnotatedLine;
class BreakableToken;
struct FormatToken;
struct LineState;
struct ParenState;
struct RawStringFormatStyleManager;
class WhitespaceManager;
/// \brief Associates raw string delimiters with the \c FormatStyle to use for
/// them.
struct RawStringFormatStyleManager {
// Maps a delimiter string to its format style.
llvm::StringMap<FormatStyle> DelimiterStyle;
// NOTE(review): presumably fills DelimiterStyle from the raw string settings
// of \p CodeStyle -- confirm in ContinuationIndenter.cpp.
RawStringFormatStyleManager(const FormatStyle &CodeStyle);
// Looks up the style for \p Delimiter. The result is optional; presumably it
// is empty when no style is registered for the delimiter -- confirm in
// ContinuationIndenter.cpp.
llvm::Optional<FormatStyle> get(StringRef Delimiter) const;
};
class ContinuationIndenter {
public:
/// \brief Constructs a \c ContinuationIndenter to format \p Line starting in
/// column \p FirstIndent.
ContinuationIndenter(const FormatStyle &Style,
const AdditionalKeywords &Keywords,
const SourceManager &SourceMgr,
WhitespaceManager &Whitespaces,
encoding::Encoding Encoding,
bool BinPackInconclusiveFunctions);
/// \brief Get the initial state, i.e. the state after placing \p Line's
/// first token at \p FirstIndent. When reformatting a fragment of code, as in
/// the case of formatting inside raw string literals, \p FirstStartColumn is
/// the column at which the state of the parent formatter is.
LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
const AnnotatedLine *Line, bool DryRun);
// FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
// better home.
/// \brief Returns \c true, if a line break after \p State is allowed.
bool canBreak(const LineState &State);
/// \brief Returns \c true, if a line break after \p State is mandatory.
bool mustBreak(const LineState &State);
/// \brief Appends the next token to \p State and updates information
/// necessary for indentation.
///
/// Puts the token on the current line if \p Newline is \c false and adds a
/// line break and necessary indentation otherwise.
///
/// If \p DryRun is \c false, also creates and stores the required
/// \c Replacement.
unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
unsigned ExtraSpaces = 0);
/// \brief Get the column limit for this line. This is the style's column
/// limit, potentially reduced for preprocessor definitions.
unsigned getColumnLimit(const LineState &State) const;
private:
/// \brief Mark the next token as consumed in \p State and modify its stacks
/// accordingly.
unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
/// \brief Update 'State' according to the next token's fake left parentheses.
void moveStatePastFakeLParens(LineState &State, bool Newline);
/// \brief Update 'State' according to the next token's fake r_parens.
void moveStatePastFakeRParens(LineState &State);
/// \brief Update 'State' according to the next token being one of "(<{[".
void moveStatePastScopeOpener(LineState &State, bool Newline);
/// \brief Update 'State' according to the next token being one of ")>}]".
void moveStatePastScopeCloser(LineState &State);
/// \brief Update 'State' with the next token opening a nested block.
void moveStateToNewBlock(LineState &State);
/// \brief Reformats a raw string literal.
///
/// \returns An extra penalty induced by reformatting the token.
unsigned reformatRawStringLiteral(const FormatToken &Current,
LineState &State,
const FormatStyle &RawStringStyle,
bool DryRun);
/// \brief If the current token is at the end of the current line, handle
/// the transition to the next line.
unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
bool DryRun, bool AllowBreak);
/// \brief If \p Current is a raw string that is configured to be reformatted,
/// return the style to be used.
llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
const LineState &State);
/// \brief If the current token sticks out over the end of the line, break
/// it if possible.
///
/// \returns A pair (penalty, exceeded), where penalty is the extra penalty
/// when tokens are broken or lines exceed the column limit, and exceeded
/// indicates whether the algorithm purposefully left lines exceeding the
/// column limit.
///
/// The returned penalty will cover the cost of the additional line breaks
/// and column limit violation in all lines except for the last one. The
/// penalty for the column limit violation in the last line (and in single
/// line tokens) is handled in \c addNextStateToQueue.
///
/// \p Strict indicates whether reflowing is allowed to leave characters
/// protruding the column limit; if true, lines will be split strictly within
/// the column limit where possible; if false, words are allowed to protrude
/// over the column limit as long as the penalty is less than the penalty
/// of a break.
std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
LineState &State,
bool AllowBreak, bool DryRun,
bool Strict);
/// \brief Returns the \c BreakableToken starting at \p Current, or nullptr
/// if the current token cannot be broken.
std::unique_ptr<BreakableToken>
createBreakableToken(const FormatToken &Current, LineState &State,
bool AllowBreak);
/// \brief Appends the next token to \p State and updates information
/// necessary for indentation.
///
/// Puts the token on the current line.
///
/// If \p DryRun is \c false, also creates and stores the required
/// \c Replacement.
void addTokenOnCurrentLine(LineState &State, bool DryRun,
unsigned ExtraSpaces);
/// \brief Appends the next token to \p State and updates information
/// necessary for indentation.
///
/// Adds a line break and necessary indentation.
///
/// If \p DryRun is \c false, also creates and stores the required
/// \c Replacement.
unsigned addTokenOnNewLine(LineState &State, bool DryRun);
/// \brief Calculate the new column for a line wrap before the next token.
unsigned getNewLineColumn(const LineState &State);
/// \brief Adds a multiline token to the \p State.
///
/// \returns Extra penalty for the first line of the literal: last line is
/// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
/// matter, as we don't change them.
unsigned addMultilineToken(const FormatToken &Current, LineState &State);
/// \brief Returns \c true if the next token starts a multiline string
/// literal.
///
/// This includes implicitly concatenated strings, strings that will be broken
/// by clang-format and string literals with escaped newlines.
bool nextIsMultilineString(const LineState &State);
FormatStyle Style;
const AdditionalKeywords &Keywords;
const SourceManager &SourceMgr;
WhitespaceManager &Whitespaces;
encoding::Encoding Encoding;
bool BinPackInconclusiveFunctions;
llvm::Regex CommentPragmasRegex;
const RawStringFormatStyleManager RawStringFormats;
};
/// \brief Per-parenthesis-level formatting state.
///
/// One instance describes a single nesting level; \c LineState keeps a stack
/// of them. Instances are ordered with \c operator< so that states can be
/// compared (see \c LineState::operator<).
struct ParenState {
/// \brief Creates the state for a new level.
///
/// \p Indent also seeds \c NestedBlockIndent. \c LastOperatorWrapped and
/// \c AlignColons start out \c true; every other flag not passed in starts
/// out \c false.
ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking,
bool NoLineBreak)
: Indent(Indent), LastSpace(LastSpace), NestedBlockIndent(Indent),
BreakBeforeClosingBrace(false), AvoidBinPacking(AvoidBinPacking),
BreakBeforeParameter(false), NoLineBreak(NoLineBreak),
NoLineBreakInOperand(false), LastOperatorWrapped(true),
ContainsLineBreak(false), ContainsUnwrappedBuilder(false),
AlignColons(true), ObjCSelectorNameFound(false),
HasMultipleNestedBlocks(false), NestedBlockInlined(false) {}
/// \brief The position to which a specific parenthesis level needs to be
/// indented.
unsigned Indent;
/// \brief The position of the last space on each level.
///
/// Used e.g. to break like:
/// functionCall(Parameter, otherCall(
///                             OtherParameter));
unsigned LastSpace;
/// \brief If a block relative to this parenthesis level gets wrapped, indent
/// it this much.
unsigned NestedBlockIndent;
/// \brief The position of the first "<<" operator encountered on each level.
///
/// Used to align "<<" operators. 0 if no such operator has been encountered
/// on a level.
unsigned FirstLessLess = 0;
/// \brief The column of a \c ? in a conditional expression.
unsigned QuestionColumn = 0;
/// \brief The position of the colon in an ObjC method declaration/call.
unsigned ColonPos = 0;
/// \brief The start of the most recent function in a builder-type call.
unsigned StartOfFunctionCall = 0;
/// \brief Contains the start of array subscript expressions, so that they
/// can be aligned.
unsigned StartOfArraySubscripts = 0;
/// \brief If a nested name specifier was broken over multiple lines, this
/// contains the start column of the second line. Otherwise 0.
unsigned NestedNameSpecifierContinuation = 0;
/// \brief If a call expression was broken over multiple lines, this
/// contains the start column of the second line. Otherwise 0.
unsigned CallContinuation = 0;
/// \brief The column of the first variable name in a variable declaration.
///
/// Used to align further variables if necessary.
unsigned VariablePos = 0;
/// \brief Whether a newline needs to be inserted before the block's closing
/// brace.
///
/// We only want to insert a newline before the closing brace if there also
/// was a newline after the beginning left brace.
bool BreakBeforeClosingBrace : 1;
/// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
/// lines, in this context.
bool AvoidBinPacking : 1;
/// \brief Break after the next comma (or all the commas in this context if
/// \c AvoidBinPacking is \c true).
bool BreakBeforeParameter : 1;
/// \brief Line breaking in this context would break a formatting rule.
bool NoLineBreak : 1;
/// \brief Same as \c NoLineBreak, but is restricted until the end of the
/// operand (including the next ",").
bool NoLineBreakInOperand : 1;
/// \brief True if the last binary operator on this level was wrapped to the
/// next line.
bool LastOperatorWrapped : 1;
/// \brief \c true if this \c ParenState already contains a line-break.
///
/// The first line break in a certain \c ParenState causes extra penalty so
/// that clang-format prefers similar breaks, i.e. breaks in the same
/// parenthesis.
bool ContainsLineBreak : 1;
/// \brief \c true if this \c ParenState contains multiple segments of a
/// builder-type call on one line.
bool ContainsUnwrappedBuilder : 1;
/// \brief \c true if the colons of the current ObjC method expression should
/// be aligned.
///
/// Not considered for memoization as it will always have the same value at
/// the same token.
bool AlignColons : 1;
/// \brief \c true if at least one selector name was found in the current
/// ObjC method expression.
///
/// Not considered for memoization as it will always have the same value at
/// the same token.
bool ObjCSelectorNameFound : 1;
/// \brief \c true if there are multiple nested blocks inside these parens.
///
/// Not considered for memoization as it will always have the same value at
/// the same token.
bool HasMultipleNestedBlocks : 1;
/// \brief The start of a nested block (e.g. lambda introducer in C++ or
/// "function" in JavaScript) is not wrapped to a new line.
bool NestedBlockInlined : 1;
/// \brief Strict ordering over the memoized fields.
///
/// Fields are compared one after another; the first field that differs
/// decides the result. Column-like \c unsigned fields order ascending. For a
/// flag, the state in which the flag is set orders first (the flag of
/// \c *this is returned when the two differ).
///
/// \c AlignColons, \c ObjCSelectorNameFound and \c HasMultipleNestedBlocks are
/// skipped on purpose (see their documentation).
/// NOTE(review): \c NoLineBreakInOperand and
/// \c NestedNameSpecifierContinuation are not compared either, and no reason
/// is stated here - confirm that this is intentional.
bool operator<(const ParenState &Other) const {
if (Indent != Other.Indent)
return Indent < Other.Indent;
if (LastSpace != Other.LastSpace)
return LastSpace < Other.LastSpace;
if (NestedBlockIndent != Other.NestedBlockIndent)
return NestedBlockIndent < Other.NestedBlockIndent;
if (FirstLessLess != Other.FirstLessLess)
return FirstLessLess < Other.FirstLessLess;
if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
return BreakBeforeClosingBrace;
if (QuestionColumn != Other.QuestionColumn)
return QuestionColumn < Other.QuestionColumn;
if (AvoidBinPacking != Other.AvoidBinPacking)
return AvoidBinPacking;
if (BreakBeforeParameter != Other.BreakBeforeParameter)
return BreakBeforeParameter;
if (NoLineBreak != Other.NoLineBreak)
return NoLineBreak;
if (LastOperatorWrapped != Other.LastOperatorWrapped)
return LastOperatorWrapped;
if (ColonPos != Other.ColonPos)
return ColonPos < Other.ColonPos;
if (StartOfFunctionCall != Other.StartOfFunctionCall)
return StartOfFunctionCall < Other.StartOfFunctionCall;
if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
return StartOfArraySubscripts < Other.StartOfArraySubscripts;
if (CallContinuation != Other.CallContinuation)
return CallContinuation < Other.CallContinuation;
if (VariablePos != Other.VariablePos)
return VariablePos < Other.VariablePos;
if (ContainsLineBreak != Other.ContainsLineBreak)
return ContainsLineBreak;
if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
return ContainsUnwrappedBuilder;
if (NestedBlockInlined != Other.NestedBlockInlined)
return NestedBlockInlined;
// All compared fields are equal: neither state orders before the other.
return false;
}
};
/// \brief The current state when indenting an unwrapped line.
///
/// As the indenting tries different combinations this is copied by value.
struct LineState {
/// \brief The number of used columns in the current line.
unsigned Column;
/// \brief The token that needs to be next formatted.
FormatToken *NextToken;
/// \brief \c true if this line contains a continued for-loop section.
bool LineContainsContinuedForLoopSection;
/// \brief \c true if \p NextToken should not continue this line.
bool NoContinuation;
/// \brief The \c NestingLevel at the start of this line.
unsigned StartOfLineLevel;
/// \brief The lowest \c NestingLevel on the current line.
unsigned LowestLevelOnLine;
/// \brief The start column of the string literal, if we're in a string
/// literal sequence, 0 otherwise.
unsigned StartOfStringLiteral;
/// \brief A stack keeping track of properties applying to parenthesis
/// levels.
std::vector<ParenState> Stack;
/// \brief Ignore the stack of \c ParenStates for state comparison.
///
/// In long and deeply nested unwrapped lines, the current algorithm can
/// be insufficient for finding the best formatting with a reasonable amount
/// of time and memory. Setting this flag will effectively lead to the
/// algorithm not analyzing some combinations. However, these combinations
/// rarely contain the optimal solution: In short, accepting a higher
/// penalty early would need to lead to different values in the \c
/// ParenState stack (in an otherwise identical state) and these different
/// values would need to lead to a significant amount of avoided penalty
/// later.
///
/// FIXME: Come up with a better algorithm instead.
bool IgnoreStackForComparison;
/// \brief The indent of the first token.
unsigned FirstIndent;
/// \brief The line that is being formatted.
///
/// Does not need to be considered for memoization because it doesn't change.
const AnnotatedLine *Line;
/// \brief Comparison operator to be able to use \c LineState in a \c map.
///
/// Compares \c NextToken (by pointer value), \c Column, the two line flags,
/// the nesting levels and \c StartOfStringLiteral, in that order, and only
/// then the \c ParenState stack. \c Line is skipped (see its documentation).
/// NOTE(review): \c FirstIndent is not compared either - confirm that this
/// is intentional.
bool operator<(const LineState &Other) const {
if (NextToken != Other.NextToken)
return NextToken < Other.NextToken;
if (Column != Other.Column)
return Column < Other.Column;
if (LineContainsContinuedForLoopSection !=
Other.LineContainsContinuedForLoopSection)
return LineContainsContinuedForLoopSection;
if (NoContinuation != Other.NoContinuation)
return NoContinuation;
if (StartOfLineLevel != Other.StartOfLineLevel)
return StartOfLineLevel < Other.StartOfLineLevel;
if (LowestLevelOnLine != Other.LowestLevelOnLine)
return LowestLevelOnLine < Other.LowestLevelOnLine;
if (StartOfStringLiteral != Other.StartOfStringLiteral)
return StartOfStringLiteral < Other.StartOfStringLiteral;
// If either side asks to ignore the stack, the states compare as
// equivalent from here on, regardless of what the stacks contain.
if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
return false;
return Stack < Other.Stack;
}
};
} // end namespace format
} // end namespace clang
#endif

View File

@@ -0,0 +1,128 @@
//===--- Encoding.h - Format C++ code -------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Contains functions for text encoding manipulation. Supports UTF-8,
/// 8-bit encodings and escape sequences in C++ string literals.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_FORMAT_ENCODING_H
#define LLVM_CLANG_LIB_FORMAT_ENCODING_H
#include "clang/Basic/LLVM.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/Unicode.h"
namespace clang {
namespace format {
namespace encoding {
/// \brief The text encodings distinguished by the functions in this file.
enum Encoding {
Encoding_UTF8,
Encoding_Unknown // We treat all other encodings as 8-bit encodings.
};
/// \brief Classifies the encoding of \p Text.
///
/// \returns \c Encoding_UTF8 if all of \p Text is a legal UTF-8 string, and
/// \c Encoding_Unknown (treated as some 8-bit encoding) otherwise.
inline Encoding detectEncoding(StringRef Text) {
  const auto *Cursor = reinterpret_cast<const llvm::UTF8 *>(Text.begin());
  const auto *Limit = reinterpret_cast<const llvm::UTF8 *>(Text.end());
  return llvm::isLegalUTF8String(&Cursor, Limit) ? Encoding_UTF8
                                                 : Encoding_Unknown;
}
/// \brief Computes how many columns \p Text occupies on a generic
/// Unicode-capable terminal, assuming it is encoded as \p Encoding.
///
/// For anything other than UTF-8 the width is the number of bytes.
inline unsigned columnWidth(StringRef Text, Encoding Encoding) {
  if (Encoding != Encoding_UTF8)
    return Text.size();
  const int Width = llvm::sys::unicode::columnWidthUTF8(Text);
  // FIXME: Figure out the correct way to handle this in the presence of both
  // printable and unprintable multi-byte UTF-8 characters. Falling back to
  // returning the number of bytes may cause problems, as columnWidth suddenly
  // becomes non-additive.
  if (Width < 0)
    return Text.size();
  return Width;
}
/// \brief Returns the number of columns required to display the \p Text,
/// starting from the \p StartColumn on a terminal with the \p TabWidth. The
/// text is assumed to use the specified \p Encoding.
///
/// Each tab advances to the next multiple of \p TabWidth (taking
/// \p StartColumn into account). If \p TabWidth is 0, tabs contribute no
/// columns; this avoids a division by zero for such a configuration.
inline unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn,
                                    unsigned TabWidth, Encoding Encoding) {
  unsigned TotalWidth = 0;
  StringRef Tail = Text;
  for (;;) {
    StringRef::size_type TabPos = Tail.find('\t');
    // No further tab: the remainder is measured as plain text.
    if (TabPos == StringRef::npos)
      return TotalWidth + columnWidth(Tail, Encoding);
    TotalWidth += columnWidth(Tail.substr(0, TabPos), Encoding);
    // The modulo below is undefined for a zero tab width, so skip the tab
    // expansion entirely in that case.
    if (TabWidth)
      TotalWidth += TabWidth - (TotalWidth + StartColumn) % TabWidth;
    Tail = Tail.substr(TabPos + 1);
  }
}
/// \brief Returns how many bytes make up the single codepoint whose first
/// byte is \p FirstChar, given that the text uses \p Encoding.
///
/// Every encoding other than UTF-8 is handled as one byte per codepoint.
inline unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding) {
  if (Encoding == Encoding_UTF8)
    return llvm::getNumBytesForUTF8(FirstChar);
  return 1;
}
/// \brief Returns \c true if \p c is one of the characters '0' through '7'.
inline bool isOctDigit(char c) {
  const bool AtLeastZero = c >= '0';
  const bool AtMostSeven = c <= '7';
  return AtLeastZero && AtMostSeven;
}
/// \brief Returns \c true if \p c is a hexadecimal digit: '0'-'9', 'a'-'f'
/// or 'A'-'F'.
inline bool isHexDigit(char c) {
  const bool IsDecimal = c >= '0' && c <= '9';
  const bool IsLowerHex = c >= 'a' && c <= 'f';
  const bool IsUpperHex = c >= 'A' && c <= 'F';
  return IsDecimal || IsLowerHex || IsUpperHex;
}
/// \brief Computes the length of the escape sequence at the start of \p Text.
///
/// \p Text must begin with the backslash that introduces the escape sequence
/// and should extend to the end of the enclosing C++ string literal.
///
/// \returns 1 for a lone trailing backslash, 6 for \c \\u, 10 for \c \\U, the
/// backslash plus 'x' plus all following hex digits for \c \\x, the backslash
/// plus up to three octal digits for an octal escape, and otherwise the
/// backslash plus the byte length of the character that follows it.
inline unsigned getEscapeSequenceLength(StringRef Text) {
  assert(Text[0] == '\\');
  if (Text.size() < 2)
    return 1;

  const char Kind = Text[1];
  if (Kind == 'u')
    return 6;
  if (Kind == 'U')
    return 10;

  if (Kind == 'x') {
    // Start right after '\x' and swallow every hex digit that follows.
    unsigned Length = 2;
    while (Length < Text.size() && isHexDigit(Text[Length]))
      ++Length;
    return Length;
  }

  if (isOctDigit(Kind)) {
    // An octal escape has at most three digits, i.e. four bytes in total.
    unsigned Length = 1;
    while (Length < Text.size() && Length < 4 && isOctDigit(Text[Length]))
      ++Length;
    return Length;
  }

  return 1 + llvm::getNumBytesForUTF8(Kind);
}
} // namespace encoding
} // namespace format
} // namespace clang
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,83 @@
//===--- FormatInternal.h - Format C++ code ---------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file declares Format APIs to be used internally by the
/// formatting library implementation.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_FORMAT_FORMATINTERNAL_H
#define LLVM_CLANG_LIB_FORMAT_FORMATINTERNAL_H
#include "BreakableToken.h"
#include "clang/Tooling/Core/Lookup.h"
#include <utility>
namespace clang {
namespace format {
namespace internal {
/// \brief Reformats the given \p Ranges in the code fragment \p Code.
///
/// A fragment of code could conceptually be surrounded by other code that might
/// constrain how that fragment is laid out.
/// For example, consider the fragment of code between 'R"(' and ')"',
/// exclusive, in the following code:
///
///   void outer(int x) {
///     string inner = R"(name: data
///                       ^ FirstStartColumn
///       value: {
///         x: 1
///       ^ NextStartColumn
///       }
///     )";
///     ^ LastStartColumn
///   }
///
/// The outer code can influence the inner fragment as follows:
/// * \p FirstStartColumn specifies the column at which \p Code starts.
/// * \p NextStartColumn specifies the additional indent dictated by the
/// surrounding code. It is applied to the rest of the lines of \p Code.
/// * \p LastStartColumn specifies the column at which the last line of
/// \p Code should end, in case the last line is an empty line.
///
/// In the case where the last line of the fragment contains content,
/// the fragment ends at the end of that content and \p LastStartColumn is
/// not taken into account, for example in:
///
///   void block() {
///     string inner = R"(name: value)";
///   }
///
/// Each range is extended on either end to its next bigger logical unit, i.e.
/// everything that might influence its formatting or might be influenced by its
/// formatting.
///
/// Returns a pair P, where:
/// * P.first are the ``Replacements`` necessary to make all \p Ranges comply
/// with \p Style.
/// * P.second is the penalty induced by formatting the fragment \p Code.
/// If the formatting of the fragment doesn't have a notion of penalty,
/// returns 0.
///
/// If ``Status`` is non-null, its value will be populated with the status of
/// this formatting attempt. See \c FormattingAttemptStatus.
std::pair<tooling::Replacements, unsigned>
reformat(const FormatStyle &Style, StringRef Code,
ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn,
unsigned NextStartColumn, unsigned LastStartColumn, StringRef FileName,
FormattingAttemptStatus *Status);
} // namespace internal
} // namespace format
} // namespace clang
#endif

View File

@@ -0,0 +1,306 @@
//===--- FormatToken.cpp - Format C++ code --------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file implements specific functions of \c FormatTokens and their
/// roles.
///
//===----------------------------------------------------------------------===//
#include "FormatToken.h"
#include "ContinuationIndenter.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Debug.h"
#include <climits>
namespace clang {
namespace format {
/// \brief Returns the spelling of the enumerator \p Type.
///
/// The name table is generated from \c LIST_TOKEN_TYPES by stringizing each
/// entry. A \p Type that is not below \c NUM_TOKEN_TYPES is treated as
/// unreachable.
const char *getTokenTypeName(TokenType Type) {
  static const char *const Names[] = {
#define TYPE(X) #X,
      LIST_TOKEN_TYPES
#undef TYPE
      nullptr};
  if (Type >= NUM_TOKEN_TYPES) {
    llvm_unreachable("unknown TokenType");
    return nullptr;
  }
  return Names[Type];
}
// Returns true if the kind of the underlying token is one of the simple type
// specifiers listed below.
//
// FIXME: This is copy&pasted from Sema. Put it in a common place and remove
// duplication.
bool FormatToken::isSimpleTypeSpecifier() const {
  bool IsSimpleType = false;
  switch (Tok.getKind()) {
  // Integer-related keywords.
  case tok::kw_short:
  case tok::kw_long:
  case tok::kw___int64:
  case tok::kw___int128:
  case tok::kw_signed:
  case tok::kw_unsigned:
  case tok::kw_int:
  // Character and boolean types.
  case tok::kw_char:
  case tok::kw_wchar_t:
  case tok::kw_char16_t:
  case tok::kw_char32_t:
  case tok::kw_bool:
  // Floating-point types.
  case tok::kw_half:
  case tok::kw_float:
  case tok::kw_double:
  case tok::kw__Float16:
  case tok::kw___float128:
  // void, type annotations and type-producing operators.
  case tok::kw_void:
  case tok::kw___underlying_type:
  case tok::annot_typename:
  case tok::kw_typeof:
  case tok::kw_decltype:
    IsSimpleType = true;
    break;
  default:
    break;
  }
  return IsSimpleType;
}
// Out-of-line destructor definition; the body is empty.
TokenRole::~TokenRole() {}
// Base implementation: precomputes nothing. CommaSeparatedList defines its own
// version further down in this file.
void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {}
// Lays out the remainder of a braced list in columns, once its opening brace
// has been placed.
//
// Returns 0 without touching State if the list does not qualify (nothing was
// placed yet, only the single-column format exists, or the previously placed
// non-comment token is not the opening brace of a suitable list). Returns a
// penalty of 10000 if no column format fits. Otherwise all tokens up to the
// matching closing token are added through Indenter and the accumulated
// penalty is returned.
unsigned CommaSeparatedList::formatAfterToken(LineState &State,
                                              ContinuationIndenter *Indenter,
                                              bool DryRun) {
  const bool HasPlacedToken = State.NextToken && State.NextToken->Previous;
  if (!HasPlacedToken)
    return 0;

  // A single format is handled by formatFromToken.
  if (Formats.size() == 1)
    return 0;

  // Ensure that we start on the opening brace.
  const FormatToken *LBrace =
      State.NextToken->Previous->getPreviousNonComment();
  if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare))
    return 0;
  if (LBrace->BlockKind == BK_Block || LBrace->Type == TT_DictLiteral)
    return 0;
  if (LBrace->Next->Type == TT_DesignatedInitializerPeriod)
    return 0;

  // The first token is already placed, so its width is still available to the
  // list when computing how many code points remain.
  unsigned RemainingCodePoints =
      Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth;

  // Pick the best number of columns. Without a usable ColumnFormat the braced
  // list would generally be bin-packed; penalize that severely so that column
  // layouts are preferred if possible.
  const ColumnFormat *Format = getColumnFormat(RemainingCodePoints);
  if (!Format)
    return 10000;

  // Format the entire list.
  unsigned TotalPenalty = 0;
  unsigned ColumnIndex = 0;
  unsigned ItemIndex = 0;
  while (State.NextToken != LBrace->MatchingParen) {
    unsigned Padding = 0;

    // If the previous token was one of our commas, we are now on the next
    // item.
    const bool StartsNextItem = ItemIndex < Commas.size() &&
                                State.NextToken->Previous == Commas[ItemIndex];
    if (StartsNextItem) {
      if (!State.NextToken->isTrailingComment()) {
        Padding = Format->ColumnSizes[ColumnIndex] - ItemLengths[ItemIndex];
        ++ColumnIndex;
      }
      ++ItemIndex;
    }

    const bool BreakLine =
        ColumnIndex == Format->Columns || State.NextToken->MustBreakBefore;
    if (BreakLine)
      ColumnIndex = 0;

    // Place token using the continuation indenter and store the penalty.
    TotalPenalty +=
        Indenter->addTokenToState(State, BreakLine, DryRun, Padding);
  }
  return TotalPenalty;
}
// Marks the innermost paren state as "avoid bin packing" when there is exactly
// one column format or the list contains a nested braced list. Always returns
// a penalty of 0.
unsigned CommaSeparatedList::formatFromToken(LineState &State,
                                             ContinuationIndenter *Indenter,
                                             bool DryRun) {
  // Formatting with 1 Column isn't really a column layout, so we don't need the
  // special logic here. We can just avoid bin packing any of the parameters.
  const bool IsSingleColumnLayout = Formats.size() == 1;
  if (IsSingleColumnLayout || HasNestedBracedList)
    State.Stack.back().AvoidBinPacking = true;
  return 0;
}
// Returns the number of code points spanned by the tokens from Begin to End
// (both included), assuming the whole sequence sits on a single line. End
// must not have a smaller TotalLength than Begin.
static unsigned CodePointsBetween(const FormatToken *Begin,
                                  const FormatToken *End) {
  assert(End->TotalLength >= Begin->TotalLength);
  const auto Distance = End->TotalLength - Begin->TotalLength;
  return Distance + Begin->ColumnWidth;
}
// Precomputes the candidate column layouts for the braced list (or array
// initializer) opened by Token.
//
// First records, for every item, its length (ItemLengths), its length when it
// is the last item on a line (EndOfLineItemLength, which includes a trailing
// comment) and whether a break is required before it. Then, for every
// candidate number of columns, builds a ColumnFormat and appends it to Formats
// unless it is rejected by one of the checks below. Returns early, leaving
// Formats untouched, when a column layout is not wanted at all.
void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
// FIXME: At some point we might want to do this for other lists, too.
if (!Token->MatchingParen ||
!Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare))
return;
// In C++11 braced list style, we should not format in columns unless they
// have many items (20 or more) or we allow bin-packing of function call
// arguments.
if (Style.Cpp11BracedListStyle && !Style.BinPackArguments &&
Commas.size() < 19)
return;
// Limit column layout for JavaScript array initializers to 20 or more items
// for now to introduce it carefully. We can become more aggressive if this
// is necessary.
if (Token->is(TT_ArrayInitializerLSquare) && Commas.size() < 19)
return;
// Column format doesn't really make sense if we don't align after brackets.
if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign)
return;
FormatToken *ItemBegin = Token->Next;
while (ItemBegin->isTrailingComment())
ItemBegin = ItemBegin->Next;
SmallVector<bool, 8> MustBreakBeforeItem;
// The lengths of an item if it is put at the end of the line. This includes
// trailing comments which are otherwise ignored for column alignment.
SmallVector<unsigned, 8> EndOfLineItemLength;
bool HasSeparatingComment = false;
// One iteration per item: N commas separate up to N + 1 items.
for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) {
// Skip comments on their own line.
while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) {
ItemBegin = ItemBegin->Next;
HasSeparatingComment = i > 0;
}
MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore);
if (ItemBegin->is(tok::l_brace))
HasNestedBracedList = true;
const FormatToken *ItemEnd = nullptr;
if (i == Commas.size()) {
// Last item: it is delimited by the closing token instead of a comma.
ItemEnd = Token->MatchingParen;
const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment();
ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd));
if (Style.Cpp11BracedListStyle &&
!ItemEnd->Previous->isTrailingComment()) {
// In Cpp11 braced list style, the } and possibly other subsequent
// tokens will need to stay on a line with the last element.
while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore)
ItemEnd = ItemEnd->Next;
} else {
// In other braced lists styles, the "}" can be wrapped to the new line.
ItemEnd = Token->MatchingParen->Previous;
}
} else {
ItemEnd = Commas[i];
// The comma is counted as part of the item when calculating the length.
ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd));
// Consume trailing comments so they are included in EndOfLineItemLength.
if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
ItemEnd->Next->isTrailingComment())
ItemEnd = ItemEnd->Next;
}
EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd));
// If there is a trailing comma in the list, the next item will start at the
// closing brace. Don't create an extra item for this.
if (ItemEnd->getNextNonComment() == Token->MatchingParen)
break;
ItemBegin = ItemEnd->Next;
}
// Don't use column layout for lists with few elements and in presence of
// separating comments.
if (Commas.size() < 5 || HasSeparatingComment)
return;
if (Token->NestingLevel != 0 && Token->is(tok::l_brace) && Commas.size() < 19)
return;
// We can never place more than ColumnLimit / 3 items in a row (because of the
// spaces and the comma).
unsigned MaxItems = Style.ColumnLimit / 3;
std::vector<unsigned> MinSizeInColumn;
MinSizeInColumn.reserve(MaxItems);
// Try every column count and keep the layouts that pass the checks below.
for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) {
ColumnFormat Format;
Format.Columns = Columns;
Format.ColumnSizes.resize(Columns);
MinSizeInColumn.assign(Columns, UINT_MAX);
Format.LineCount = 1;
bool HasRowWithSufficientColumns = false;
unsigned Column = 0;
// Distribute the items over the columns, tracking the widest and the
// narrowest entry of each column.
for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) {
assert(i < MustBreakBeforeItem.size());
if (MustBreakBeforeItem[i] || Column == Columns) {
++Format.LineCount;
Column = 0;
}
if (Column == Columns - 1)
HasRowWithSufficientColumns = true;
// Items in the last column are measured including any trailing comment.
unsigned Length =
(Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i];
Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length);
MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length);
++Column;
}
// If all rows are terminated early (e.g. by trailing comments), we don't
// need to look further.
if (!HasRowWithSufficientColumns)
break;
Format.TotalWidth = Columns - 1; // Width of the N-1 spaces.
for (unsigned i = 0; i < Columns; ++i)
Format.TotalWidth += Format.ColumnSizes[i];
// Don't use this Format, if the difference between the longest and shortest
// element in a column exceeds a threshold to avoid excessive spaces.
// Only the first Columns - 1 columns are checked; the last one is exempt.
if ([&] {
for (unsigned i = 0; i < Columns - 1; ++i)
if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10)
return true;
return false;
}())
continue;
// Ignore layouts that are bound to violate the column limit.
if (Format.TotalWidth > Style.ColumnLimit && Columns > 1)
continue;
Formats.push_back(Format);
}
}
// Selects the column format to use when RemainingCharacters are available.
//
// Walks the stored formats from last to first. A format is eligible if it
// fits into RemainingCharacters or has exactly one column. The walk stops at
// the first eligible format that needs more lines than the one already
// chosen. Returns nullptr if no format is eligible.
const CommaSeparatedList::ColumnFormat *
CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const {
  const ColumnFormat *BestFormat = nullptr;
  for (auto It = Formats.rbegin(), End = Formats.rend(); It != End; ++It) {
    const bool IsEligible =
        It->TotalWidth <= RemainingCharacters || It->Columns == 1;
    if (!IsEligible)
      continue;
    if (BestFormat && It->LineCount > BestFormat->LineCount)
      break;
    BestFormat = &*It;
  }
  return BestFormat;
}
} // namespace format
} // namespace clang

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,117 @@
//===--- FormatTokenLexer.h - Format C++ code ----------------*- C++ ----*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file contains FormatTokenLexer, which tokenizes a source file
/// into a token stream suitable for ClangFormat.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
#define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
#include "Encoding.h"
#include "FormatToken.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Format/Format.h"
#include "llvm/Support/Regex.h"
#include <stack>
namespace clang {
namespace format {
/// \brief The lexing contexts tracked by \c FormatTokenLexer, which keeps a
/// stack of them (\c FormatTokenLexer::StateStack).
///
/// NOTE(review): the meaning of the individual values is inferred from their
/// names and from the declarations in \c FormatTokenLexer (template string
/// handling, \c getStashedToken) - confirm against the implementation.
enum LexerState {
NORMAL,
TEMPLATE_STRING,
TOKEN_STASHED,
};
/// \brief Tokenizes a source file into a stream of \c FormatToken objects
/// suitable for ClangFormat (see the file comment).
class FormatTokenLexer {
public:
FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column,
const FormatStyle &Style, encoding::Encoding Encoding);
/// \brief Runs the lexer and returns the resulting tokens.
ArrayRef<FormatToken *> lex();
/// \brief Gives access to the lexer's \c AdditionalKeywords.
const AdditionalKeywords &getKeywords() { return Keywords; }
private:
// Token-merging helpers. The bool-returning ones presumably report whether a
// merge took place - verify against the implementation.
void tryMergePreviousTokens();
bool tryMergeLessLess();
bool tryMergeNSStringLiteral();
bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);
// Returns \c true if \p Tok can only be followed by an operand in JavaScript.
bool precedesOperand(FormatToken *Tok);
bool canPrecedeRegexLiteral(FormatToken *Prev);
// Tries to parse a JavaScript Regex literal starting at the current token,
// if that begins with a slash and is in a location where JavaScript allows
// regex literals. Changes the current token to a regex literal and updates
// its text if successful.
void tryParseJSRegexLiteral();
// Handles JavaScript template strings.
//
// JavaScript template strings use backticks ('`') as delimiters, and allow
// embedding expressions nested in ${expr-here}. Template strings can be
// nested recursively, i.e. expressions can contain template strings in turn.
//
// The code below parses starting from a backtick, up to a closing backtick or
// an opening ${. It also maintains a stack of lexing contexts to handle
// nested template parts by balancing curly braces.
void handleTemplateStrings();
void tryParsePythonComment();
bool tryMerge_TMacro();
bool tryMergeConflictMarkers();
FormatToken *getStashedToken();
FormatToken *getNextToken();
FormatToken *FormatTok;
bool IsFirstToken;
// Stack of lexing contexts (see handleTemplateStrings above).
std::stack<LexerState> StateStack;
unsigned Column;
unsigned TrailingWhitespace;
std::unique_ptr<Lexer> Lex;
const SourceManager &SourceMgr;
FileID ID;
const FormatStyle &Style;
IdentifierTable IdentTable;
AdditionalKeywords Keywords;
encoding::Encoding Encoding;
// Allocator for FormatToken objects; presumably owns every token handed out
// through 'Tokens' - verify against the implementation.
llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
// Index (in 'Tokens') of the last token that starts a new line.
unsigned FirstInLineIndex;
SmallVector<FormatToken *, 16> Tokens;
SmallVector<IdentifierInfo *, 8> ForEachMacros;
bool FormattingDisabled;
llvm::Regex MacroBlockBeginRegex;
llvm::Regex MacroBlockEndRegex;
void readRawToken(FormatToken &Tok);
void resetLexer(unsigned Offset);
};
} // namespace format
} // namespace clang
#endif

View File

@@ -0,0 +1,213 @@
//===--- NamespaceEndCommentsFixer.cpp --------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file implements NamespaceEndCommentsFixer, a TokenAnalyzer that
/// fixes namespace end comments.
///
//===----------------------------------------------------------------------===//
#include "NamespaceEndCommentsFixer.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Regex.h"
#define DEBUG_TYPE "namespace-end-comments-fixer"
namespace clang {
namespace format {
namespace {
// The maximal number of unwrapped lines that a short namespace spans.
// Short namespaces don't need an end comment.
// With the value 1, only namespaces that span a single unwrapped line count
// as short.
static const int kShortNamespaceMaxLines = 1;
// Matches a valid namespace end comment.
// Valid namespace end comments don't need to be edited.
//
// Matching is case-insensitive. Capture groups, numbered by opening
// parenthesis (index 0 is the whole match):
//   1: optional "end " / "end of " prefix
//   2: the "of " inside group 1
//   3: "anonymous" or "unnamed"
//   4: the namespace name together with its leading spaces
//   5: the namespace name itself
//   6: the closing "*/" of a block comment
static llvm::Regex kNamespaceCommentPattern =
llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
"namespace( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$",
llvm::Regex::IgnoreCase);
// Computes the name of a namespace given the namespace token.
// Returns "" for anonymous namespace.
std::string computeName(const FormatToken *NamespaceTok) {
assert(NamespaceTok && NamespaceTok->is(tok::kw_namespace) &&
"expecting a namespace token");
std::string name = "";
// Collects all the non-comment tokens between 'namespace' and '{'.
const FormatToken *Tok = NamespaceTok->getNextNonComment();
while (Tok && !Tok->is(tok::l_brace)) {
name += Tok->TokenText;
Tok = Tok->getNextNonComment();
}
return name;
}
// Produces the canonical end comment for a namespace: "// namespace" for an
// anonymous namespace, "// namespace <name>" otherwise. A trailing '\n' is
// appended when \p AddNewline is set.
std::string computeEndCommentText(StringRef NamespaceName, bool AddNewline) {
  std::string Comment("// namespace");
  const bool IsNamed = !NamespaceName.empty();
  if (IsNamed) {
    Comment.push_back(' ');
    Comment += NamespaceName;
  }
  if (AddNewline)
    Comment.push_back('\n');
  return Comment;
}
bool hasEndComment(const FormatToken *RBraceTok) {
return RBraceTok->Next && RBraceTok->Next->is(tok::comment);
}
bool validEndComment(const FormatToken *RBraceTok, StringRef NamespaceName) {
assert(hasEndComment(RBraceTok));
const FormatToken *Comment = RBraceTok->Next;
SmallVector<StringRef, 7> Groups;
if (kNamespaceCommentPattern.match(Comment->TokenText, &Groups)) {
StringRef NamespaceNameInComment = Groups.size() > 5 ? Groups[5] : "";
// Anonymous namespace comments must not mention a namespace name.
if (NamespaceName.empty() && !NamespaceNameInComment.empty())
return false;
StringRef AnonymousInComment = Groups.size() > 3 ? Groups[3] : "";
// Named namespace comments must not mention anonymous namespace.
if (!NamespaceName.empty() && !AnonymousInComment.empty())
return false;
return NamespaceNameInComment == NamespaceName;
}
return false;
}
void addEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
const SourceManager &SourceMgr,
tooling::Replacements *Fixes) {
auto EndLoc = RBraceTok->Tok.getEndLoc();
auto Range = CharSourceRange::getCharRange(EndLoc, EndLoc);
auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
if (Err) {
llvm::errs() << "Error while adding namespace end comment: "
<< llvm::toString(std::move(Err)) << "\n";
}
}
void updateEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
const SourceManager &SourceMgr,
tooling::Replacements *Fixes) {
assert(hasEndComment(RBraceTok));
const FormatToken *Comment = RBraceTok->Next;
auto Range = CharSourceRange::getCharRange(Comment->getStartOfNonWhitespace(),
Comment->Tok.getEndLoc());
auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
if (Err) {
llvm::errs() << "Error while updating namespace end comment: "
<< llvm::toString(std::move(Err)) << "\n";
}
}
// Returns the 'namespace' keyword token of the block closed by \p line, or
// nullptr when \p line is not an affected, non-preprocessor line starting
// with '}' whose matching opening line begins a namespace (optionally
// preceded by 'inline').
const FormatToken *
getNamespaceToken(const AnnotatedLine *line,
                  const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
  const bool IsCandidate = line->Affected && !line->InPPDirective &&
                           line->startsWith(tok::r_brace);
  if (!IsCandidate)
    return nullptr;

  const size_t OpeningIndex = line->MatchingOpeningBlockLineIndex;
  if (OpeningIndex == UnwrappedLine::kInvalidIndex)
    return nullptr;
  assert(OpeningIndex < AnnotatedLines.size());

  const FormatToken *Candidate = AnnotatedLines[OpeningIndex]->First;
  // "namespace" keyword can be on the line preceding '{', e.g. in styles
  // where BraceWrapping.AfterNamespace is true.
  if (Candidate->is(tok::l_brace) && OpeningIndex > 0)
    Candidate = AnnotatedLines[OpeningIndex - 1]->First;

  // Detect "(inline)? namespace" in the beginning of a line.
  if (Candidate->is(tok::kw_inline))
    Candidate = Candidate->getNextNonComment();

  const bool IsNamespace = Candidate && Candidate->is(tok::kw_namespace);
  return IsNamespace ? Candidate : nullptr;
}
} // namespace
// Constructs the fixer; \p Env and \p Style are forwarded unchanged to the
// TokenAnalyzer base class.
NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env,
const FormatStyle &Style)
: TokenAnalyzer(Env, Style) {}
// Scans \p AnnotatedLines for lines that close a namespace block (as decided
// by getNamespaceToken()) and records replacements that add a missing
// namespace end comment or rewrite one that validEndComment() rejects.
// With Style.CompactNamespaces set, the end comments of a group of namespaces
// are merged into the comment of the last closing brace of the group.
// Returns the collected replacements paired with a penalty of 0.
std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze(
TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
FormatTokenLexer &Tokens) {
const SourceManager &SourceMgr = Env.getSourceManager();
AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(),
AnnotatedLines.end());
tooling::Replacements Fixes;
// "::"-prefixed names of the namespaces whose end comments were deferred
// while compacting; appended to the name used in the merged comment.
std::string AllNamespaceNames = "";
// Opening-line index recorded for the namespace (group) being handled;
// SIZE_MAX means nothing has been recorded yet.
size_t StartLineIndex = SIZE_MAX;
// Number of closing braces deferred so far in the current compacted group.
unsigned int CompactedNamespacesCount = 0;
for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) {
const AnnotatedLine *EndLine = AnnotatedLines[I];
const FormatToken *NamespaceTok =
getNamespaceToken(EndLine, AnnotatedLines);
// Not the closing line of a namespace: nothing to do.
if (!NamespaceTok)
continue;
FormatToken *RBraceTok = EndLine->First;
// Skip braces already marked as handled, and mark this one.
if (RBraceTok->Finalized)
continue;
RBraceTok->Finalized = true;
const FormatToken *EndCommentPrevTok = RBraceTok;
// Namespaces often end with '};'. In that case, attach namespace end
// comments to the semicolon tokens.
if (RBraceTok->Next && RBraceTok->Next->is(tok::semi)) {
EndCommentPrevTok = RBraceTok->Next;
}
if (StartLineIndex == SIZE_MAX)
StartLineIndex = EndLine->MatchingOpeningBlockLineIndex;
std::string NamespaceName = computeName(NamespaceTok);
if (Style.CompactNamespaces) {
// Defer this brace when the next line also closes a namespace that is not
// yet finalized and whose opening line index is exactly one less than the
// opening line indices consumed so far by this group.
if ((I + 1 < E) &&
getNamespaceToken(AnnotatedLines[I + 1], AnnotatedLines) &&
StartLineIndex - CompactedNamespacesCount - 1 ==
AnnotatedLines[I + 1]->MatchingOpeningBlockLineIndex &&
!AnnotatedLines[I + 1]->First->Finalized) {
if (hasEndComment(EndCommentPrevTok)) {
// remove end comment, it will be merged in next one
updateEndComment(EndCommentPrevTok, std::string(), SourceMgr, &Fixes);
}
CompactedNamespacesCount++;
AllNamespaceNames = "::" + NamespaceName + AllNamespaceNames;
continue;
}
// Last brace of the group: its comment carries all deferred names.
NamespaceName += AllNamespaceNames;
CompactedNamespacesCount = 0;
AllNamespaceNames = std::string();
}
// The next token in the token stream after the place where the end comment
// token must be. This is either the next token on the current line or the
// first token on the next line.
const FormatToken *EndCommentNextTok = EndCommentPrevTok->Next;
if (EndCommentNextTok && EndCommentNextTok->is(tok::comment))
EndCommentNextTok = EndCommentNextTok->Next;
if (!EndCommentNextTok && I + 1 < E)
EndCommentNextTok = AnnotatedLines[I + 1]->First;
// A newline is appended to the comment text only when a following non-eof
// token exists and has no newline before it.
bool AddNewline = EndCommentNextTok &&
EndCommentNextTok->NewlinesBefore == 0 &&
EndCommentNextTok->isNot(tok::eof);
const std::string EndCommentText =
computeEndCommentText(NamespaceName, AddNewline);
if (!hasEndComment(EndCommentPrevTok)) {
// Only add a comment when the distance between the recorded opening line
// and this closing line exceeds the short-namespace threshold.
bool isShort = I - StartLineIndex <= kShortNamespaceMaxLines + 1;
if (!isShort)
addEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
} else if (!validEndComment(EndCommentPrevTok, NamespaceName)) {
updateEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
}
// Forget the recorded opening line so the next namespace records its own.
StartLineIndex = SIZE_MAX;
}
return {Fixes, 0};
}
} // namespace format
} // namespace clang

View File

@@ -0,0 +1,37 @@
//===--- NamespaceEndCommentsFixer.h ----------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file declares NamespaceEndCommentsFixer, a TokenAnalyzer that
/// fixes namespace end comments.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_FORMAT_NAMESPACEENDCOMMENTSFIXER_H
#define LLVM_CLANG_LIB_FORMAT_NAMESPACEENDCOMMENTSFIXER_H
#include "TokenAnalyzer.h"
namespace clang {
namespace format {
// A TokenAnalyzer that fixes namespace end comments.
class NamespaceEndCommentsFixer : public TokenAnalyzer {
public:
// \p Env and \p Style are handed to the TokenAnalyzer base class.
NamespaceEndCommentsFixer(const Environment &Env, const FormatStyle &Style);
// TokenAnalyzer hook: returns the replacements computed for
// \p AnnotatedLines together with a penalty value.
std::pair<tooling::Replacements, unsigned>
analyze(TokenAnnotator &Annotator,
SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
FormatTokenLexer &Tokens) override;
};
} // end namespace format
} // end namespace clang
#endif

View File

@@ -0,0 +1,456 @@
//===--- SortJavaScriptImports.cpp - Sort ES6 Imports -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file implements a sort operation for JavaScript ES6 imports.
///
//===----------------------------------------------------------------------===//
#include "SortJavaScriptImports.h"
#include "TokenAnalyzer.h"
#include "TokenAnnotator.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Format/Format.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Debug.h"
#include <algorithm>
#include <string>
#define DEBUG_TYPE "format-formatter"
namespace clang {
namespace format {
class FormatTokenLexer;
using clang::format::FormatStyle;
// One symbol named in a JavaScript ES6 import/export list, together with its
// optional alias (`Symbol as Alias`) and the source range it occupies.
struct JsImportedSymbol {
  StringRef Symbol;
  StringRef Alias;
  SourceRange Range;

  // Two symbols are equal when name and alias agree. Range is deliberately
  // left out: it only serves to stitch source text together, and the same
  // import at a different location is still conceptually the same import.
  bool operator==(const JsImportedSymbol &RHS) const {
    if (Symbol != RHS.Symbol)
      return false;
    return Alias == RHS.Alias;
  }
};
// An ES6 module reference.
//
// ES6 implements a module system, where individual modules (~= source files)
// can reference other modules, either importing symbols from them, or exporting
// symbols from them:
// import {foo} from 'foo';
// export {foo};
// export {bar} from 'bar';
//
// `export`s with URLs are syntactic sugar for an import of the symbol from the
// URL, followed by an export of the symbol, allowing this code to treat both
// statements more or less identically, with the exception being that `export`s
// are sorted last.
//
// imports and exports support individual symbols, but also a wildcard syntax:
// import * as prefix from 'foo';
// export * from 'bar';
//
// This struct represents both exports and imports to build up the information
// required for sorting module references.
struct JsModuleReference {
// True for `export ...` statements, false for `import ...` statements.
bool IsExport = false;
// Module references are sorted into these categories, in order.
enum ReferenceCategory {
SIDE_EFFECT, // "import 'something';"
ABSOLUTE, // from 'something'
RELATIVE_PARENT, // from '../*'
RELATIVE, // from './*'
};
// Category of this reference; SIDE_EFFECT until a more specific one is set.
ReferenceCategory Category = ReferenceCategory::SIDE_EFFECT;
// The URL imported, e.g. `import .. from 'url';`. Empty for `export {a, b};`.
StringRef URL;
// Prefix from "import * as prefix". Empty for symbol imports and `export *`.
// Implies an empty names list.
StringRef Prefix;
// Symbols from `import {SymbolA, SymbolB, ...} from ...;`.
SmallVector<JsImportedSymbol, 1> Symbols;
// Textual position of the import/export, including preceding and trailing
// comments.
SourceRange Range;
};
// Strict weak ordering used to sort module references:
//   1. imports before exports,
//   2. then by ReferenceCategory,
//   3. side-effect imports compare equivalent (their relative order is kept
//      by the stable sort),
//   4. references with a URL before those without,
//   5. then by case-insensitive URL,
//   6. '*' imports (with prefix) before {a, b, ...} imports,
//   7. finally by prefix, greater prefix first.
bool operator<(const JsModuleReference &LHS, const JsModuleReference &RHS) {
  // When the flags differ, LHS sorts first exactly when RHS is the export.
  if (LHS.IsExport != RHS.IsExport)
    return RHS.IsExport;
  if (LHS.Category != RHS.Category)
    return LHS.Category < RHS.Category;
  // Side effect imports might be ordering sensitive. Consider them equal so
  // that they maintain their relative order in the stable sort.
  // This retains transitivity because LHS.Category == RHS.Category here.
  if (LHS.Category == JsModuleReference::ReferenceCategory::SIDE_EFFECT)
    return false;

  // Empty URLs sort *last* (for export {...};).
  const bool LHSNoURL = LHS.URL.empty();
  const bool RHSNoURL = RHS.URL.empty();
  if (LHSNoURL != RHSNoURL)
    return RHSNoURL;

  const int URLOrder = LHS.URL.compare_lower(RHS.URL);
  if (URLOrder != 0)
    return URLOrder < 0;

  // '*' imports (with prefix) sort before {a, b, ...} imports.
  const bool LHSNoPrefix = LHS.Prefix.empty();
  const bool RHSNoPrefix = RHS.Prefix.empty();
  if (LHSNoPrefix != RHSNoPrefix)
    return RHSNoPrefix;

  // Equal prefixes yield false here, so no separate inequality test is needed.
  return LHS.Prefix > RHS.Prefix;
}
// JavaScriptImportSorter sorts JavaScript ES6 imports and exports. It is
// implemented as a TokenAnalyzer because ES6 imports have substantial syntactic
// structure, making it messy to sort them using regular expressions.
class JavaScriptImportSorter : public TokenAnalyzer {
public:
// Forwards \p Env and \p Style to TokenAnalyzer and captures the buffer of
// the file identified by Env.getFileID() so that getSourceText() can slice it.
JavaScriptImportSorter(const Environment &Env, const FormatStyle &Style)
: TokenAnalyzer(Env, Style),
FileContents(Env.getSourceManager().getBufferData(Env.getFileID())) {}
// Sorts the ES6 module references (imports/exports) returned by
// parseModuleReferences() and, when anything changed, emits a single
// replacement covering the whole import/export block.
//
// References are ordered with operator< through a stable sort, and the
// symbols inside each reference are ordered by appendReference(). If both
// the references and their symbols are already in order, no replacement is
// produced. The returned penalty is always 0.
std::pair<tooling::Replacements, unsigned>
analyze(TokenAnnotator &Annotator,
        SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
        FormatTokenLexer &Tokens) override {
  tooling::Replacements Result;
  AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(),
                                        AnnotatedLines.end());

  const AdditionalKeywords &Keywords = Tokens.getKeywords();
  SmallVector<JsModuleReference, 16> References;
  AnnotatedLine *FirstNonImportLine;
  std::tie(References, FirstNonImportLine) =
      parseModuleReferences(Keywords, AnnotatedLines);

  if (References.empty())
    return {Result, 0};

  // Sort a permutation of indices rather than the references themselves, so
  // that "already sorted" can be detected by checking the permutation.
  SmallVector<unsigned, 16> Indices;
  for (unsigned i = 0, e = References.size(); i != e; ++i)
    Indices.push_back(i);
  std::stable_sort(Indices.begin(), Indices.end(),
                   [&](unsigned LHSI, unsigned RHSI) {
                     return References[LHSI] < References[RHSI];
                   });
  bool ReferencesInOrder = std::is_sorted(Indices.begin(), Indices.end());

  std::string ReferencesText;
  bool SymbolsInOrder = true;
  for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
    // Bind by reference: the element is only read here, so copying the
    // whole JsModuleReference (and its Symbols vector) is unnecessary.
    JsModuleReference &Reference = References[Indices[i]];
    if (appendReference(ReferencesText, Reference))
      SymbolsInOrder = false;
    if (i + 1 < e) {
      // Insert breaks between imports and exports.
      ReferencesText += "\n";
      // Separate imports groups with two line breaks, but keep all exports
      // in a single group.
      const JsModuleReference &Next = References[Indices[i + 1]];
      if (!Reference.IsExport &&
          (Reference.IsExport != Next.IsExport ||
           Reference.Category != Next.Category))
        ReferencesText += "\n";
    }
  }

  if (ReferencesInOrder && SymbolsInOrder)
    return {Result, 0};

  // The replaced range spans from the first to the last parsed reference, in
  // their original source order.
  SourceRange InsertionPoint = References[0].Range;
  InsertionPoint.setEnd(References.back().Range.getEnd());

  // The loop above might collapse previously existing line breaks between
  // import blocks, and thus shrink the file. SortIncludes must not shrink
  // overall source length as there is currently no re-calculation of ranges
  // after applying source sorting.
  // This loop just backfills trailing spaces after the imports, which are
  // harmless and will be stripped by the subsequent formatting pass.
  // FIXME: A better long term fix is to re-calculate Ranges after sorting.
  unsigned PreviousSize = getSourceText(InsertionPoint).size();
  while (ReferencesText.size() < PreviousSize) {
    ReferencesText += " ";
  }

  // Separate references from the main code body of the file.
  if (FirstNonImportLine && FirstNonImportLine->First->NewlinesBefore < 2)
    ReferencesText += "\n";

  DEBUG(llvm::dbgs() << "Replacing imports:\n"
                     << getSourceText(InsertionPoint) << "\nwith:\n"
                     << ReferencesText << "\n");
  auto Err = Result.add(tooling::Replacement(
      Env.getSourceManager(), CharSourceRange::getCharRange(InsertionPoint),
      ReferencesText));
  // FIXME: better error handling. For now, just print error message and skip
  // the replacement for the release version.
  if (Err) {
    llvm::errs() << llvm::toString(std::move(Err)) << "\n";
    assert(false);
  }

  return {Result, 0};
}
private:
FormatToken *Current;
FormatToken *LineEnd;
FormatToken invalidToken;
StringRef FileContents;
// Advances Current past any comment tokens it currently points at.
void skipComments() { Current = skipComments(Current); }

// Returns the first token at or after \p Tok that is not a comment, or
// nullptr when the token chain ends first.
FormatToken *skipComments(FormatToken *Tok) {
  for (; Tok; Tok = Tok->Next)
    if (Tok->isNot(tok::comment))
      break;
  return Tok;
}
void nextToken() {
Current = Current->Next;
skipComments();
if (!Current || Current == LineEnd->Next) {
// Set the current token to an invalid token, so that further parsing on
// this line fails.
invalidToken.Tok.setKind(tok::unknown);
Current = &invalidToken;
}
}
// Returns the file text covered by \p Range (begin inclusive, end exclusive).
StringRef getSourceText(SourceRange Range) {
  return getSourceText(Range.getBegin(), Range.getEnd());
}

// Returns the slice of FileContents between the file offsets of \p Begin and
// \p End.
StringRef getSourceText(SourceLocation Begin, SourceLocation End) {
  const SourceManager &SM = Env.getSourceManager();
  const unsigned BeginOffset = SM.getFileOffset(Begin);
  const unsigned EndOffset = SM.getFileOffset(End);
  return FileContents.substr(BeginOffset, EndOffset - BeginOffset);
}
// Appends ``Reference`` to ``Buffer``, returning true if text within the
// ``Reference`` changed (e.g. symbol order).
bool appendReference(std::string &Buffer, JsModuleReference &Reference) {
// Sort the individual symbols within the import.
// E.g. `import {b, a} from 'x';` -> `import {a, b} from 'x';`
SmallVector<JsImportedSymbol, 1> Symbols = Reference.Symbols;
std::stable_sort(
Symbols.begin(), Symbols.end(),
[&](const JsImportedSymbol &LHS, const JsImportedSymbol &RHS) {
return LHS.Symbol.compare_lower(RHS.Symbol) < 0;
});
if (Symbols == Reference.Symbols) {
// No change in symbol order.
StringRef ReferenceStmt = getSourceText(Reference.Range);
Buffer += ReferenceStmt;
return false;
}
// Stitch together the module reference start...
SourceLocation SymbolsStart = Reference.Symbols.front().Range.getBegin();
SourceLocation SymbolsEnd = Reference.Symbols.back().Range.getEnd();
Buffer += getSourceText(Reference.Range.getBegin(), SymbolsStart);
// ... then the references in order ...
for (auto I = Symbols.begin(), E = Symbols.end(); I != E; ++I) {
if (I != Symbols.begin())
Buffer += ",";
Buffer += getSourceText(I->Range);
}
// ... followed by the module reference end.
Buffer += getSourceText(SymbolsEnd, Reference.Range.getEnd());
return true;
}
// Parses module references in the given lines. Returns the module references,
// and a pointer to the first "main code" line if that is adjacent to the
// affected lines of module references, nullptr otherwise.
// Parsing stops at the first line that is not a module reference. If none of
// the parsed import/export lines is affected, the returned list is empty.
std::pair<SmallVector<JsModuleReference, 16>, AnnotatedLine *>
parseModuleReferences(const AdditionalKeywords &Keywords,
SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
SmallVector<JsModuleReference, 16> References;
// Start of the text attributed to the next reference; invalid means "use the
// start of the current line".
SourceLocation Start;
AnnotatedLine *FirstNonImportLine = nullptr;
bool AnyImportAffected = false;
for (auto Line : AnnotatedLines) {
// Set up the per-line cursor used by nextToken()/skipComments().
Current = Line->First;
LineEnd = Line->Last;
skipComments();
if (Start.isInvalid() || References.empty())
// After the first file level comment, consider line comments to be part
// of the import that immediately follows them by using the previously
// set Start.
Start = Line->First->Tok.getLocation();
if (!Current) {
// Only comments on this line. Could be the first non-import line.
FirstNonImportLine = Line;
continue;
}
JsModuleReference Reference;
Reference.Range.setBegin(Start);
if (!parseModuleReference(Keywords, Reference)) {
if (!FirstNonImportLine)
FirstNonImportLine = Line; // if no comment before.
break;
}
// A reference was parsed, so any comment-only line remembered above
// belongs to the import block rather than to the main code.
FirstNonImportLine = nullptr;
AnyImportAffected = AnyImportAffected || Line->Affected;
Reference.Range.setEnd(LineEnd->Tok.getEndLoc());
DEBUG({
llvm::dbgs() << "JsModuleReference: {"
<< "is_export: " << Reference.IsExport
<< ", cat: " << Reference.Category
<< ", url: " << Reference.URL
<< ", prefix: " << Reference.Prefix;
for (size_t i = 0; i < Reference.Symbols.size(); ++i)
llvm::dbgs() << ", " << Reference.Symbols[i].Symbol << " as "
<< Reference.Symbols[i].Alias;
llvm::dbgs() << ", text: " << getSourceText(Reference.Range);
llvm::dbgs() << "}\n";
});
References.push_back(Reference);
// Reset so the next reference starts at its own line unless comment-only
// lines precede it.
Start = SourceLocation();
}
// Sort imports if any import line was affected.
if (!AnyImportAffected)
References.clear();
return std::make_pair(References, FirstNonImportLine);
}
// Parses a JavaScript/ECMAScript 6 module reference.
// See http://www.ecma-international.org/ecma-262/6.0/#sec-scripts-and-modules
// for grammar EBNF (production ModuleItem).
bool parseModuleReference(const AdditionalKeywords &Keywords,
JsModuleReference &Reference) {
if (!Current || !Current->isOneOf(Keywords.kw_import, tok::kw_export))
return false;
Reference.IsExport = Current->is(tok::kw_export);
nextToken();
if (Current->isStringLiteral() && !Reference.IsExport) {
// "import 'side-effect';"
Reference.Category = JsModuleReference::ReferenceCategory::SIDE_EFFECT;
Reference.URL =
Current->TokenText.substr(1, Current->TokenText.size() - 2);
return true;
}
if (!parseModuleBindings(Keywords, Reference))
return false;
if (Current->is(Keywords.kw_from)) {
// imports have a 'from' clause, exports might not.
nextToken();
if (!Current->isStringLiteral())
return false;
// URL = TokenText without the quotes.
Reference.URL =
Current->TokenText.substr(1, Current->TokenText.size() - 2);
if (Reference.URL.startswith(".."))
Reference.Category =
JsModuleReference::ReferenceCategory::RELATIVE_PARENT;
else if (Reference.URL.startswith("."))
Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE;
else
Reference.Category = JsModuleReference::ReferenceCategory::ABSOLUTE;
} else {
// w/o URL groups with "empty".
Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE;
}
return true;
}
// Parses the bindings of a module reference: first tries the `* as prefix`
// form and, if that does not match, the named-bindings form.
bool parseModuleBindings(const AdditionalKeywords &Keywords,
                         JsModuleReference &Reference) {
  return parseStarBinding(Keywords, Reference) ||
         parseNamedBindings(Keywords, Reference);
}
// Parses `* as prefix` (as in `import * as prefix from '...';`). On success
// stores the identifier in Reference.Prefix, consumes it and returns true.
// Returns false as soon as the expected token is missing; tokens consumed up
// to that point stay consumed.
bool parseStarBinding(const AdditionalKeywords &Keywords,
                      JsModuleReference &Reference) {
  if (!Current->is(tok::star))
    return false;
  nextToken();
  if (!Current->is(Keywords.kw_as))
    return false;
  nextToken();
  if (Current->is(tok::identifier)) {
    Reference.Prefix = Current->TokenText;
    nextToken();
    return true;
  }
  return false;
}
bool parseNamedBindings(const AdditionalKeywords &Keywords,
JsModuleReference &Reference) {
if (Current->is(tok::identifier)) {
nextToken();
if (Current->is(Keywords.kw_from))
return true;
if (Current->isNot(tok::comma))
return false;
nextToken(); // eat comma.
}
if (Current->isNot(tok::l_brace))
return false;
// {sym as alias, sym2 as ...} from '...';
while (Current->isNot(tok::r_brace)) {
nextToken();
if (Current->is(tok::r_brace))
break;
if (!Current->isOneOf(tok::identifier, tok::kw_default))
return false;
JsImportedSymbol Symbol;
Symbol.Symbol = Current->TokenText;
// Make sure to include any preceding comments.
Symbol.Range.setBegin(
Current->getPreviousNonComment()->Next->WhitespaceRange.getBegin());
nextToken();
if (Current->is(Keywords.kw_as)) {
nextToken();
if (!Current->isOneOf(tok::identifier, tok::kw_default))
return false;
Symbol.Alias = Current->TokenText;
nextToken();
}
Symbol.Range.setEnd(Current->Tok.getLocation());
Reference.Symbols.push_back(Symbol);
if (!Current->isOneOf(tok::r_brace, tok::comma))
return false;
}
nextToken(); // consume r_brace
return true;
}
};
// Sorts the JavaScript ES6 imports/exports in \p Code. A virtual environment
// is created for \p Code under \p FileName with \p Ranges, a
// JavaScriptImportSorter is run over it, and its replacements are returned
// (the penalty is dropped).
tooling::Replacements sortJavaScriptImports(const FormatStyle &Style,
                                            StringRef Code,
                                            ArrayRef<tooling::Range> Ranges,
                                            StringRef FileName) {
  // FIXME: Cursor support.
  auto Env = Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
  return JavaScriptImportSorter(*Env, Style).process().first;
}
} // end namespace format
} // end namespace clang

View File

@@ -0,0 +1,36 @@
//===--- SortJavaScriptImports.h - Sort ES6 Imports -------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file declares a sorter for JavaScript ES6 imports.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_FORMAT_SORTJAVASCRIPTIMPORTS_H
#define LLVM_CLANG_LIB_FORMAT_SORTJAVASCRIPTIMPORTS_H
#include "clang/Basic/LLVM.h"
#include "clang/Format/Format.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
namespace clang {
namespace format {
// Sort JavaScript ES6 imports/exports in ``Code``. The generated replacements
// only monotonically increase the length of the given code.
// ``Style`` is the format style handed to the sorter, ``Ranges`` are the
// ranges of ``Code`` to consider, and ``FileName`` is the name under which
// ``Code`` is processed.
tooling::Replacements sortJavaScriptImports(const FormatStyle &Style,
StringRef Code,
ArrayRef<tooling::Range> Ranges,
StringRef FileName);
} // end namespace format
} // end namespace clang
#endif

View File

@@ -0,0 +1,154 @@
//===--- TokenAnalyzer.cpp - Analyze Token Streams --------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file implements an abstract TokenAnalyzer and associated helper
/// classes. TokenAnalyzer can be extended to generate replacements based on
/// an annotated and pre-processed token stream.
///
//===----------------------------------------------------------------------===//
#include "TokenAnalyzer.h"
#include "AffectedRangeManager.h"
#include "Encoding.h"
#include "FormatToken.h"
#include "FormatTokenLexer.h"
#include "TokenAnnotator.h"
#include "UnwrappedLineParser.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Format/Format.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "format-formatter"
namespace clang {
namespace format {
// This sets up a virtual file system with file \p FileName containing \p
// Code, and returns an Environment that owns the FileManager, SourceManager
// and DiagnosticsEngine created for it. \p Ranges (offset/length pairs
// relative to the start of the file) are converted to CharSourceRanges; the
// three start-column arguments are passed through to the Environment.
std::unique_ptr<Environment>
Environment::CreateVirtualEnvironment(StringRef Code, StringRef FileName,
ArrayRef<tooling::Range> Ranges,
unsigned FirstStartColumn,
unsigned NextStartColumn,
unsigned LastStartColumn) {
// This is referenced by `FileMgr` and will be released by `FileMgr` when it
// is deleted.
IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(
new vfs::InMemoryFileSystem);
// This is passed to `SM` as reference, so the pointer has to be referenced
// in `Environment` so that `FileMgr` can out-live this function scope.
std::unique_ptr<FileManager> FileMgr(
new FileManager(FileSystemOptions(), InMemoryFileSystem));
// This is passed to `SM` as reference, so the pointer has to be referenced
// by `Environment` due to the same reason above.
std::unique_ptr<DiagnosticsEngine> Diagnostics(new DiagnosticsEngine(
IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
new DiagnosticOptions));
// This will be stored as reference, so the pointer has to be stored in
// `Environment` due to the same reason above.
std::unique_ptr<SourceManager> VirtualSM(
new SourceManager(*Diagnostics, *FileMgr));
// Register the code as an in-memory file; the buffer refers to \p Code and is
// created without requiring a null terminator.
InMemoryFileSystem->addFile(
FileName, 0,
llvm::MemoryBuffer::getMemBuffer(Code, FileName,
/*RequiresNullTerminator=*/false));
FileID ID = VirtualSM->createFileID(FileMgr->getFile(FileName),
SourceLocation(), clang::SrcMgr::C_User);
assert(ID.isValid());
SourceLocation StartOfFile = VirtualSM->getLocForStartOfFile(ID);
// Translate each (offset, length) range into a character range in the file.
std::vector<CharSourceRange> CharRanges;
for (const tooling::Range &Range : Ranges) {
SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
SourceLocation End = Start.getLocWithOffset(Range.getLength());
CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
}
return llvm::make_unique<Environment>(
ID, std::move(FileMgr), std::move(VirtualSM), std::move(Diagnostics),
CharRanges, FirstStartColumn, NextStartColumn, LastStartColumn);
}
// Stores \p Style and \p Env, builds the affected-range manager from the
// environment's source manager and character ranges, starts with a single
// empty run of unwrapped lines, and detects the encoding of the file buffer.
// In debug output, reports the detected encoding and the style's language.
TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style)
    : Style(Style), Env(Env),
      AffectedRangeMgr(Env.getSourceManager(), Env.getCharRanges()),
      UnwrappedLines(1),
      Encoding(encoding::detectEncoding(
          Env.getSourceManager().getBufferData(Env.getFileID()))) {
  DEBUG({
    const char *EncodingName =
        Encoding == encoding::Encoding_UTF8 ? "UTF8" : "unknown";
    llvm::dbgs() << "File encoding: " << EncodingName << "\n";
    llvm::dbgs() << "Language: " << getLanguageName(Style.Language) << "\n";
  });
}
std::pair<tooling::Replacements, unsigned> TokenAnalyzer::process() {
tooling::Replacements Result;
FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(),
Env.getFirstStartColumn(), Style, Encoding);
UnwrappedLineParser Parser(Style, Tokens.getKeywords(),
Env.getFirstStartColumn(), Tokens.lex(), *this);
Parser.parse();
assert(UnwrappedLines.rbegin()->empty());
unsigned Penalty = 0;
for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; ++Run) {
DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
SmallVector<AnnotatedLine *, 16> AnnotatedLines;
TokenAnnotator Annotator(Style, Tokens.getKeywords());
for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
Annotator.annotate(*AnnotatedLines.back());
}
std::pair<tooling::Replacements, unsigned> RunResult =
analyze(Annotator, AnnotatedLines, Tokens);
DEBUG({
llvm::dbgs() << "Replacements for run " << Run << ":\n";
for (tooling::Replacements::const_iterator I = RunResult.first.begin(),
E = RunResult.first.end();
I != E; ++I) {
llvm::dbgs() << I->toString() << "\n";
}
});
for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
delete AnnotatedLines[i];
}
Penalty += RunResult.second;
for (const auto &R : RunResult.first) {
auto Err = Result.add(R);
// FIXME: better error handling here. For now, simply return an empty
// Replacements to indicate failure.
if (Err) {
llvm::errs() << llvm::toString(std::move(Err)) << "\n";
return {tooling::Replacements(), 0};
}
}
}
return {Result, Penalty};
}
// UnwrappedLineConsumer hook: appends \p TheLine to the run currently being
// collected (the last entry of UnwrappedLines).
void TokenAnalyzer::consumeUnwrappedLine(const UnwrappedLine &TheLine) {
  assert(!UnwrappedLines.empty());
  auto &CurrentRun = UnwrappedLines.back();
  CurrentRun.push_back(TheLine);
}
// UnwrappedLineConsumer hook: closes the current run by starting a new,
// empty one.
void TokenAnalyzer::finishRun() {
  UnwrappedLines.emplace_back();
}
} // end namespace format
} // end namespace clang

View File

@@ -0,0 +1,134 @@
//===--- TokenAnalyzer.h - Analyze Token Streams ----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file declares an abstract TokenAnalyzer, and associated helper
/// classes. TokenAnalyzer can be extended to generate replacements based on
/// an annotated and pre-processed token stream.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_FORMAT_TOKENANALYZER_H
#define LLVM_CLANG_LIB_FORMAT_TOKENANALYZER_H
#include "AffectedRangeManager.h"
#include "Encoding.h"
#include "FormatToken.h"
#include "FormatTokenLexer.h"
#include "TokenAnnotator.h"
#include "UnwrappedLineParser.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Format/Format.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
namespace clang {
namespace format {
// Bundles what an analyzer needs to know about the code being processed: the
// file ID, the source manager, the character ranges to consider and the start
// columns of the code fragment.
class Environment {
public:
// Wraps an existing SourceManager. Nothing is owned in this mode (the
// unique_ptr members stay empty) and all start columns are 0.
Environment(SourceManager &SM, FileID ID, ArrayRef<CharSourceRange> Ranges)
: ID(ID), CharRanges(Ranges.begin(), Ranges.end()), SM(SM),
FirstStartColumn(0),
NextStartColumn(0),
LastStartColumn(0) {}
// Takes ownership of \p FileMgr, \p VirtualSM and \p Diagnostics; the
// source manager reference is bound to the object held by \p VirtualSM.
Environment(FileID ID, std::unique_ptr<FileManager> FileMgr,
std::unique_ptr<SourceManager> VirtualSM,
std::unique_ptr<DiagnosticsEngine> Diagnostics,
const std::vector<CharSourceRange> &CharRanges,
unsigned FirstStartColumn,
unsigned NextStartColumn,
unsigned LastStartColumn)
: ID(ID), CharRanges(CharRanges.begin(), CharRanges.end()),
SM(*VirtualSM),
FirstStartColumn(FirstStartColumn),
NextStartColumn(NextStartColumn),
LastStartColumn(LastStartColumn),
FileMgr(std::move(FileMgr)),
VirtualSM(std::move(VirtualSM)), Diagnostics(std::move(Diagnostics)) {}
// This sets up a virtual file system with file \p FileName containing the
// fragment \p Code. Assumes that \p Code starts at \p FirstStartColumn,
// that the next lines of \p Code should start at \p NextStartColumn, and
// that \p Code should end at \p LastStartColumn if it ends in newline.
// See also the documentation of clang::format::internal::reformat.
static std::unique_ptr<Environment>
CreateVirtualEnvironment(StringRef Code, StringRef FileName,
ArrayRef<tooling::Range> Ranges,
unsigned FirstStartColumn = 0,
unsigned NextStartColumn = 0,
unsigned LastStartColumn = 0);
FileID getFileID() const { return ID; }
ArrayRef<CharSourceRange> getCharRanges() const { return CharRanges; }
const SourceManager &getSourceManager() const { return SM; }
// Returns the column at which the fragment of code managed by this
// environment starts.
unsigned getFirstStartColumn() const { return FirstStartColumn; }
// Returns the column at which subsequent lines of the fragment of code
// managed by this environment should start.
unsigned getNextStartColumn() const { return NextStartColumn; }
// Returns the column at which the fragment of code managed by this
// environment should end if it ends in a newline.
unsigned getLastStartColumn() const { return LastStartColumn; }
private:
FileID ID;
SmallVector<CharSourceRange, 8> CharRanges;
SourceManager &SM;
unsigned FirstStartColumn;
unsigned NextStartColumn;
unsigned LastStartColumn;
// The order of these fields is important - they should be in the same order
// as they are created in `CreateVirtualEnvironment` so that they can be
// deleted in the reverse order as they are created.
// NOTE(review): `CreateVirtualEnvironment` creates FileMgr, Diagnostics and
// then VirtualSM, while the fields below are declared FileMgr, VirtualSM,
// Diagnostics - confirm that destroying Diagnostics before VirtualSM is
// intended.
std::unique_ptr<FileManager> FileMgr;
std::unique_ptr<SourceManager> VirtualSM;
std::unique_ptr<DiagnosticsEngine> Diagnostics;
};
// Abstract base for analyses that generate replacements from an annotated,
// pre-processed token stream. Subclasses implement analyze(); process()
// drives lexing, parsing and annotation and calls analyze() once per run of
// unwrapped lines.
class TokenAnalyzer : public UnwrappedLineConsumer {
public:
TokenAnalyzer(const Environment &Env, const FormatStyle &Style);
// Runs the analysis and returns the merged replacements plus the summed
// penalty of all runs.
std::pair<tooling::Replacements, unsigned> process();
protected:
// Computes the replacements and penalty for one run of annotated lines.
virtual std::pair<tooling::Replacements, unsigned>
analyze(TokenAnnotator &Annotator,
SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
FormatTokenLexer &Tokens) = 0;
// UnwrappedLineConsumer hooks: collect parsed lines into the current run and
// start a new run, respectively.
void consumeUnwrappedLine(const UnwrappedLine &TheLine) override;
void finishRun() override;
// Copy of the style passed to the constructor.
FormatStyle Style;
// Stores Style, FileID and SourceManager etc.
const Environment &Env;
// AffectedRangeMgr stores ranges to be fixed.
AffectedRangeManager AffectedRangeMgr;
// One entry per run; the last entry is the run currently being collected.
SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
// Encoding detected from the file buffer in the constructor.
encoding::Encoding Encoding;
};
} // end namespace format
} // end namespace clang
#endif

Some files were not shown because too many files have changed in this diff Show More