Files
UnrealEngineUWP/Engine/Source/Runtime/Slate/Private/Framework/Text/SyntaxTokenizer.cpp
Jamie Dale 134b4be302 Added a CamelCase break iterator for use by the text layout
TTP#322500 - UX: Add wrapping of Asset Names based on CamelCase and Underscores, not just spaces

- Refactored the existing break iterators to share the same interface so that they could be swapped out at runtime.
- Made the iterators recyclable (so you can change the string rather than have to allocate a new iterator).
- Removed the public interfaces for the different break iterator types - they're all now accessed via IBreakIterator and are created via the relevant function in FBreakIterator.
- Removed a load of duplication in the ICU break iterators.
- Made FTextLayout accept a custom line break iterator (as used by its wrapping cache).
- Fixed a crash where the legacy line or word break iterators could underflow/overflow the string range.

ReviewedBy Justin.Sargent

[CL 2248547 by Jamie Dale in Main branch]
2014-08-08 11:23:20 -04:00

88 lines
2.6 KiB
C++

// Copyright 1998-2014 Epic Games, Inc. All Rights Reserved.
#include "SlatePrivatePCH.h"
#include "SyntaxTokenizer.h"
#include "BreakIterator.h"
/**
 * Factory for a shared syntax tokenizer instance.
 *
 * @param InRules  The syntax rules handed on to the tokenizer's constructor (moved, not copied, from the by-value parameter).
 * @return A shared reference to the newly allocated tokenizer.
 */
TSharedRef< FSyntaxTokenizer > FSyntaxTokenizer::Create(TArray<FRule> InRules)
{
	FSyntaxTokenizer* const NewTokenizer = new FSyntaxTokenizer(MoveTemp(InRules));
	return MakeShareable(NewTokenizer);
}
/** Destructor. Performs no explicit cleanup; members are destroyed in the usual way. */
FSyntaxTokenizer::~FSyntaxTokenizer() = default;
/**
 * Tokenizes the given input string, appending the result to OutTokenizedLines.
 *
 * @param OutTokenizedLines  Receives the tokenized lines (existing entries are left in place).
 * @param Input              The text to tokenize.
 */
void FSyntaxTokenizer::Process(TArray<FTokenizedLine>& OutTokenizedLines, const FString& Input)
{
#if UE_ENABLE_ICU
	// Work out where each line of the input begins and ends, then tokenize line by line.
	TArray<FTextRange> InputLineRanges;
	FTextRange::CalculateLineRangesFromString(Input, InputLineRanges);
	TokenizeLineRanges(Input, InputLineRanges, OutTokenizedLines);
#else
	// When UE_ENABLE_ICU is off, no real tokenization is done: the entire input is
	// reported as one line that contains a single literal token spanning all of it.
	const FTextRange WholeInputRange(0, Input.Len());

	FTokenizedLine WholeInputLine;
	WholeInputLine.Range = WholeInputRange;
	WholeInputLine.Tokens.Emplace(FToken(ETokenType::Literal, WholeInputRange));

	OutTokenizedLines.Add(WholeInputLine);
#endif
}
/**
 * Constructs a tokenizer from a set of syntax rules.
 *
 * @param InRules  The rules to tokenize with. Taken by value and moved into the
 *                 Rules member, so the array is moved rather than copied a second time.
 */
FSyntaxTokenizer::FSyntaxTokenizer(TArray<FRule> InRules)
: Rules(MoveTemp(InRules))
{
}
/**
 * Splits each of the given line ranges of Input into syntax and literal tokens.
 *
 * Each line is scanned from its begin index to its end index. At every offset the rules are
 * tried in order and the first rule whose MatchText is found at that offset produces a Syntax
 * token. If no rule matches, the text up to the next word-break candidate (clamped to the end
 * of the line) is emitted as a Literal token. An empty line yields a single empty Literal token.
 *
 * @param Input              The full string that the line ranges index into.
 * @param LineRanges         The ranges within Input to tokenize; one output line is produced per range.
 * @param OutTokenizedLines  Receives the tokenized lines, appended in the order of LineRanges.
 */
void FSyntaxTokenizer::TokenizeLineRanges(const FString& Input, const TArray<FTextRange>& LineRanges, TArray<FTokenizedLine>& OutTokenizedLines)
{
	// A single word break iterator over the whole input is shared by all lines;
	// the offsets passed to it below are absolute indices into Input.
	TSharedRef<IBreakIterator> WBI = FBreakIterator::CreateWordBreakIterator();
	WBI->SetString(Input);

	// Tokenize line ranges
	for(const FTextRange& LineRange : LineRanges)
	{
		FTokenizedLine TokenizedLine;
		TokenizedLine.Range = LineRange;

		if(TokenizedLine.Range.IsEmpty())
		{
			// Empty lines still get one (empty) literal token
			TokenizedLine.Tokens.Emplace(FToken(ETokenType::Literal, TokenizedLine.Range));
		}
		else
		{
			int32 CurrentOffset = LineRange.BeginIndex;
			while(CurrentOffset < LineRange.EndIndex)
			{
				// First check for a match against any syntax token rules
				bool bHasMatchedSyntax = false;
				for(const FRule& Rule : Rules)
				{
					const int32 MatchTextLen = Rule.MatchText.Len();
					if(MatchTextLen <= 0)
					{
						// An empty rule would compare equal at every offset without consuming
						// any text, so CurrentOffset would never advance and this loop would
						// never terminate. Such a rule can never produce a meaningful token.
						continue;
					}

					if(FCString::Strncmp(&Input[CurrentOffset], *Rule.MatchText, MatchTextLen) == 0)
					{
						const int32 SyntaxTokenEnd = CurrentOffset + MatchTextLen;

						// Validate the token before it is recorded: a syntax token must not run past the end of its line
						check(SyntaxTokenEnd <= LineRange.EndIndex);

						TokenizedLine.Tokens.Emplace(FToken(ETokenType::Syntax, FTextRange(CurrentOffset, SyntaxTokenEnd)));

						bHasMatchedSyntax = true;
						CurrentOffset = SyntaxTokenEnd;
						break;
					}
				}

				if(bHasMatchedSyntax)
				{
					continue;
				}

				// If none matched, consume the character(s) as text up to the next word boundary.
				// INDEX_NONE from the iterator is treated as "no further boundary", in which case
				// the remainder of the line is consumed; a boundary beyond the line is clamped to it.
				const int32 NextWordBoundary = WBI->MoveToCandidateAfter(CurrentOffset);
				const int32 TextTokenEnd = (NextWordBoundary == INDEX_NONE) ? LineRange.EndIndex : FMath::Min(NextWordBoundary, LineRange.EndIndex);
				TokenizedLine.Tokens.Emplace(FToken(ETokenType::Literal, FTextRange(CurrentOffset, TextTokenEnd)));
				CurrentOffset = TextTokenEnd;
			}
		}

		// The line is finished with locally, so hand its token array over rather than copying it
		OutTokenizedLines.Add(MoveTemp(TokenizedLine));
	}
}