// Copyright Epic Games, Inc. All Rights Reserved. #include "XmlFile.h" #include "Misc/FileHelper.h" #include "Misc/ScopeExit.h" FXmlFile::FXmlFile(const FString& InFile, EConstructMethod::Type ConstructMethod) : RootNode(nullptr), bFileLoaded(false) { LoadFile(InFile, ConstructMethod); } bool FXmlFile::LoadFile(const FString& InFile, EConstructMethod::Type ConstructMethod) { // Remove any file stuff if it already exists Clear(); // So far no error (Early so it can be overwritten below by errors) ErrorMessage = NSLOCTEXT("XmlParser", "LoadSuccess", "XmlFile was loaded successfully").ToString(); TArray Input; if(ConstructMethod == EConstructMethod::ConstructFromFile) { // Read and split the file if (!FFileHelper::LoadFileToStringArray(Input, *InFile)) { ErrorMessage = NSLOCTEXT("XmlParser", "FileLoadFail", "Failed to load the file").ToString(); ErrorMessage += TEXT("\""); ErrorMessage += InFile; ErrorMessage += TEXT("\""); return false; } } else { // Parse input buffer into an array of lines InFile.ParseIntoArrayLines(Input, /*bCullEmpty*/false); } // Pre-process the input PreProcessInput(Input); // Tokenize the input TArray Tokens = Tokenize(Input); // Parse the input & create the nodes CreateNodes(Tokens); // All done with creation, set up necessary information if(bFileLoaded == true) { if(ConstructMethod == EConstructMethod::ConstructFromFile) { LoadedFile = InFile; } } else { LoadedFile = TEXT(""); RootNode = nullptr; } // Now check the status flag of the creation. It may have actually failed, but given us a // partially created representation if(bCreationFailed) { Clear(); } return bFileLoaded; } FString FXmlFile::GetLastError() const { return ErrorMessage; } void FXmlFile::Clear() { if(bFileLoaded) { check(RootNode != nullptr); RootNode->Delete(); delete RootNode; RootNode = nullptr; bFileLoaded = false; LoadedFile = TEXT(""); ErrorMessage = NSLOCTEXT("XmlParser", "ClearSuccess", "XmlFile was cleared successfully").ToString(); } } bool FXmlFile::IsValid() const { checkSlow(bFileLoaded ? RootNode != nullptr : RootNode == nullptr); return bFileLoaded; } const FXmlNode* FXmlFile::GetRootNode() const { return RootNode; } FXmlNode* FXmlFile::GetRootNode() { return RootNode; } bool FXmlFile::Save(const FString& Path) { FString Xml = TEXT("") LINE_TERMINATOR; const FXmlNode* CurrentNode = GetRootNode(); if(CurrentNode != nullptr) { WriteNodeHierarchy(*CurrentNode, FString(), Xml); } if (!FFileHelper::SaveStringToFile(Xml, *Path, FFileHelper::EEncodingOptions::ForceUTF8WithoutBOM)) { ErrorMessage = NSLOCTEXT("XmlParser", "FileSaveFail", "Failed to save the file").ToString(); ErrorMessage += FString::Printf(TEXT("\"%s\""), *Path); return false; } return true; } /** Checks if the passed character is a whitespace character */ static bool IsWhiteSpace(TCHAR Char) { // Whitespace will be any character that is not a common printed ASCII character (and not space/tab) if(Char == TCHAR(' ') || Char == TCHAR('\t') || Char < 32 ) // ' ' { return true; } return false; } void FXmlFile::PreProcessInput(TArray& Input) { // Note: This implementation is written simply and will not handle all cases. It // is made for the simple cases where FXmlFile is to be used. // Assumptions/Misc: // - Well-formatted file with 1 entry per line // - Ignoring versions, encodings, and doctypes // Remove white space at the beginning of lines for(int32 i = 0; i < Input.Num(); ++i) { int32 NumWhiteSpace = 0; for(int32 j = 0; j < Input[i].Len(); ++j) { if(!IsWhiteSpace(Input[i][j])) { break; } ++NumWhiteSpace; } if(NumWhiteSpace > 0) { Input[i].MidInline(NumWhiteSpace, MAX_int32, false); } } // Cull any text that can be removed on a line-based parse for(int32 i = 0; i < Input.Num(); ++i) { // Find ')) { // Found comment end, perform removal (simply replace all text with whitespace to be ignored by tokenizer) bInComment = false; int32 CommentLineEnd = i; int32 CommentIndexEnd = 2; WhiteOut(Input, CommentLineStart, CommentLineEnd, CommentIndexStart, CommentIndexEnd); } } } if(Input[i].Len() < 3) { continue; } int32 Indx1 = 0, Indx2 = 1, Indx3 = 2, Indx4 = 3; for(; Indx4 < Input[i].Len(); ++Indx1, ++Indx2, ++Indx3, ++Indx4) { // Looking for the start of a comment if(!bInComment) { if(Input[i][Indx1] == TCHAR('<') && Input[i][Indx2] == TCHAR('!') && Input[i][Indx3] == TCHAR('-') && Input[i][Indx4] == TCHAR('-')) { // Found comment start, mark it bInComment = true; CommentLineStart = i; CommentIndexStart = Indx1; } } // Looking for the end of a comment else { if( (Input[i][Indx2] == TCHAR('-') && Input[i][Indx3] == TCHAR('-') && Input[i][Indx4] == TCHAR('>')) || (Input[i][Indx1] == TCHAR('-') && Input[i][Indx2] == TCHAR('-') && Input[i][Indx3] == TCHAR('>')) ) { // Found comment end, perform removal (simply replace all text with whitespace to be ignored by tokenizer) bInComment = false; int32 CommentLineEnd = i; int32 CommentIndexEnd = Indx4; WhiteOut(Input, CommentLineStart, CommentLineEnd, CommentIndexStart, CommentIndexEnd); } } } } } void FXmlFile::WhiteOut(TArray& Input, int32 LineStart, int32 LineEnd, int32 IndexStart, int32 IndexEnd) { if(LineEnd < LineStart) { // Error!, malformed file with comment ends } // White-out first line if(LineEnd - LineStart > 0) { for(int32 i = IndexStart; i < Input[LineStart].Len(); ++i) { Input[LineStart][i] = TCHAR(' '); } } // White-out middle lines if(LineEnd - LineStart > 1) { int32 NumLines = LineEnd - LineStart - 1; int32 MidStart = LineStart + 1; for(int32 i = MidStart; i < MidStart + NumLines; ++i) { Input[i] = TEXT(""); } } // White-out last line if(LineEnd - LineStart > 0) { for(int32 i = 0; i <= IndexEnd; ++i) { Input[LineEnd][i] = TCHAR(' '); } } // White-out if comment is only on 1 line if(LineStart == LineEnd) { for(int32 i = IndexStart; i <= IndexEnd; ++i) { Input[LineStart][i] = TCHAR(' '); } } } /** Checks if the passed character is an operator */ static bool CheckTagOperator(const TCHAR* PtrStart, const TCHAR* Ptr, const TCHAR* PtrEnd) { checkSlow(PtrStart <= Ptr && Ptr < PtrEnd); TCHAR Ch = *Ptr; if(Ch == TCHAR('/')) { const TCHAR* PtrNext = Ptr + 1; if(PtrNext != PtrEnd && *PtrNext == TCHAR('>')) { return true; } // check either the next or previous chars are tag closures - otherwise, this is just a slash if(Ptr != PtrStart && *(Ptr - 1) == TCHAR('<')) { return true; } } else if(Ch == TCHAR('<') || Ch == TCHAR('>')) { return true; } return false; } /** Checks if the passed character is an operator */ static bool IsPartOfTagOperator(TCHAR Char) { if(Char == TCHAR('<') || Char == TCHAR('/') || Char == TCHAR('>')) { return true; } return false; } /** Checks if the passed character is a quote */ static bool IsQuote(TCHAR Char) { return Char == TCHAR('\"'); } void FXmlFile::Tokenize(FStringView Input, TArray& Tokens) { FString WorkingToken; ON_SCOPE_EXIT { // Add working token if it still exists if (WorkingToken.Len()) { Tokens.Add(MoveTemp(WorkingToken)); } }; enum TOKENTYPE { OPERATOR, STRING } Type = STRING; bool bInQuote = false; const TCHAR* PtrStart = GetData(Input); const TCHAR* PtrEnd = PtrStart + GetNum(Input); for (const TCHAR* Ptr = PtrStart; Ptr != PtrEnd; ++Ptr) { TCHAR Ch = *Ptr; if(IsWhiteSpace(Ch) && !bInQuote) { // End the current token if(WorkingToken.Len()) { Tokens.Add(MoveTemp(WorkingToken)); checkSlow(WorkingToken.Len() == 0); } continue; } // Mark the start of a token if(!WorkingToken.Len()) { WorkingToken += Ch; if(CheckTagOperator(PtrStart, Ptr, PtrEnd)) { // Add the working token if it's final (ie: ends with '>') if (Ch == TCHAR('>')) { Tokens.Add(MoveTemp(WorkingToken)); checkSlow(WorkingToken.Len() == 0); } else { Type = OPERATOR; } } else { Type = STRING; } continue; } // Already in a token, so continue parsing if(Type == OPERATOR) { // Still the tag, so add it to the working token if(CheckTagOperator(PtrStart, Ptr, PtrEnd)) { WorkingToken += Ch; // Add the working token if it's final (ie: ends with '>') if (Ch == TCHAR('>')) { Tokens.Add(MoveTemp(WorkingToken)); checkSlow(WorkingToken.Len() == 0); } } // Not a tag operator anymore, so add the old token and start a new one else { Tokens.Add(MoveTemp(WorkingToken)); checkSlow(WorkingToken.Len() == 0); WorkingToken += Ch; Type = STRING; } } else // STRING { if (IsQuote(Ch)) { bInQuote = !bInQuote; } // Still a string. Allow '>' within a string if(!CheckTagOperator(PtrStart, Ptr, PtrEnd) || (bInQuote && Ch == TCHAR('>'))) { WorkingToken += Ch; } // Moving back to operator else { Tokens.Add(MoveTemp(WorkingToken)); checkSlow(WorkingToken.Len() == 0); WorkingToken += Ch; bInQuote = false; // Add the working token if it's final (ie: ends with '>') if (Ch == TCHAR('>')) { Tokens.Add(MoveTemp(WorkingToken)); checkSlow(WorkingToken.Len() == 0); } else { Type = OPERATOR; } } } } } TArray FXmlFile::Tokenize(const TArray& Input) { TArray Tokens; Tokens.Reserve(Input.Num()); for(const FString& Line : Input) { Tokenize(Line, Tokens); } return Tokens; } /** Checks if the passed string is an important tag operator */ static bool IsTagOperator(FString ToCheck) { if(ToCheck == TEXT("<") || ToCheck == TEXT(">") || ToCheck == TEXT("")) { return true; } return false; } void FXmlFile::AddAttribute(const FString& InToken, TArray& OutAttributes) { int32 EqualsIdx; if(InToken.FindChar(TEXT('='), EqualsIdx)) { bool bQuotesRemoved = false; FString Value = InToken.Mid(EqualsIdx + 1); Value.TrimQuotesInline(&bQuotesRemoved); // The xml spec allows single and double quotes, so this is a valid case to support: https://www.w3.org/TR/xml/#NT-AttValue if (!bQuotesRemoved) { Value.TrimCharInline('\'', &bQuotesRemoved); } if(bQuotesRemoved) { int32 AmpIdx; if(Value.FindChar(TEXT('&'), AmpIdx)) { Value.ReplaceInline(TEXT("""), TEXT("\""), ESearchCase::CaseSensitive); Value.ReplaceInline(TEXT("&"), TEXT("&"), ESearchCase::CaseSensitive); Value.ReplaceInline(TEXT("'"), TEXT("'"), ESearchCase::CaseSensitive); Value.ReplaceInline(TEXT("<"), TEXT("<"), ESearchCase::CaseSensitive); Value.ReplaceInline(TEXT(">"), TEXT(">"), ESearchCase::CaseSensitive); } OutAttributes.Add(FXmlAttribute(InToken.Left(EqualsIdx), Value)); } } } FXmlNode* FXmlFile::CreateNodeRecursive(const TArray& Tokens, int32 StartIndex, int32* OutNextIndex) { // Algorithm (Draft): // - First found token should always be a '<' // - Extract tag // - Check next token // - If '<', recursively deepen // - If string token, parse until '<' found to recursively deepen // - Continue parsing until for self is found // - Return own constructed node (and index of next starting point const int32 RecursiveStartIndex = StartIndex; int32 SavedIndex = StartIndex; // Get the tag & any attributes FString Tag; TArray Attributes; bool bInTag = false; for(int32 i = StartIndex; i < Tokens.Num(); ++i) { if(bCreationFailed) { break; } if(Tokens[i].Len() == 0 || Tokens[i] == TEXT("\n") || Tokens[i] == TEXT("\r") || Tokens[i] == TEXT("\n\r")) { continue; } // Looking for tag start if(!bInTag) { if(Tokens[i] == TEXT("<")) { bInTag = true; } else { // Error: Found text before any operators (ex: plist>) bCreationFailed = true; ErrorMessage = NSLOCTEXT("XmlParser", "MalformedXMLFile", "Malformed Xml File").ToString(); } } // Already found tag start, reading inside of it now else if(bInTag) { // Text for the tag if(!IsTagOperator(Tokens[i])) { if(Tag.Len()) { // this could be an attribute AddAttribute(Tokens[i], Attributes); } else { Tag = Tokens[i]; } } // Found another tag operator else { // Closing the tag starting if(Tokens[i] == TEXT(">")) { SavedIndex = i + 1; // We want to find the content starting here possibly break; } // Closing the tag and also finalizing it else if(Tokens[i] == TEXT("/>")) { FXmlNode* NewNode = new FXmlNode(); NewNode->Tag = Tag; NewNode->Attributes = Attributes; if (OutNextIndex != nullptr) { *OutNextIndex = i + 1; } return NewNode; } // Error else { // Error: malformed file (ex: Tag = Tag; NewNode->Attributes = Attributes; // Got the tag. Continue and try to get the content FString Content; FString FinalTag; bInTag = false; for(int32 i = SavedIndex; i < Tokens.Num(); ++i) { if(bCreationFailed) { break; } if(Tokens[i].Len() == 0 || Tokens[i] == TEXT("\n") || Tokens[i] == TEXT("\r") || Tokens[i] == TEXT("\n\r")) { continue; } if(!bInTag) { // Found the start of another tag if(Tokens[i] == TEXT("<")) { // Recursively enter function creating a child at the new tag FXmlNode* Child = nullptr; if (i > RecursiveStartIndex) { Child = CreateNodeRecursive(Tokens, i, &SavedIndex); } // Save child to parent if(Child != nullptr) { NewNode->Children.Add(Child); } // Error creating a child else { // Some error occurred in creating a child. The algorithm should abort bCreationFailed = true; ErrorMessage = NSLOCTEXT("XmlParser", "MalformedXMLFile", "Malformed Xml File").ToString(); break; } // Continue algorithm at the returned index i = SavedIndex - 1; // (-1) because continuing will increment i; continue; } // Found what should be the end of the current tag else if(Tokens[i] == TEXT(">' bCreationFailed = true; ErrorMessage = NSLOCTEXT("XmlParser", "MalformedXMLFile", "Malformed Xml File").ToString(); } // Not an operator, save the text else { if(Content.Len() > 0) { Content += TEXT(" "); } Content += Tokens[i]; } } // In the finalizing tag else { // Text for the tag if(!IsTagOperator(Tokens[i])) { if(FinalTag.Len()) { // Ignore this bit of text then (they may be like options or modifiers or some such) } else { FinalTag = Tokens[i]; } } else { // Found the end of the final tag if(Tokens[i] == TEXT(">")) { // Error: Tag and ending tag don't match if(Tag != FinalTag) { // Error: Tag and ending tag don't match bCreationFailed = true; ErrorMessage = NSLOCTEXT("XmlParser", "MalformedXMLFile", "Malformed Xml File").ToString(); } // Check to make sure tags match and create the node and return it SavedIndex = i + 1; if(OutNextIndex != nullptr) // Might be null because of the top-level node not sending it in { *OutNextIndex = SavedIndex; } NewNode->Content = Content; return NewNode; } // Error else { // Error: malformed file (ex: stuff) bCreationFailed = true; ErrorMessage = NSLOCTEXT("XmlParser", "MalformedXMLFile", "Malformed Xml File").ToString(); } } } } // Something bad happened, return NULL! delete NewNode; return nullptr; } void FXmlFile::HookUpNextPtrs(FXmlNode* Node) { if(Node == nullptr) { return; } for(int32 i = 0; i < Node->Children.Num(); ++i) { HookUpNextPtrs(Node->Children[i]); if(i != Node->Children.Num() - 1) { Node->Children[i]->NextNode = Node->Children[i + 1]; } } } void FXmlFile::CreateNodes(const TArray& Tokens) { // Assumption..There is only 1 top-level node which contains everything inside of it bCreationFailed = false; FXmlNode* Root = CreateNodeRecursive(Tokens, 0); if(Root) { bFileLoaded = true; // Hook up next ptrs HookUpNextPtrs(Root); // Save it RootNode = Root; } else { bFileLoaded = false; ErrorMessage = NSLOCTEXT("XmlParser", "NodeCreateFail", "Failed to parse the loaded document").ToString(); } } void FXmlFile::WriteNodeHierarchy(const FXmlNode& Node, const FString& Indent, FString& Output) { // Write the tag Output += Indent + FString::Printf(TEXT("<%s"), *Node.GetTag()); for(const FXmlAttribute& Attribute: Node.GetAttributes()) { FString EscapedValue = Attribute.GetValue(); EscapedValue.ReplaceInline(TEXT("&"), TEXT("&"), ESearchCase::CaseSensitive); EscapedValue.ReplaceInline(TEXT("\""), TEXT("""), ESearchCase::CaseSensitive); EscapedValue.ReplaceInline(TEXT("'"), TEXT("'"), ESearchCase::CaseSensitive); EscapedValue.ReplaceInline(TEXT("<"), TEXT("<"), ESearchCase::CaseSensitive); EscapedValue.ReplaceInline(TEXT(">"), TEXT(">"), ESearchCase::CaseSensitive); Output += FString::Printf(TEXT(" %s=\"%s\""), *Attribute.GetTag(), *EscapedValue); } // Write the node contents const FXmlNode* FirstChildNode = Node.GetFirstChildNode(); if(FirstChildNode == nullptr) { const FString& Content = Node.GetContent(); if(Content.Len() == 0) { Output += TEXT(" />") LINE_TERMINATOR; } else { Output += TEXT(">") + Content + FString::Printf(TEXT(""), *Node.GetTag()) + LINE_TERMINATOR; } } else { Output += TEXT(">") LINE_TERMINATOR; for(const FXmlNode* ChildNode = FirstChildNode; ChildNode != nullptr; ChildNode = ChildNode->GetNextNode()) { WriteNodeHierarchy(*ChildNode, Indent + TEXT("\t"), Output); } Output += Indent + FString::Printf(TEXT(""), *Node.GetTag()) + LINE_TERMINATOR; } }