Files
UnrealEngineUWP/Engine/Source/Runtime/WebBrowser/Private/MobileJS/MobileJSStructDeserializerBackend.cpp
jamie dale a287480c36 Allow FString instances containing code units outside of the basic multilingual plane to be losslessly processed regardless of whether TCHAR is 2 or 4 bytes.
Most UE4 platforms use a 2-byte TCHAR, however some still use a 4-byte TCHAR. The platforms that use a 4-byte TCHAR expect their string data to be UTF-32, however there are parts of UE4 that serialize FString data as a series of UCS2CHAR, simply narrowing or widening each TCHAR in turn. This can result in invalid or corrupted UTF-32 strings (either UTF-32 strings containing UTF-16 surrogates, or UTF-32 code points that have been truncated to 2-bytes), which leads to either odd behavior or crashes.

This change updates the parts of UE4 that process FString data as a series of 2-byte values to do so on the correct UTF-16 interpretation of the data, converting to/from UTF-32 as required on platforms that use a 4-byte TCHAR. This conversion is a no-op on platforms that use a 2-byte TCHAR as the string is already assumed to be valid UTF-16 data. It should also be noted that while FString may contain UTF-16 code units on platforms using a 2-byte TCHAR, this change doesn't do anything to make FString represent a Unicode string on those platforms (ie, a string that understands and works on code points), but is rather just a bag of code units.

Two new variable-width string converters have been added to facilitate the conversion (modelled after the TCHAR<->UTF-8 converters), TUTF16ToUTF32_Convert and TUTF32ToUTF16_Convert. These are used both for TCHAR<->UTF16CHAR conversion when needed, and for TCHAR<->wchar_t conversion on platforms that use char16_t for TCHAR along with having a 4-byte wchar_t (as defined by the new PLATFORM_WCHAR_IS_4_BYTES option).

These conversion routines are accessed either via the conversion macros (TCHAR_TO_UTF16, UTF16_TO_TCHAR, TCHAR_TO_WCHAR, and WCHAR_TO_TCHAR), or by using a conversion struct (FTCHARToUTF16, FUTF16ToTCHAR, FTCHARToWChar, and FWCharToTCHAR), which is the same pattern as the existing TCHAR<->UTF-8 conversion. Both the macros and the structs are defined as no-ops when the conversion isn't needed, but always exist so that code can be written in a portable way.

Very little code actually needed updating to use UTF-16, as the vast majority makes no assumptions about the size of TCHAR, nor how FString should be serialized. The main places were the FString archive serialization and the JSON reader/writer, along with some minor fixes to the UTF-8 conversion logic for platforms using a 4-byte TCHAR.

Tests have been added to verify that an FString representing a UTF-32 code point can be losslessly converted to/from UTF-8 and UTF-16, and serialized to/from an archive.

#jira
#rb Steve.Robb, Josh.Adams


#ROBOMERGE-SOURCE: CL 8676728 via CL 8687863
#ROBOMERGE-BOT: (v421-8677696)

[CL 8688048 by jamie dale in Main branch]
2019-09-16 05:44:11 -04:00

108 lines
3.0 KiB
C++

// Copyright 1998-2019 Epic Games, Inc. All Rights Reserved.
#include "MobileJSStructDeserializerBackend.h"
#if PLATFORM_ANDROID || PLATFORM_IOS
#include "MobileJSScripting.h"
#include "UObject/UnrealType.h"
#include "Templates/Casts.h"
namespace
{
	// @todo: this function is copied from CEFJSStructDeserializerBackend.cpp. Move shared utility code to a common header file
	/**
	 * Sets the value of the given property.
	 *
	 * When Outer is an array property, a new element is appended to that array and receives the value;
	 * ArrayIndex is not consulted on that path. Otherwise the value is written to element ArrayIndex
	 * of Property itself.
	 *
	 * @param Property The property to set.
	 * @param Outer The property that contains the property to be set, if any.
	 * @param Data A pointer to the memory holding the property's data.
	 * @param ArrayIndex The index of the element to set (if the property is an array).
	 * @param Value The value to assign to the property.
	 * @return true on success, false otherwise.
	 * @see ClearPropertyValue
	 */
	template<typename UPropertyType, typename PropertyType>
	bool SetPropertyValue( UProperty* Property, UProperty* Outer, void* Data, int32 ArrayIndex, const PropertyType& Value )
	{
		PropertyType* ValuePtr = nullptr;
		UArrayProperty* ArrayProperty = Cast<UArrayProperty>(Outer);

		if (ArrayProperty != nullptr)
		{
			// The property must be the element type of the enclosing array
			if (ArrayProperty->Inner != Property)
			{
				return false;
			}

			// Append a fresh element and write into it
			FScriptArrayHelper ArrayHelper(ArrayProperty, ArrayProperty->template ContainerPtrToValuePtr<void>(Data));
			int32 Index = ArrayHelper.AddValue();

			ValuePtr = (PropertyType*)ArrayHelper.GetRawPtr(Index);
		}
		else
		{
			UPropertyType* TypedProperty = Cast<UPropertyType>(Property);

			// Reject a property of the wrong type and any index outside [0, ArrayDim).
			// A negative index is never valid and must not be handed to ContainerPtrToValuePtr.
			if (TypedProperty == nullptr || ArrayIndex < 0 || ArrayIndex >= TypedProperty->ArrayDim)
			{
				return false;
			}

			ValuePtr = TypedProperty->template ContainerPtrToValuePtr<PropertyType>(Data, ArrayIndex);
		}

		if (ValuePtr == nullptr)
		{
			return false;
		}

		*ValuePtr = Value;

		return true;
	}
}
/**
 * Reads the most recently parsed JSON value into the given property.
 *
 * A JSON string aimed at a struct property whose struct is FWebJSFunction is parsed as a GUID
 * and stored as an FWebJSFunction constructed from Scripting and that GUID. Every other case
 * is forwarded to FJsonStructDeserializerBackend::ReadProperty.
 *
 * @param Property The property to read into.
 * @param Outer The property that contains the property to be read, if any.
 * @param Data A pointer to the memory holding the property's data.
 * @param ArrayIndex The index of the element to set (if the property is an array).
 * @return false if the string is not a parsable GUID or the value could not be stored;
 *         otherwise the result of storing the value or of the parent class implementation.
 */
bool FMobileJSStructDeserializerBackend::ReadProperty( UProperty* Property, UProperty* Outer, void* Data, int32 ArrayIndex )
{
	// Only string values that target a struct property are candidates for special handling
	const bool bIsStringValue = (GetLastNotation() == EJsonNotation::String);
	if (bIsStringValue && Property->IsA<UStructProperty>())
	{
		UStructProperty* const AsStructProperty = Cast<UStructProperty>(Property);
		if (AsStructProperty->Struct == FWebJSFunction::StaticStruct())
		{
			// The string carries the callback's GUID
			FGuid ParsedID;
			const bool bParsed = FGuid::Parse(GetReader()->GetValueAsString(), ParsedID);
			if (!bParsed)
			{
				return false;
			}

			FWebJSFunction Callback(Scripting, ParsedID);
			return SetPropertyValue<UStructProperty, FWebJSFunction>(Property, Outer, Data, ArrayIndex, Callback);
		}
	}

	// Anything not handled above is left to the base JSON backend
	return FJsonStructDeserializerBackend::ReadProperty(Property, Outer, Data, ArrayIndex);
}
/**
 * Creates a deserializer backend that reads from the given JSON text.
 *
 * @param InScripting The scripting object to keep in Scripting.
 * @param JsonString The JSON text; its UTF-16 representation is copied into JsonData.
 */
FMobileJSStructDeserializerBackend::FMobileJSStructDeserializerBackend(FMobileJSScriptingRef InScripting, const FString& JsonString)
	: FJsonStructDeserializerBackend(Reader)
	, Scripting(InScripting)
	, JsonData()
	, Reader(JsonData)
{
	// NOTE(review): the base class is handed Reader before the Reader member has been constructed,
	// and JsonData is only filled in below - presumably neither is read until deserialization starts; confirm.

	// Note: This is a no-op on platforms that are using a 16-bit TCHAR
	FTCHARToUTF16 Converted(*JsonString, JsonString.Len());

	// Length() counts UTF-16 code units, so scale it to a byte count before the raw append
	const int32 ByteCount = static_cast<int32>(Converted.Length() * sizeof(UTF16CHAR));
	JsonData.Append(reinterpret_cast<const uint8*>(Converted.Get()), ByteCount);
}
#endif // PLATFORM_ANDROID || PLATFORM_IOS