Bug 761552 - Part 1: Provide UTF8ToUnicode functions accepting a buffer. r=jlebar

UTF8ToNewUnicode does not let the caller specify the buffer to use, but
allocates the buffer on its own. Hence the caller cannot choose which
allocator to use. Rectify this by providing new APIs.

1. Split UTF8ToNewUnicode into:
   - CalcUTF8ToUnicodeLength
   - UTF8ToUnicodeBuffer
2. Rebuild UTF8ToNewUnicode using the new functions
This commit is contained in:
Nils Maier 2013-01-29 10:50:04 -05:00
parent dd286ff0cd
commit 9ccd762177
2 changed files with 60 additions and 11 deletions

View File

@ -120,6 +120,38 @@ PRUnichar* ToNewUnicode( const nsAString& aSource );
*/
PRUnichar* ToNewUnicode( const nsACString& aSource );
/**
* Returns the required length for a PRUnichar buffer holding
* a copy of aSource, using UTF-8 to UTF-16 conversion.
* The length is counted in 16-bit units and does NOT include any space
* for zero-termination; callers allocating a buffer must add one unit
* for the terminator themselves.
*
* @param aSource an 8-bit wide string, UTF-8 encoded
* @return length of the UTF-16 encoded string copy, in 16-bit units,
* not counting a zero terminator
*/
uint32_t CalcUTF8ToUnicodeLength( const nsACString& aSource );
/**
* Copies the source string into the specified buffer, converting UTF-8 to
* UTF-16 in the process. The conversion is well defined for valid UTF-8
* strings.
* The copied string will be zero-terminated! Any embedded nulls will be
* copied nonetheless. It is the caller's responsibility to ensure the buffer
* is large enough to hold the string copy plus one PRUnichar for
* zero-termination!
*
* @see CalcUTF8ToUnicodeLength( const nsACString& )
* @see UTF8ToNewUnicode( const nsACString&, uint32_t* )
*
* @param aSource an 8-bit wide string, UTF-8 encoded
* @param aBuffer the caller-provided buffer that receives the converted,
* zero-terminated string copy
* @param aUTF16Count optional; if non-null, receives the number of 16-bit
* units that were copied, not counting the zero terminator
* @return aBuffer pointer, for convenience
*/
PRUnichar* UTF8ToUnicodeBuffer( const nsACString& aSource,
PRUnichar *aBuffer,
uint32_t *aUTF16Count = nullptr );
/**
* Returns a new |PRUnichar| buffer containing a zero-terminated copy
* of |aSource|.

View File

@ -325,28 +325,45 @@ ToNewUnicode( const nsACString& aSource )
return result;
}
PRUnichar*
UTF8ToNewUnicode( const nsACString& aSource, uint32_t *aUTF16Count )
uint32_t
CalcUTF8ToUnicodeLength( const nsACString& aSource)
{
nsACString::const_iterator start, end;
CalculateUTF8Length calculator;
copy_string(aSource.BeginReading(start), aSource.EndReading(end),
calculator);
return calculator.Length();
}
/**
 * Converts aSource from UTF-8 to UTF-16 into the caller-supplied aBuffer
 * and writes a zero terminator after the converted data.
 *
 * The caller must make sure aBuffer has room for the converted string plus
 * one PRUnichar for the terminator; no bounds checking is done here.
 *
 * @param aSource an 8-bit wide string, UTF-8 encoded
 * @param aBuffer destination buffer for the converted string
 * @param aUTF16Count optional; when non-null, receives the length reported
 *        by the converter
 * @return aBuffer, for convenience
 */
PRUnichar*
UTF8ToUnicodeBuffer( const nsACString& aSource, PRUnichar* aBuffer, uint32_t *aUTF16Count )
{
  nsACString::const_iterator start, end;
  ConvertUTF8toUTF16 converter(aBuffer);
  copy_string(aSource.BeginReading(start),
              aSource.EndReading(end),
              converter).write_terminator();
  // aUTF16Count is optional, so only store the count when the caller
  // actually passed a destination for it.
  if (aUTF16Count)
    *aUTF16Count = converter.Length();
  return aBuffer;
}
/**
 * Returns a newly allocated, zero-terminated UTF-16 copy of the UTF-8
 * encoded aSource. The buffer is obtained from nsMemory::Alloc.
 *
 * @param aSource an 8-bit wide string, UTF-8 encoded
 * @param aUTF16Count optional; when non-null, receives the number of 16-bit
 *        units copied (terminator not counted). Left untouched when the
 *        allocation fails.
 * @return the new buffer, or nullptr if the allocation failed
 */
PRUnichar*
UTF8ToNewUnicode( const nsACString& aSource, uint32_t *aUTF16Count )
{
  const uint32_t length = CalcUTF8ToUnicodeLength(aSource);
  // Widen before adding the terminator slot so the "+ 1" cannot wrap in
  // 32-bit arithmetic.
  const size_t buffer_size =
    (static_cast<size_t>(length) + 1) * sizeof(PRUnichar);
  PRUnichar *buffer = static_cast<PRUnichar*>(nsMemory::Alloc(buffer_size));
  if (!buffer)
    return nullptr;

  uint32_t copied = 0;
  UTF8ToUnicodeBuffer(aSource, buffer, &copied);
  // The pre-computed length and the converter's count must agree, otherwise
  // the buffer was sized incorrectly.
  NS_ASSERTION(length == copied, "length mismatch");

  if (aUTF16Count)
    *aUTF16Count = copied;
  return buffer;
}
PRUnichar*