gecko/intl/icu/source/common/ucnv.c
Jeff Walden 805dd78c93 Bug 924839 - Update our embedded ICU to 52.1, plus a very few local patches. r=lots of people, see subsequent lines in this commit message for the original subcomponents (merged together for landing), and the original bug for the original patch divisions
Bug 924839 - Remove a patch already part of ICU 52.1.  See http://bugs.icu-project.org/trac/ticket/10283 but also note the relevant code was removed completely upstream.  r=glandium
* * *
Bug 924839 - Remove another patch already part of ICU 52.1.  See http://bugs.icu-project.org/trac/ticket/10290 for that.  r=gaston
* * *
Bug 924839 - Remove another patch already in ICU 52.1.  See http://bugs.icu-project.org/trac/ticket/10045 for more.  r=Norbert
* * *
Bug 924839 - Remove another patch already applied upstream.  See http://bugs.icu-project.org/trac/changeset/32937 for more.  r=gaston
* * *
Bug 924839 - Update the ICU update script to update to 52.1, *without* applying any of our local patches.  r=glandium
* * *
Bug 924839 - Make the ICU update script only do updating within intl/icu/source and nowhere else.  r=glandium
* * *
Bug 924839 - Implement the changes that would be made by |cd intl/; ./update-icu.sh http://source.icu-project.org/repos/icu/icu/tags/release-52-1/;|, run with the prior changesets' changes made (thus not applying any of our local patches).  These changes don't actually work without subsequent adjustments, but this provides a codebase upon which those adjustments can be made, for the purpose of generating local patches to be kept in intl/icu-patches/.  rs=the-usual-suspects
* * *
Bug 924839 - Update the bug 899722 local patch to make runConfigureICU not override CC/CXX on BSD systems.  r=gaston
* * *
Bug 924839 - Update the bug 724533 patch that makes ICU build with MozillaBuild on Windows.  r=glandium
* * *
Bug 924839 - Import an upstream patch fixing the genrb tool to properly handle the -R (--omitCollationRules) option.  See http://bugs.icu-project.org/trac/ticket/10043 for the original bug report and a link to the ultimate upstream landing.  r=Norbert
* * *
Bug 924839 - Import the upstream fix for http://bugs.icu-project.org/trac/ticket/10486 so that ICU with -DU_USING_ICU_NAMESPACE=0 will compile on Windows.  r=Norbert
* * *
Bug 924839 - Adjust the update script to update ICU, then to apply all local patches (rather than skipping the second step).  Thus if the update script is properly run, now, the final result should be no changes at all to the tree.  NOT REVIEWED YET
* * *
Bug 924839 - Update jstests that depend on CLDR locale data to match CLDR 24.  r=Norbert
2013-11-12 16:23:48 -08:00

2927 lines
93 KiB
C

/*
******************************************************************************
*
* Copyright (C) 1998-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* ucnv.c:
* Implements APIs for the ICU's codeset conversion library;
* mostly calls through internal functions;
* created by Bertrand A. Damiba
*
* Modification History:
*
* Date Name Description
* 04/04/99 helena Fixed internal header inclusion.
* 05/09/00 helena Added implementation to handle fallback mappings.
* 06/20/2000 helena OS/400 port changes; mostly typecast.
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
#include "unicode/ustring.h"
#include "unicode/ucnv.h"
#include "unicode/ucnv_err.h"
#include "unicode/uset.h"
#include "unicode/utf.h"
#include "unicode/utf16.h"
#include "putilimp.h"
#include "cmemory.h"
#include "cstring.h"
#include "uassert.h"
#include "utracimp.h"
#include "ustr_imp.h"
#include "ucnv_imp.h"
#include "ucnv_cnv.h"
#include "ucnv_bld.h"
/* size of intermediate and preflighting buffers in ucnv_convert() */
#define CHUNK_SIZE 1024
/*
 * One entry of the table of "ambiguous" converters.
 * NOTE(review): judging by the field name, variant5c is presumably the Unicode
 * code point that byte 0x5c stands for in the named converter - confirm
 * against the code that reads this table.
 */
typedef struct UAmbiguousConverter {
/* converter name as a NUL-terminated char string */
const char *name;
/* variant code point associated with the converter (see note above) */
const UChar variant5c;
} UAmbiguousConverter;
/*
 * Table of converters with an associated variant code point:
 * 0xa5 is U+00A5 (YEN SIGN), 0x20a9 is U+20A9 (WON SIGN).
 * The commented-out entries are intentionally disabled and kept for reference.
 */
static const UAmbiguousConverter ambiguousConverters[]={
{ "ibm-897_P100-1995", 0xa5 },
{ "ibm-942_P120-1999", 0xa5 },
{ "ibm-943_P130-1999", 0xa5 },
{ "ibm-946_P100-1995", 0xa5 },
{ "ibm-33722_P120-1999", 0xa5 },
{ "ibm-1041_P100-1995", 0xa5 },
/*{ "ibm-54191_P100-2006", 0xa5 },*/
/*{ "ibm-62383_P100-2007", 0xa5 },*/
/*{ "ibm-891_P100-1995", 0x20a9 },*/
{ "ibm-944_P100-1995", 0x20a9 },
{ "ibm-949_P110-1999", 0x20a9 },
{ "ibm-1363_P110-1997", 0x20a9 },
{ "ISO_2022,locale=ko,version=0", 0x20a9 },
{ "ibm-1088_P100-1995", 0x20a9 }
};
/*
 * Opens a converter by name.
 * Returns NULL without doing anything when err is NULL or already holds a
 * failure code; otherwise forwards to ucnv_createConverter().
 */
U_CAPI UConverter* U_EXPORT2
ucnv_open (const char *name,
           UErrorCode * err)
{
    if (err != NULL && U_SUCCESS (*err)) {
        return ucnv_createConverter(NULL, name, err);
    }
    return NULL;
}
/*
 * Opens a converter from a named package; all argument checking and the
 * actual work are delegated to ucnv_createConverterFromPackage().
 */
U_CAPI UConverter* U_EXPORT2
ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err)
{
    UConverter *cnv = ucnv_createConverterFromPackage(packageName, converterName, err);

    return cnv;
}
/*
 * Opens a converter whose name is given as a UChar string.
 * The name is copied into a local char buffer with u_austrcpy() and passed
 * to ucnv_open(). A NULL name is forwarded as NULL. A name that does not fit
 * into UCNV_MAX_CONVERTER_NAME_LENGTH (including the terminator) sets
 * U_ILLEGAL_ARGUMENT_ERROR and yields NULL.
 */
U_CAPI UConverter* U_EXPORT2
ucnv_openU (const UChar * name,
            UErrorCode * err)
{
    char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH];

    if (err == NULL || U_FAILURE(*err)) {
        return NULL;
    }
    if (name == NULL) {
        return ucnv_open (NULL, err);
    }
    if (u_strlen(name) < UCNV_MAX_CONVERTER_NAME_LENGTH) {
        /* the name, plus its terminator, fits into the local buffer */
        return ucnv_open(u_austrcpy(asciiName, name), err);
    }
    *err = U_ILLEGAL_ARGUMENT_ERROR;
    return NULL;
}
/*
 * Writes the name prefix for a converter platform into platformString.
 * @param platformString output buffer; receives "ibm-" for UCNV_IBM and an
 *                       empty string for every other value
 * @param pltfrm         the platform enum value
 * @return the number of chars written, not counting the terminator (4 or 0)
 */
static int32_t
ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm)
{
    if (pltfrm == UCNV_IBM) {
        uprv_strcpy(platformString, "ibm-");
        return 4;
    }
    /* UCNV_UNKNOWN and anything else: empty prefix */
    platformString[0] = 0;
    return 0;
}
/*
 * Opens a converter from a CCSID number and a platform.
 * The converter name is built as <platform prefix><codepage in decimal>
 * and handed to ucnv_createConverter().
 * Returns NULL when err is NULL or already holds a failure code.
 */
U_CAPI UConverter* U_EXPORT2
ucnv_openCCSID (int32_t codepage,
                UConverterPlatform platform,
                UErrorCode * err)
{
    char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];
    int32_t prefixLength;

    if (err == NULL || U_FAILURE (*err)) {
        return NULL;
    }

    /* ucnv_copyPlatformString writes either "ibm-" or an empty string */
    prefixLength = ucnv_copyPlatformString(myName, platform);
    /* append the code page number in base 10 right after the prefix */
    T_CString_integerToString(myName + prefixLength, codepage, 10);

    return ucnv_createConverter(NULL, myName, err);
}
/*
 * Creates a copy of a converter that can be used independently of the
 * original, e.g. in another thread.
 *
 * @param cnv         the converter to clone; NULL sets U_ILLEGAL_ARGUMENT_ERROR
 * @param stackBuffer optional caller-provided memory for the clone; it is
 *                    used only if, after alignment, it is large enough
 * @param pBufferSize in: size of stackBuffer; a value <= 0 is a preflighting
 *                    request that only stores the needed size and returns NULL.
 *                    out: the needed size if memory had to be allocated.
 *                    May be NULL, in which case the clone is always allocated.
 * @param status      in/out error code; set to U_SAFECLONE_ALLOCATED_WARNING
 *                    when the clone was heap-allocated and to
 *                    U_MEMORY_ALLOCATION_ERROR when an allocation failed
 * @return the clone, or NULL on preflighting or failure
 */
U_CAPI UConverter* U_EXPORT2
ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
{
    UConverter *localConverter, *allocatedConverter;
    int32_t stackBufferSize;
    int32_t bufferSizeNeeded;
    char *stackBufferChars = (char *)stackBuffer;
    /* heap copy of the substitution string made by this call, if any */
    uint8_t *ownedSubChars = NULL;
    UErrorCode cbErr;
    UConverterToUnicodeArgs toUArgs = {
        sizeof(UConverterToUnicodeArgs),
            TRUE,
            NULL,
            NULL,
            NULL,
            NULL,
            NULL,
            NULL
    };
    UConverterFromUnicodeArgs fromUArgs = {
        sizeof(UConverterFromUnicodeArgs),
            TRUE,
            NULL,
            NULL,
            NULL,
            NULL,
            NULL,
            NULL
    };

    UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE);

    if (status == NULL || U_FAILURE(*status)){
        UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR);
        return NULL;
    }

    if (cnv == NULL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        UTRACE_EXIT_STATUS(*status);
        return NULL;
    }

    UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p",
                                    ucnv_getName(cnv, status), cnv, stackBuffer);

    if (cnv->sharedData->impl->safeClone != NULL) {
        /* call the custom safeClone function for sizing */
        bufferSizeNeeded = 0;
        cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status);
        if (U_FAILURE(*status)) {
            UTRACE_EXIT_STATUS(*status);
            return NULL;
        }
    }
    else
    {
        /* inherent sizing */
        bufferSizeNeeded = sizeof(UConverter);
    }

    if (pBufferSize == NULL) {
        /* no size given: a size of 1 can never fit, which forces an allocation */
        stackBufferSize = 1;
        pBufferSize = &stackBufferSize;
    } else {
        stackBufferSize = *pBufferSize;
        if (stackBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */
            *pBufferSize = bufferSizeNeeded;
            UTRACE_EXIT_VALUE(bufferSizeNeeded);
            return NULL;
        }
    }

    /* Pointers on 64-bit platforms need to be aligned
     * on a 64-bit boundary in memory.
     */
    if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
        int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);
        if(stackBufferSize > offsetUp) {
            stackBufferSize -= offsetUp;
            stackBufferChars += offsetUp;
        } else {
            /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
            stackBufferSize = 1;
        }
    }

    stackBuffer = (void *)stackBufferChars;

    /* Now, see if we must allocate any memory */
    if (stackBufferSize < bufferSizeNeeded || stackBuffer == NULL)
    {
        /* allocate one here...*/
        localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded);

        if(localConverter == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            UTRACE_EXIT_STATUS(*status);
            return NULL;
        }
        *status = U_SAFECLONE_ALLOCATED_WARNING;

        /* record the fact that memory was allocated */
        *pBufferSize = bufferSizeNeeded;
    } else {
        /* just use the stack buffer */
        localConverter = (UConverter*) stackBuffer;
        allocatedConverter = NULL;
    }

    uprv_memset(localConverter, 0, bufferSizeNeeded);

    /* Copy initial state */
    uprv_memcpy(localConverter, cnv, sizeof(UConverter));
    localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE;

    /* copy the substitution string */
    if (cnv->subChars == (uint8_t *)cnv->subUChars) {
        /* the string lives inside the converter struct: point at the clone's own copy */
        localConverter->subChars = (uint8_t *)localConverter->subUChars;
    } else {
        localConverter->subChars = ownedSubChars =
            (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
        if (ownedSubChars == NULL) {
            uprv_free(allocatedConverter);
            /* report the failure; otherwise the caller would see NULL with a non-failure status */
            *status = U_MEMORY_ALLOCATION_ERROR;
            UTRACE_EXIT_STATUS(*status);
            return NULL;
        }
        uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
    }

    /* now either call the safeclone fcn or not */
    if (cnv->sharedData->impl->safeClone != NULL) {
        /* call the custom safeClone function */
        localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status);
    }

    if(localConverter==NULL || U_FAILURE(*status)) {
        if (allocatedConverter != NULL) {
            if (allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) {
                uprv_free(allocatedConverter->subChars);
            }
        } else {
            /*
             * The clone lived in the caller's buffer, which we must not free,
             * but a substitution string allocated above would otherwise leak.
             */
            uprv_free(ownedSubChars);
        }
        uprv_free(allocatedConverter);
        UTRACE_EXIT_STATUS(*status);
        return NULL;
    }

    /* increment refcount of shared data if needed */
    /*
    Checking whether it's an algorithmic converter is okay
    in multithreaded applications because the value never changes.
    Don't check referenceCounter for any other value.
    */
    if (cnv->sharedData->referenceCounter != ~0) {
        ucnv_incrementRefCount(cnv->sharedData);
    }

    if(localConverter == (UConverter*)stackBuffer) {
        /* we're using user provided data - set to not destroy */
        localConverter->isCopyLocal = TRUE;
    }

    /* allow callback functions to handle any memory allocation */
    toUArgs.converter = fromUArgs.converter = localConverter;
    cbErr = U_ZERO_ERROR;
    cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr);
    cbErr = U_ZERO_ERROR;
    cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr);

    UTRACE_EXIT_PTR_STATUS(localConverter, *status);
    return localConverter;
}
/*
 * Closes a converter: notifies non-default callbacks with UCNV_CLOSE, calls
 * the implementation's close function if there is one, frees a separately
 * allocated substitution buffer, releases the shared data, and finally frees
 * the converter itself unless it is marked isCopyLocal.
 * A NULL converter is ignored.
 */
U_CAPI void U_EXPORT2
ucnv_close (UConverter * converter)
{
    UErrorCode errorCode = U_ZERO_ERROR;

    UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE);

    if (converter == NULL) {
        UTRACE_EXIT();
        return;
    }

    UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b",
                 ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal);

    /*
     * To keep closing fast, the callbacks are called only if they were changed
     * from the defaults. This shortcut only works when the callbacks are set
     * within a shared library or from user code that statically links this code.
     */

    /* tell the to-Unicode callback that the converter is being closed */
    if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
        UConverterToUnicodeArgs closeToUArgs = {
            sizeof(UConverterToUnicodeArgs),
            TRUE,
            NULL,
            NULL,
            NULL,
            NULL,
            NULL,
            NULL
        };

        closeToUArgs.converter = converter;
        errorCode = U_ZERO_ERROR;
        converter->fromCharErrorBehaviour(converter->toUContext, &closeToUArgs, NULL, 0, UCNV_CLOSE, &errorCode);
    }

    /* tell the from-Unicode callback that the converter is being closed */
    if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
        UConverterFromUnicodeArgs closeFromUArgs = {
            sizeof(UConverterFromUnicodeArgs),
            TRUE,
            NULL,
            NULL,
            NULL,
            NULL,
            NULL,
            NULL
        };

        closeFromUArgs.converter = converter;
        errorCode = U_ZERO_ERROR;
        converter->fromUCharErrorBehaviour(converter->fromUContext, &closeFromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode);
    }

    /* implementation-specific cleanup */
    if (converter->sharedData->impl->close != NULL) {
        converter->sharedData->impl->close(converter);
    }

    /* the substitution string is heap-allocated only if it is not the in-struct buffer */
    if (converter->subChars != (uint8_t *)converter->subUChars) {
        uprv_free(converter->subChars);
    }

    /*
     * Checking whether it's an algorithmic converter is okay
     * in multithreaded applications because the value never changes.
     * Don't check referenceCounter for any other value.
     */
    if (converter->sharedData->referenceCounter != ~0) {
        ucnv_unloadSharedDataIfReady(converter->sharedData);
    }

    /* converters marked isCopyLocal live in memory that is not ours to free */
    if (!converter->isCopyLocal) {
        uprv_free(converter);
    }

    UTRACE_EXIT();
}
/*
 * Returns the n-th name from the list of available converters.
 * Returns NULL if n is outside 0..0xffff or if the lookup reports an error.
 */
U_CAPI const char* U_EXPORT2
ucnv_getAvailableName (int32_t n)
{
    UErrorCode status = U_ZERO_ERROR;
    const char *converterName;

    if (n < 0 || n > 0xffff) {
        return NULL;
    }
    converterName = ucnv_bld_getAvailableConverter((uint16_t)n, &status);
    return U_SUCCESS(status) ? converterName : NULL;
}
/*
 * Returns the number of available converters as reported by
 * ucnv_bld_countAvailableConverters(); the error code of that call is discarded.
 */
U_CAPI int32_t U_EXPORT2
ucnv_countAvailable ()
{
    UErrorCode ignoredStatus = U_ZERO_ERROR;

    return ucnv_bld_countAvailableConverters(&ignoredStatus);
}
/*
 * Copies the converter's substitution bytes into a caller buffer.
 * @param converter the converter to query
 * @param mySubChar output buffer for the substitution bytes
 * @param len       in: capacity of mySubChar; out: number of bytes copied,
 *                  or 0 if the converter stores a Unicode or empty
 *                  substitution string (subCharLen <= 0)
 * @param err       in/out error code; U_INDEX_OUTOFBOUNDS_ERROR if *len is
 *                  smaller than the stored length
 */
U_CAPI void U_EXPORT2
ucnv_getSubstChars (const UConverter * converter,
                    char *mySubChar,
                    int8_t * len,
                    UErrorCode * err)
{
    if (U_FAILURE (*err)) {
        return;
    }

    if (converter->subCharLen <= 0) {
        /* Unicode string or empty string from ucnv_setSubstString(). */
        *len = 0;
    } else if (*len < converter->subCharLen) {
        /* the caller's buffer is too small */
        *err = U_INDEX_OUTOFBOUNDS_ERROR;
    } else {
        /* hand out the bytes and report how many were written */
        uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen);
        *len = converter->subCharLen;
    }
}
/*
 * Sets the converter's substitution bytes.
 * @param converter the converter to modify
 * @param mySubChar the new substitution bytes
 * @param len       number of bytes; must lie between the converter's
 *                  minBytesPerChar and maxBytesPerChar (inclusive),
 *                  otherwise U_ILLEGAL_ARGUMENT_ERROR is set
 * @param err       in/out error code
 */
U_CAPI void U_EXPORT2
ucnv_setSubstChars (UConverter * converter,
                    const char *mySubChar,
                    int8_t len,
                    UErrorCode * err)
{
    if (U_FAILURE (*err)) {
        return;
    }

    /* the length must be within the codepage's bytes-per-char boundaries */
    if (len > converter->sharedData->staticData->maxBytesPerChar) {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    if (len < converter->sharedData->staticData->minBytesPerChar) {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /* store the new bytes and their length */
    uprv_memcpy (converter->subChars, mySubChar, len);
    converter->subCharLen = len;

    /*
     * There is currently (2001Feb) no separate API to set/get subChar1.
     * In order to always have subChar written after it is explicitly set,
     * we set subChar1 to 0.
     */
    converter->subChar1 = 0;
}
/*
 * Sets the substitution string of a converter from a Unicode string.
 *
 * The string is first converted with a clone of cnv that uses
 * UCNV_FROM_U_CALLBACK_STOP, which validates the arguments and checks that
 * the string is convertible. Depending on the converter, either the
 * resulting charset bytes or the Unicode string itself is stored.
 *
 * @param cnv    the converter to modify
 * @param s      the substitution string
 * @param length length of s in UChars, or negative if s is NUL-terminated
 * @param err    in/out error code; U_BUFFER_OVERFLOW_ERROR if the Unicode
 *               string is longer than UCNV_ERROR_BUFFER_LENGTH,
 *               U_MEMORY_ALLOCATION_ERROR if a buffer could not be allocated
 */
U_CAPI void U_EXPORT2
ucnv_setSubstString(UConverter *cnv,
                    const UChar *s,
                    int32_t length,
                    UErrorCode *err) {
    UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1];
    char chars[UCNV_ERROR_BUFFER_LENGTH];

    UConverter *clone;
    uint8_t *subChars;
    int32_t cloneSize, length8;

    /* Let the following functions check all arguments. */
    cloneSize = sizeof(cloneBuffer);
    clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err);
    ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err);
    length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err);
    ucnv_close(clone);
    if (U_FAILURE(*err)) {
        return;
    }

    if (cnv->sharedData->impl->writeSub == NULL
#if !UCONFIG_NO_LEGACY_CONVERSION
        || (cnv->sharedData->staticData->conversionType == UCNV_MBCS &&
         ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL)
#endif
    ) {
        /* The converter is not stateful. Store the charset bytes as a fixed string. */
        subChars = (uint8_t *)chars;
    } else {
        /*
         * The converter has a non-default writeSub() function, indicating
         * that it is stateful.
         * Store the Unicode string for on-the-fly conversion for correct
         * state handling.
         */
        /*
         * Resolve a NUL-terminated string to its real length first, so that
         * the bounds check below also covers the length < 0 case.
         */
        if (length < 0) {
            length = u_strlen(s);
        }
        if (length > UCNV_ERROR_BUFFER_LENGTH) {
            /*
             * Should not occur. The converter should output at least one byte
             * per UChar, which means that ucnv_fromUChars() should catch all
             * overflows.
             */
            *err = U_BUFFER_OVERFLOW_ERROR;
            return;
        }
        subChars = (uint8_t *)s;
        length8 = length * U_SIZEOF_UCHAR;
    }

    /*
     * For storing the substitution string, select either the small buffer inside
     * UConverter or allocate a subChars buffer.
     */
    if (length8 > UCNV_MAX_SUBCHAR_LEN) {
        /* Use a separate buffer for the string. Outside UConverter to not make it too large. */
        if (cnv->subChars == (uint8_t *)cnv->subUChars) {
            /* Allocate a new buffer for the string. */
            cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
            if (cnv->subChars == NULL) {
                /* fall back to the in-struct buffer so the converter stays usable */
                cnv->subChars = (uint8_t *)cnv->subUChars;
                *err = U_MEMORY_ALLOCATION_ERROR;
                return;
            }
            uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
        }
    }

    /* Copy the substitution string into the UConverter or its subChars buffer. */
    if (length8 == 0) {
        cnv->subCharLen = 0;
    } else {
        uprv_memcpy(cnv->subChars, subChars, length8);
        if (subChars == (uint8_t *)chars) {
            /* charset bytes: positive byte count */
            cnv->subCharLen = (int8_t)length8;
        } else /* subChars == s */ {
            /* Unicode string: negative UChar count */
            cnv->subCharLen = (int8_t)-length;
        }
    }

    /* See comment in ucnv_setSubstChars(). */
    cnv->subChar1 = 0;
}
/*
 * Resets the internal state of a converter so that it behaves like a
 * freshly created one.
 * @param converter    the converter to reset; NULL is ignored
 * @param choice       which direction(s) to reset
 * @param callCallback if TRUE, non-default callbacks of the affected
 *                     direction(s) are notified with UCNV_RESET first
 */
static void _reset(UConverter *converter, UConverterResetChoice choice,
                   UBool callCallback) {
    UBool affectsToUnicode, affectsFromUnicode;

    if(converter == NULL) {
        return;
    }

    affectsToUnicode = (UBool)(choice<=UCNV_RESET_TO_UNICODE);
    affectsFromUnicode = (UBool)(choice!=UCNV_RESET_TO_UNICODE);

    if(callCallback) {
        /* first, notify the callback functions that the converter is reset */
        UErrorCode callbackStatus;

        if(affectsToUnicode && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
            UConverterToUnicodeArgs resetToUArgs = {
                sizeof(UConverterToUnicodeArgs),
                TRUE,
                NULL,
                NULL,
                NULL,
                NULL,
                NULL,
                NULL
            };
            resetToUArgs.converter = converter;
            callbackStatus = U_ZERO_ERROR;
            converter->fromCharErrorBehaviour(converter->toUContext, &resetToUArgs, NULL, 0, UCNV_RESET, &callbackStatus);
        }
        if(affectsFromUnicode && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
            UConverterFromUnicodeArgs resetFromUArgs = {
                sizeof(UConverterFromUnicodeArgs),
                TRUE,
                NULL,
                NULL,
                NULL,
                NULL,
                NULL,
                NULL
            };
            resetFromUArgs.converter = converter;
            callbackStatus = U_ZERO_ERROR;
            converter->fromUCharErrorBehaviour(converter->fromUContext, &resetFromUArgs, NULL, 0, 0, UCNV_RESET, &callbackStatus);
        }
    }

    /* now reset the converter itself */
    if(affectsToUnicode) {
        /* to-Unicode state */
        converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus;
        converter->mode = 0;
        converter->toULength = 0;
        converter->invalidCharLength = converter->UCharErrorBufferLength = 0;
        converter->preToULength = 0;
    }
    if(affectsFromUnicode) {
        /* from-Unicode state */
        converter->fromUnicodeStatus = 0;
        converter->fromUChar32 = 0;
        converter->invalidUCharLength = converter->charErrorBufferLength = 0;
        converter->preFromUFirstCP = U_SENTINEL;
        converter->preFromULength = 0;
    }

    /* finally, let the implementation reset its own state, if it has any */
    if (converter->sharedData->impl->reset != NULL) {
        converter->sharedData->impl->reset(converter, choice);
    }
}
/* Resets both conversion directions of the converter and notifies the callbacks. */
U_CAPI void U_EXPORT2
ucnv_reset(UConverter *converter)
{
    const UBool notifyCallbacks = TRUE;

    _reset(converter, UCNV_RESET_BOTH, notifyCallbacks);
}
/* Resets only the to-Unicode direction of the converter and notifies its callback. */
U_CAPI void U_EXPORT2
ucnv_resetToUnicode(UConverter *converter)
{
    const UBool notifyCallbacks = TRUE;

    _reset(converter, UCNV_RESET_TO_UNICODE, notifyCallbacks);
}
/* Resets only the from-Unicode direction of the converter and notifies its callback. */
U_CAPI void U_EXPORT2
ucnv_resetFromUnicode(UConverter *converter)
{
    const UBool notifyCallbacks = TRUE;

    _reset(converter, UCNV_RESET_FROM_UNICODE, notifyCallbacks);
}
/* Returns the converter's maxBytesPerUChar value. */
U_CAPI int8_t U_EXPORT2
ucnv_getMaxCharSize (const UConverter * converter)
{
    int8_t maxSize = converter->maxBytesPerUChar;

    return maxSize;
}
/* Returns the minBytesPerChar value from the converter's static data. */
U_CAPI int8_t U_EXPORT2
ucnv_getMinCharSize (const UConverter * converter)
{
    int8_t minSize = converter->sharedData->staticData->minBytesPerChar;

    return minSize;
}
/*
 * Returns the name of the converter.
 * An implementation-specific getName function, if present and returning
 * non-NULL, takes precedence over the name stored in the static data.
 * Returns NULL if *err already holds a failure code.
 */
U_CAPI const char* U_EXPORT2
ucnv_getName (const UConverter * converter, UErrorCode * err)
{
    if (U_FAILURE (*err)) {
        return NULL;
    }

    if (converter->sharedData->impl->getName != NULL) {
        const char *implName = converter->sharedData->impl->getName(converter);
        if (implName != NULL) {
            return implName;
        }
    }

    /* fall back to the name recorded in the converter's static data */
    return converter->sharedData->staticData->name;
}
/*
 * Returns the CCSID (code page number) of the converter, or -1 if *err
 * already holds a failure code.
 * If the static data has no code page (0), the number is parsed from the
 * part after the first '-' of the converter's "IBM" standard name, if any.
 */
U_CAPI int32_t U_EXPORT2
ucnv_getCCSID(const UConverter * converter,
              UErrorCode * err)
{
    int32_t ccsid;
    const char *standardName;
    const char *ccsidStr;

    if (U_FAILURE (*err)) {
        return -1;
    }

    ccsid = converter->sharedData->staticData->codepage;
    if (ccsid != 0) {
        return ccsid;
    }

    /* Rare case. This is for cases like gb18030,
       which doesn't have an IBM canonical name, but does have an IBM alias. */
    standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err);
    if (U_FAILURE(*err) || standardName == NULL) {
        return ccsid;
    }

    ccsidStr = uprv_strchr(standardName, '-');
    if (ccsidStr != NULL) {
        ccsid = (int32_t)atol(ccsidStr+1);  /* +1 to skip '-' */
    }
    return ccsid;
}
/*
 * Returns the platform recorded in the converter's static data,
 * or UCNV_UNKNOWN if *err already holds a failure code.
 */
U_CAPI UConverterPlatform U_EXPORT2
ucnv_getPlatform (const UConverter * converter,
                  UErrorCode * err)
{
    if (U_FAILURE (*err)) {
        return UCNV_UNKNOWN;
    }

    return (UConverterPlatform)converter->sharedData->staticData->platform;
}
/*
 * Retrieves the converter's current to-Unicode callback and its context.
 * Both output pointers are written unconditionally.
 */
U_CAPI void U_EXPORT2
ucnv_getToUCallBack (const UConverter * converter,
                     UConverterToUCallback *action,
                     const void **context)
{
    *action  = converter->fromCharErrorBehaviour;
    *context = converter->toUContext;
}
/*
 * Retrieves the converter's current from-Unicode callback and its context.
 * Both output pointers are written unconditionally.
 */
U_CAPI void U_EXPORT2
ucnv_getFromUCallBack (const UConverter * converter,
                       UConverterFromUCallback *action,
                       const void **context)
{
    *action  = converter->fromUCharErrorBehaviour;
    *context = converter->fromUContext;
}
/*
 * Installs a new to-Unicode callback and context on the converter.
 * The previous callback and context are stored into oldAction and oldContext
 * when those pointers are non-NULL. Does nothing if *err already holds a
 * failure code.
 */
U_CAPI void U_EXPORT2
ucnv_setToUCallBack (UConverter * converter,
                     UConverterToUCallback newAction,
                     const void* newContext,
                     UConverterToUCallback *oldAction,
                     const void** oldContext,
                     UErrorCode * err)
{
    if (U_FAILURE (*err)) {
        return;
    }

    /* swap the callback function */
    if (oldAction != NULL) {
        *oldAction = converter->fromCharErrorBehaviour;
    }
    converter->fromCharErrorBehaviour = newAction;

    /* swap the callback context */
    if (oldContext != NULL) {
        *oldContext = converter->toUContext;
    }
    converter->toUContext = newContext;
}
/*
 * Installs a new from-Unicode callback and context on the converter.
 * The previous callback and context are stored into oldAction and oldContext
 * when those pointers are non-NULL. Does nothing if *err already holds a
 * failure code.
 */
U_CAPI void U_EXPORT2
ucnv_setFromUCallBack (UConverter * converter,
                       UConverterFromUCallback newAction,
                       const void* newContext,
                       UConverterFromUCallback *oldAction,
                       const void** oldContext,
                       UErrorCode * err)
{
    if (U_FAILURE (*err)) {
        return;
    }

    /* swap the callback function */
    if (oldAction != NULL) {
        *oldAction = converter->fromUCharErrorBehaviour;
    }
    converter->fromUCharErrorBehaviour = newAction;

    /* swap the callback context */
    if (oldContext != NULL) {
        *oldContext = converter->fromUContext;
    }
    converter->fromUContext = newContext;
}
/*
 * Adjusts a run of freshly written offsets.
 *
 * @param offsets          the offsets to adjust
 * @param length           number of entries in offsets
 * @param sourceIndex      source index before the conversion call,
 *                         or negative if offsets are not being tracked
 * @param errorInputLength length of the input sequence that caused an error, if any
 *
 * With shift = sourceIndex - errorInputLength (or -1 if sourceIndex < 0):
 *  - shift == 0: the offsets are left alone (the most common case)
 *  - shift  > 0: shift is added to every non-negative offset
 *  - shift  < 0: every offset becomes -1, because either offsets are not
 *                handled or the error input started in a previous buffer
 */
static void
_updateOffsets(int32_t *offsets, int32_t length,
               int32_t sourceIndex, int32_t errorInputLength) {
    int32_t shift;
    int32_t i;

    shift = (sourceIndex >= 0) ? (sourceIndex - errorInputLength) : -1;

    if(shift == 0) {
        /* nothing to do */
        return;
    }

    if(shift > 0) {
        /* move every valid offset forward; negative ones stay as they are */
        for(i = 0; i < length; ++i) {
            if(offsets[i] >= 0) {
                offsets[i] += shift;
            }
        }
    } else {
        /* no usable source position: invalidate the whole run */
        for(i = 0; i < length; ++i) {
            offsets[i] = -1;
        }
    }
}
/* ucnv_fromUnicode --------------------------------------------------------- */
/*
* Implementation note for m:n conversions
*
* While collecting source units to find the longest match for m:n conversion,
* some source units may need to be stored for a partial match.
* When a second buffer does not yield a match on all of the previously stored
* source units, then they must be "replayed", i.e., fed back into the converter.
*
* The code relies on the fact that replaying will not nest -
* converting a replay buffer will not result in a replay.
* This is because a replay is necessary only after the _continuation_ of a
* partial match failed, but a replay buffer is converted as a whole.
* It may result in some of its units being stored again for a partial match,
* but there will not be a continuation _during_ the replay which could fail.
*
* It is conceivable that a callback function could call the converter
* recursively in a way that causes another replay to be stored, but that
* would be an error in the callback function.
* Such violations will cause assertion failures in a debug build,
* and wrong output, but they will not cause a crash.
*/
/*
* Runs the converter's fromUnicode implementation in a loop and handles
* offsets, replay buffers, end of input and error callbacks around it.
*
* @param pArgs conversion arguments (converter, source/target pointers and
*              limits, offsets, flush flag); the pointers are advanced as
*              input is consumed and output is written
* @param err   in/out error code; if it already holds a failure on entry,
*              the first conversion call is skipped and the error is handled
*              directly
*
* Summary of the visible behavior:
* - The WithOffsets implementation is used only if pArgs->offsets is not NULL
*   and the converter provides one; otherwise -1 is written for each offset.
* - A negative cnv->preFromULength means that -preFromULength stored source
*   units are converted first from a temporary replay buffer.
* - U_INVALID_CHAR_FOUND, U_ILLEGAL_CHAR_FOUND and U_TRUNCATED_CHAR_FOUND are
*   passed to the from-Unicode callback once; any other failure, or a failure
*   that is still set after the callback, ends the function.
* - When all input is consumed with the flush flag set, the from-Unicode state
*   is reset without notifying the callback.
*/
static void
_fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
UConverterFromUnicode fromUnicode;
UConverter *cnv;
const UChar *s;
char *t;
int32_t *offsets;
int32_t sourceIndex;
int32_t errorInputLength;
UBool converterSawEndOfInput, calledCallback;
/* variables for m:n conversion */
UChar replay[UCNV_EXT_MAX_UCHARS];
const UChar *realSource, *realSourceLimit;
int32_t realSourceIndex;
UBool realFlush;
cnv=pArgs->converter;
s=pArgs->source;
t=pArgs->target;
offsets=pArgs->offsets;
/* get the converter implementation function */
sourceIndex=0;
if(offsets==NULL) {
fromUnicode=cnv->sharedData->impl->fromUnicode;
} else {
fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;
if(fromUnicode==NULL) {
/* there is no WithOffsets implementation */
fromUnicode=cnv->sharedData->impl->fromUnicode;
/* we will write -1 for each offset */
sourceIndex=-1;
}
}
if(cnv->preFromULength>=0) {
/* normal mode */
realSource=NULL;
/* avoid compiler warnings - not otherwise necessary, and the values do not matter */
realSourceLimit=NULL;
realFlush=FALSE;
realSourceIndex=0;
} else {
/*
* Previous m:n conversion stored source units from a partial match
* and failed to consume all of them.
* We need to "replay" them from a temporary buffer and convert them first.
*/
/* save the caller's arguments; realSource!=NULL marks "currently replaying" */
realSource=pArgs->source;
realSourceLimit=pArgs->sourceLimit;
realFlush=pArgs->flush;
realSourceIndex=sourceIndex;
/* preFromULength is negative here, so -preFromULength is the number of stored units */
uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
pArgs->source=replay;
pArgs->sourceLimit=replay-cnv->preFromULength;
pArgs->flush=FALSE;
sourceIndex=-1;
cnv->preFromULength=0;
}
/*
* loop for conversion and error handling
*
* loop {
* convert
* loop {
* update offsets
* handle end of input
* handle errors/call callback
* }
* }
*/
for(;;) {
if(U_SUCCESS(*err)) {
/* convert */
fromUnicode(pArgs, err);
/*
* set a flag for whether the converter
* successfully processed the end of the input
*
* need not check cnv->preFromULength==0 because a replay (<0) will cause
* s<sourceLimit before converterSawEndOfInput is checked
*/
converterSawEndOfInput=
(UBool)(U_SUCCESS(*err) &&
pArgs->flush && pArgs->source==pArgs->sourceLimit &&
cnv->fromUChar32==0);
} else {
/* handle error from ucnv_convertEx() */
converterSawEndOfInput=FALSE;
}
/* no callback called yet for this iteration */
calledCallback=FALSE;
/* no sourceIndex adjustment for conversion, only for callback output */
errorInputLength=0;
/*
* loop for offsets and error handling
*
* iterates at most 3 times:
* 1. to clean up after the conversion function
* 2. after the callback
* 3. after the callback again if there was truncated input
*/
for(;;) {
/* update offsets if we write any */
if(offsets!=NULL) {
int32_t length=(int32_t)(pArgs->target-t);
if(length>0) {
_updateOffsets(offsets, length, sourceIndex, errorInputLength);
/*
* if a converter handles offsets and updates the offsets
* pointer at the end, then pArgs->offset should not change
* here;
* however, some converters do not handle offsets at all
* (sourceIndex<0) or may not update the offsets pointer
*/
pArgs->offsets=offsets+=length;
}
if(sourceIndex>=0) {
sourceIndex+=(int32_t)(pArgs->source-s);
}
}
if(cnv->preFromULength<0) {
/*
* switch the source to new replay units (cannot occur while replaying)
* after offset handling and before end-of-input and callback handling
*/
if(realSource==NULL) {
realSource=pArgs->source;
realSourceLimit=pArgs->sourceLimit;
realFlush=pArgs->flush;
realSourceIndex=sourceIndex;
uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
pArgs->source=replay;
pArgs->sourceLimit=replay-cnv->preFromULength;
pArgs->flush=FALSE;
/* preFromULength is negative: move sourceIndex back by the number of replayed units */
if((sourceIndex+=cnv->preFromULength)<0) {
sourceIndex=-1;
}
cnv->preFromULength=0;
} else {
/* see implementation note before _fromUnicodeWithCallback() */
/*
* realSource!=NULL in this branch, so the assertion below always
* fails when assertions are enabled; this flags the nested replay.
*/
U_ASSERT(realSource==NULL);
*err=U_INTERNAL_PROGRAM_ERROR;
}
}
/* update pointers */
s=pArgs->source;
t=pArgs->target;
if(U_SUCCESS(*err)) {
if(s<pArgs->sourceLimit) {
/*
* continue with the conversion loop while there is still input left
* (continue converting by breaking out of only the inner loop)
*/
break;
} else if(realSource!=NULL) {
/* switch back from replaying to the real source and continue */
pArgs->source=realSource;
pArgs->sourceLimit=realSourceLimit;
pArgs->flush=realFlush;
sourceIndex=realSourceIndex;
realSource=NULL;
break;
} else if(pArgs->flush && cnv->fromUChar32!=0) {
/*
* the entire input stream is consumed
* and there is a partial, truncated input sequence left
*/
/* inject an error and continue with callback handling */
*err=U_TRUNCATED_CHAR_FOUND;
calledCallback=FALSE; /* new error condition */
} else {
/* input consumed */
if(pArgs->flush) {
/*
* return to the conversion loop once more if the flush
* flag is set and the conversion function has not
* successfully processed the end of the input yet
*
* (continue converting by breaking out of only the inner loop)
*/
if(!converterSawEndOfInput) {
break;
}
/* reset the converter without calling the callback function */
_reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);
}
/* done successfully */
return;
}
}
/* U_FAILURE(*err) */
{
UErrorCode e;
if( calledCallback ||
(e=*err)==U_BUFFER_OVERFLOW_ERROR ||
(e!=U_INVALID_CHAR_FOUND &&
e!=U_ILLEGAL_CHAR_FOUND &&
e!=U_TRUNCATED_CHAR_FOUND)
) {
/*
* the callback did not or cannot resolve the error:
* set output pointers and return
*
* the check for buffer overflow is redundant but it is
* a high-runner case and hopefully documents the intent
* well
*
* if we were replaying, then the replay buffer must be
* copied back into the UConverter
* and the real arguments must be restored
*/
if(realSource!=NULL) {
int32_t length;
U_ASSERT(cnv->preFromULength==0);
length=(int32_t)(pArgs->sourceLimit-pArgs->source);
if(length>0) {
uprv_memcpy(cnv->preFromU, pArgs->source, length*U_SIZEOF_UCHAR);
/* stored as a negative count so that the next call replays these units */
cnv->preFromULength=(int8_t)-length;
}
pArgs->source=realSource;
pArgs->sourceLimit=realSourceLimit;
pArgs->flush=realFlush;
}
return;
}
}
/* callback handling */
{
UChar32 codePoint;
/* get and write the code point */
codePoint=cnv->fromUChar32;
errorInputLength=0;
U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
cnv->invalidUCharLength=(int8_t)errorInputLength;
/* set the converter state to deal with the next character */
cnv->fromUChar32=0;
/* call the callback function */
/* only U_INVALID_CHAR_FOUND is reported as UCNV_UNASSIGNED; the other two codes map to UCNV_ILLEGAL */
cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
cnv->invalidUCharBuffer, errorInputLength, codePoint,
*err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
err);
}
/*
* loop back to the offset handling
*
* this flag will indicate after offset handling
* that a callback was called;
* if the callback did not resolve the error, then we return
*/
calledCallback=TRUE;
}
}
}
/*
 * Writes the bytes held in the converter's fromUnicode overflow buffer
 * (cnv->charErrorBuffer) to the target.
 * Call this function only if cnv->charErrorBufferLength>0.
 *
 * @param cnv         the converter whose overflow buffer is flushed
 * @param target      in/out target pointer; advanced past the bytes written
 * @param targetLimit end of the target buffer
 * @param pOffsets    optional in/out offsets pointer; may be NULL or point to
 *                    NULL. One -1 is written per output byte because no
 *                    source index is available for old output.
 * @param err         set to U_BUFFER_OVERFLOW_ERROR if the target fills up
 * @return TRUE if the target filled up before all bytes were written; the
 *         unwritten bytes are then moved to the front of the overflow buffer
 */
static UBool
ucnv_outputOverflowFromUnicode(UConverter *cnv,
                               char **target, const char *targetLimit,
                               int32_t **pOffsets,
                               UErrorCode *err) {
    char *pending = (char *)cnv->charErrorBuffer;
    int32_t pendingLength = cnv->charErrorBufferLength;
    char *out = *target;
    int32_t *offsets = (pOffsets != NULL) ? *pOffsets : NULL;
    int32_t readIndex = 0;
    UBool targetFull = FALSE;

    while(readIndex < pendingLength) {
        if(out == targetLimit) {
            /* the overflow buffer contains too much, keep the rest */
            int32_t kept = 0;

            do {
                pending[kept++] = pending[readIndex++];
            } while(readIndex < pendingLength);

            cnv->charErrorBufferLength = (int8_t)kept;
            *err = U_BUFFER_OVERFLOW_ERROR;
            targetFull = TRUE;
            break;
        }

        /* copy the overflow contents to the target */
        *out++ = pending[readIndex++];
        if(offsets != NULL) {
            *offsets++ = -1; /* no source index available for old output */
        }
    }

    if(!targetFull) {
        /* the overflow buffer is completely copied to the target */
        cnv->charErrorBufferLength = 0;
    }

    /* publish the advanced output pointers in both cases */
    *target = out;
    if(offsets != NULL) {
        *pOffsets = offsets;
    }
    return targetFull;
}
/*
 * Public streaming conversion from UTF-16 (UChar) text to the converter's
 * charset.
 *
 * Validates the arguments, writes out any output left over in the
 * converter's overflow buffer, and then hands the remaining work to
 * _fromUnicodeWithCallback(). On return, *source and *target are advanced
 * past the consumed input and the written output.
 *
 * Nothing is done if err is NULL or already indicates a failure.
 * U_ILLEGAL_ARGUMENT_ERROR is set for NULL cnv/target/source and for
 * inconsistent or oversized buffer limits (see below).
 */
U_CAPI void U_EXPORT2
ucnv_fromUnicode(UConverter *cnv,
                 char **target, const char *targetLimit,
                 const UChar **source, const UChar *sourceLimit,
                 int32_t *offsets,
                 UBool flush,
                 UErrorCode *err) {
    UConverterFromUnicodeArgs fromUArgs;
    const UChar *src;
    char *dst;
    UBool badArgs;

    /* parameter checks */
    if(err==NULL || U_FAILURE(*err)) {
        return;
    }
    if(cnv==NULL || target==NULL || source==NULL) {
        *err=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    src=*source;
    dst=*target;

    /*
     * A source limit pinned to U_MAX_PTR is pulled back by one byte.
     * This guards against an infinite loop at that limit; the limit is
     * expected to sit on a UChar boundary, and the adjustment also keeps
     * the argument check below from rejecting such a limit.
     */
    if((const void *)sourceLimit==(const void *)U_MAX_PTR(sourceLimit)) {
        sourceLimit=(const UChar *)(((const char *)sourceLimit)-1);
    }

    /*
     * None of the following should ever be true:
     *
     * 1) a limit lies before its start pointer;
     *
     * 2) a buffer is too large for int32_t arithmetic (some functions work
     *    with sizes in units or bytes rather than comparing pointers, and
     *    offsets are int32_t values); size_t is unsigned and wide enough
     *    for the comparison;
     *
     * 3) the source byte length is odd, i.e. a UChar pointer was
     *    mis-cast via char * and an incomplete code unit was provided.
     *
     * We report an error rather than clamping a limit, because clamping
     * (e.g. targetLimit=dst+0x7fffffff) would break the guarantee that
     * either the source is consumed or the target is filled unless an
     * error occurs.
     */
    if(sourceLimit<src || targetLimit<dst) {
        badArgs=TRUE;
    } else if((size_t)(sourceLimit-src)>(size_t)0x3fffffff && sourceLimit>src) {
        badArgs=TRUE;
    } else if((size_t)(targetLimit-dst)>(size_t)0x7fffffff && targetLimit>dst) {
        badArgs=TRUE;
    } else {
        badArgs=(UBool)((((const char *)sourceLimit-(const char *)src) & 1) != 0);
    }
    if(badArgs) {
        *err=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /* first write out whatever is still sitting in the overflow buffer */
    if(cnv->charErrorBufferLength>0) {
        if(ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err)) {
            /* U_BUFFER_OVERFLOW_ERROR */
            return;
        }
    }
    /* *target may have moved from here on: use *target, not dst */

    if(!flush && src==sourceLimit && cnv->preFromULength>=0) {
        /* overflow buffer emptied and no new input: nothing more to do */
        return;
    }

    /*
     * Do not report a buffer overflow just because
     * !flush && *target==targetLimit:
     * the source may produce no output at all, for example when the
     * skip callback is invoked.
     */

    /* set up the converter arguments and run the conversion */
    fromUArgs.size=sizeof(fromUArgs);
    fromUArgs.converter=cnv;
    fromUArgs.flush=flush;
    fromUArgs.offsets=offsets;
    fromUArgs.source=src;
    fromUArgs.sourceLimit=sourceLimit;
    fromUArgs.target=*target;
    fromUArgs.targetLimit=targetLimit;

    _fromUnicodeWithCallback(&fromUArgs, err);

    /* report progress back to the caller */
    *source=fromUArgs.source;
    *target=fromUArgs.target;
}
/* ucnv_toUnicode() --------------------------------------------------------- */
/*
 * Conversion driver for the to-Unicode direction.
 *
 * Repeatedly calls the converter implementation's toUnicode function
 * (or its WithOffsets variant when pArgs->offsets is set and one exists)
 * and, between calls, performs the bookkeeping that the implementation
 * functions do not do themselves:
 * - fills in/advances the offsets array,
 * - replays source bytes stored in cnv->preToU by a previous partial
 *   m:n match (signalled by cnv->preToULength<0),
 * - turns leftover partial input at flush time into U_TRUNCATED_CHAR_FOUND,
 * - invokes the to-Unicode error callback for the error codes that a
 *   callback is allowed to handle, and
 * - resets the to-Unicode state once flushed input was fully processed.
 *
 * The function may be entered with *err already set to a failure
 * (see the "handle error from getNextUChar() or ucnv_convertEx()" branch);
 * in that case no conversion is attempted before the error handling.
 *
 * pArgs->source/target/offsets are updated to reflect the progress made.
 * While replaying, pArgs->source/sourceLimit/flush temporarily point at the
 * local replay buffer and are restored before returning.
 */
static void
_toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
UConverterToUnicode toUnicode;
UConverter *cnv;
const char *s;
UChar *t;
int32_t *offsets;
int32_t sourceIndex;
int32_t errorInputLength;
UBool converterSawEndOfInput, calledCallback;
/* variables for m:n conversion */
char replay[UCNV_EXT_MAX_BYTES];
const char *realSource, *realSourceLimit;
int32_t realSourceIndex;
UBool realFlush;
cnv=pArgs->converter;
s=pArgs->source;
t=pArgs->target;
offsets=pArgs->offsets;
/* get the converter implementation function */
sourceIndex=0;
if(offsets==NULL) {
toUnicode=cnv->sharedData->impl->toUnicode;
} else {
toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets;
if(toUnicode==NULL) {
/* there is no WithOffsets implementation */
toUnicode=cnv->sharedData->impl->toUnicode;
/* we will write -1 for each offset */
sourceIndex=-1;
}
}
if(cnv->preToULength>=0) {
/* normal mode */
/* realSource==NULL doubles as the "not currently replaying" marker */
realSource=NULL;
/* avoid compiler warnings - not otherwise necessary, and the values do not matter */
realSourceLimit=NULL;
realFlush=FALSE;
realSourceIndex=0;
} else {
/*
* Previous m:n conversion stored source units from a partial match
* and failed to consume all of them.
* We need to "replay" them from a temporary buffer and convert them first.
*/
/* remember the caller's real arguments so they can be restored later */
realSource=pArgs->source;
realSourceLimit=pArgs->sourceLimit;
realFlush=pArgs->flush;
realSourceIndex=sourceIndex;
/* preToULength is negative here, so -preToULength is the stored byte count */
uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
pArgs->source=replay;
pArgs->sourceLimit=replay-cnv->preToULength;
pArgs->flush=FALSE;
/* replayed bytes get -1 offsets */
sourceIndex=-1;
cnv->preToULength=0;
}
/*
* loop for conversion and error handling
*
* loop {
* convert
* loop {
* update offsets
* handle end of input
* handle errors/call callback
* }
* }
*/
for(;;) {
if(U_SUCCESS(*err)) {
/* convert */
toUnicode(pArgs, err);
/*
* set a flag for whether the converter
* successfully processed the end of the input
*
* need not check cnv->preToULength==0 because a replay (<0) will cause
* s<sourceLimit before converterSawEndOfInput is checked
*/
converterSawEndOfInput=
(UBool)(U_SUCCESS(*err) &&
pArgs->flush && pArgs->source==pArgs->sourceLimit &&
cnv->toULength==0);
} else {
/* handle error from getNextUChar() or ucnv_convertEx() */
converterSawEndOfInput=FALSE;
}
/* no callback called yet for this iteration */
calledCallback=FALSE;
/* no sourceIndex adjustment for conversion, only for callback output */
errorInputLength=0;
/*
* loop for offsets and error handling
*
* iterates at most 3 times:
* 1. to clean up after the conversion function
* 2. after the callback
* 3. after the callback again if there was truncated input
*/
for(;;) {
/* update offsets if we write any */
if(offsets!=NULL) {
/* number of UChars written since the last time t was synchronized */
int32_t length=(int32_t)(pArgs->target-t);
if(length>0) {
_updateOffsets(offsets, length, sourceIndex, errorInputLength);
/*
* if a converter handles offsets and updates the offsets
* pointer at the end, then pArgs->offsets should not change
* here;
* however, some converters do not handle offsets at all
* (sourceIndex<0) or may not update the offsets pointer
*/
pArgs->offsets=offsets+=length;
}
if(sourceIndex>=0) {
sourceIndex+=(int32_t)(pArgs->source-s);
}
}
if(cnv->preToULength<0) {
/*
* switch the source to new replay units (cannot occur while replaying)
* after offset handling and before end-of-input and callback handling
*/
if(realSource==NULL) {
realSource=pArgs->source;
realSourceLimit=pArgs->sourceLimit;
realFlush=pArgs->flush;
realSourceIndex=sourceIndex;
uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
pArgs->source=replay;
pArgs->sourceLimit=replay-cnv->preToULength;
pArgs->flush=FALSE;
/* back up the index by the number of replayed bytes; clamp to -1 */
if((sourceIndex+=cnv->preToULength)<0) {
sourceIndex=-1;
}
cnv->preToULength=0;
} else {
/* see implementation note before _fromUnicodeWithCallback() */
/* new replay units while already replaying: report an internal error */
U_ASSERT(realSource==NULL);
*err=U_INTERNAL_PROGRAM_ERROR;
}
}
/* update pointers */
s=pArgs->source;
t=pArgs->target;
if(U_SUCCESS(*err)) {
if(s<pArgs->sourceLimit) {
/*
* continue with the conversion loop while there is still input left
* (continue converting by breaking out of only the inner loop)
*/
break;
} else if(realSource!=NULL) {
/* switch back from replaying to the real source and continue */
pArgs->source=realSource;
pArgs->sourceLimit=realSourceLimit;
pArgs->flush=realFlush;
sourceIndex=realSourceIndex;
realSource=NULL;
break;
} else if(pArgs->flush && cnv->toULength>0) {
/*
* the entire input stream is consumed
* and there is a partial, truncated input sequence left
*/
/* inject an error and continue with callback handling */
*err=U_TRUNCATED_CHAR_FOUND;
calledCallback=FALSE; /* new error condition */
} else {
/* input consumed */
if(pArgs->flush) {
/*
* return to the conversion loop once more if the flush
* flag is set and the conversion function has not
* successfully processed the end of the input yet
*
* (continue converting by breaking out of only the inner loop)
*/
if(!converterSawEndOfInput) {
break;
}
/* reset the converter without calling the callback function */
_reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
}
/* done successfully */
return;
}
}
/* U_FAILURE(*err) */
{
UErrorCode e;
/*
* Return to the caller if a callback already ran for this error,
* or if the error is not one of the codes that is passed to a callback.
*/
if( calledCallback ||
(e=*err)==U_BUFFER_OVERFLOW_ERROR ||
(e!=U_INVALID_CHAR_FOUND &&
e!=U_ILLEGAL_CHAR_FOUND &&
e!=U_TRUNCATED_CHAR_FOUND &&
e!=U_ILLEGAL_ESCAPE_SEQUENCE &&
e!=U_UNSUPPORTED_ESCAPE_SEQUENCE)
) {
/*
* the callback did not or cannot resolve the error:
* set output pointers and return
*
* the check for buffer overflow is redundant but it is
* a high-runner case and hopefully documents the intent
* well
*
* if we were replaying, then the replay buffer must be
* copied back into the UConverter
* and the real arguments must be restored
*/
if(realSource!=NULL) {
int32_t length;
U_ASSERT(cnv->preToULength==0);
/* unconsumed replay bytes go back into preToU (negative length = pending replay) */
length=(int32_t)(pArgs->sourceLimit-pArgs->source);
if(length>0) {
uprv_memcpy(cnv->preToU, pArgs->source, length);
cnv->preToULength=(int8_t)-length;
}
pArgs->source=realSource;
pArgs->sourceLimit=realSourceLimit;
pArgs->flush=realFlush;
}
return;
}
}
/* copy toUBytes[] to invalidCharBuffer[] */
errorInputLength=cnv->invalidCharLength=cnv->toULength;
if(errorInputLength>0) {
uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength);
}
/* set the converter state to deal with the next character */
cnv->toULength=0;
/* call the callback function */
/* an invalid-char error with the default reason is reported to the callback as "unassigned" */
if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) {
cnv->toUCallbackReason = UCNV_UNASSIGNED;
}
cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
cnv->invalidCharBuffer, errorInputLength,
cnv->toUCallbackReason,
err);
cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */
/*
* loop back to the offset handling
*
* this flag will indicate after offset handling
* that a callback was called;
* if the callback did not resolve the error, then we return
*/
calledCallback=TRUE;
}
}
}
/*
 * Drain the toUnicode overflow buffer (cnv->UCharErrorBuffer) into the
 * caller's target buffer.
 * Call this function only if(cnv->UCharErrorBufferLength>0).
 *
 * UChars are copied until either the overflow buffer is empty or the target
 * is full. For every UChar written, -1 is stored as its offset when an
 * offsets array is supplied, because no source index exists for old output.
 * UChars that do not fit are shifted to the start of the overflow buffer.
 *
 * *target (and *pOffsets, if offsets are in use) are advanced past the
 * written output. pOffsets itself may be NULL.
 *
 * @return TRUE if the target filled up before the overflow buffer was
 *         emptied; *err is then set to U_BUFFER_OVERFLOW_ERROR.
 *         FALSE if everything was written; *err is left untouched.
 */
static UBool
ucnv_outputOverflowToUnicode(UConverter *cnv,
                             UChar **target, const UChar *targetLimit,
                             int32_t **pOffsets,
                             UErrorCode *err) {
    UChar *out=*target;
    int32_t *offs=(pOffsets!=NULL) ? *pOffsets : NULL;
    UChar *pending=cnv->UCharErrorBuffer;
    int32_t pendingLength=cnv->UCharErrorBufferLength;
    int32_t readIndex;
    UBool targetFull=FALSE;

    for(readIndex=0; readIndex<pendingLength; ++readIndex) {
        if(out==targetLimit) {
            /* no room left for the UChar at readIndex */
            targetFull=TRUE;
            break;
        }
        *out++=pending[readIndex];
        if(offs!=NULL) {
            *offs++=-1; /* old output has no source index */
        }
    }

    if(targetFull) {
        /* keep the unwritten tail, compacted to the front of the buffer */
        int32_t kept=0;
        while(readIndex<pendingLength) {
            pending[kept++]=pending[readIndex++];
        }
        cnv->UCharErrorBufferLength=(int8_t)kept;
        *err=U_BUFFER_OVERFLOW_ERROR;
    } else {
        /* everything went out */
        cnv->UCharErrorBufferLength=0;
    }

    /* publish the advanced output positions */
    *target=out;
    if(offs!=NULL) {
        *pOffsets=offs;
    }
    return targetFull;
}
/*
 * Public streaming conversion from the converter's charset to UTF-16
 * (UChar) text.
 *
 * Validates the arguments, writes out any output left over in the
 * converter's overflow buffer, and then hands the remaining work to
 * _toUnicodeWithCallback(). On return, *source and *target are advanced
 * past the consumed input and the written output.
 *
 * Nothing is done if err is NULL or already indicates a failure.
 * U_ILLEGAL_ARGUMENT_ERROR is set for NULL cnv/target/source and for
 * inconsistent or oversized buffer limits (see below).
 */
U_CAPI void U_EXPORT2
ucnv_toUnicode(UConverter *cnv,
               UChar **target, const UChar *targetLimit,
               const char **source, const char *sourceLimit,
               int32_t *offsets,
               UBool flush,
               UErrorCode *err) {
    UConverterToUnicodeArgs toUArgs;
    const char *src;
    UChar *dst;
    UBool badArgs;

    /* parameter checks */
    if(err==NULL || U_FAILURE(*err)) {
        return;
    }
    if(cnv==NULL || target==NULL || source==NULL) {
        *err=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    src=*source;
    dst=*target;

    /*
     * A target limit pinned to U_MAX_PTR is pulled back by one byte.
     * This guards against an infinite loop at that limit; the limit is
     * expected to sit on a UChar boundary, and the adjustment also keeps
     * the argument check below from rejecting such a limit.
     */
    if((const void *)targetLimit==(const void *)U_MAX_PTR(targetLimit)) {
        targetLimit=(const UChar *)(((const char *)targetLimit)-1);
    }

    /*
     * None of the following should ever be true:
     *
     * 1) a limit lies before its start pointer;
     *
     * 2) a buffer is too large for int32_t arithmetic (some functions work
     *    with sizes in units or bytes rather than comparing pointers, and
     *    offsets are int32_t values); size_t is unsigned and wide enough
     *    for the comparison;
     *
     * 3) the target byte length is odd, i.e. a UChar pointer was
     *    mis-cast via char * and an incomplete code unit was provided.
     *
     * We report an error rather than clamping a limit, because clamping
     * (e.g. sourceLimit=src+0x7fffffff) would break the guarantee that
     * either the source is consumed or the target is filled unless an
     * error occurs.
     */
    if(sourceLimit<src || targetLimit<dst) {
        badArgs=TRUE;
    } else if((size_t)(sourceLimit-src)>(size_t)0x7fffffff && sourceLimit>src) {
        badArgs=TRUE;
    } else if((size_t)(targetLimit-dst)>(size_t)0x3fffffff && targetLimit>dst) {
        badArgs=TRUE;
    } else {
        badArgs=(UBool)((((const char *)targetLimit-(const char *)dst) & 1) != 0);
    }
    if(badArgs) {
        *err=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /* first write out whatever is still sitting in the overflow buffer */
    if(cnv->UCharErrorBufferLength>0) {
        if(ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err)) {
            /* U_BUFFER_OVERFLOW_ERROR */
            return;
        }
    }
    /* *target may have moved from here on: use *target, not dst */

    if(!flush && src==sourceLimit && cnv->preToULength>=0) {
        /* overflow buffer emptied and no new input: nothing more to do */
        return;
    }

    /*
     * Do not report a buffer overflow just because
     * !flush && *target==targetLimit:
     * the source may produce no output at all, for example when the
     * skip callback is invoked.
     */

    /* set up the converter arguments and run the conversion */
    toUArgs.size=sizeof(toUArgs);
    toUArgs.converter=cnv;
    toUArgs.flush=flush;
    toUArgs.offsets=offsets;
    toUArgs.source=src;
    toUArgs.sourceLimit=sourceLimit;
    toUArgs.target=*target;
    toUArgs.targetLimit=targetLimit;

    _toUnicodeWithCallback(&toUArgs, err);

    /* report progress back to the caller */
    *source=toUArgs.source;
    *target=toUArgs.target;
}
/* ucnv_to/fromUChars() ----------------------------------------------------- */
/*
 * One-shot conversion of a UTF-16 string to the converter's charset.
 *
 * The converter's from-Unicode state is reset first. srcLength==-1 means
 * that src is NUL-terminated. If the output does not fit into dest, the
 * conversion continues into a scratch buffer so that the full required
 * length can still be returned ("preflighting").
 *
 * @return the output length as reported by u_terminateChars(), which is
 *         also given the chance to NUL-terminate dest and adjust
 *         *pErrorCode; 0 if the arguments are invalid or *pErrorCode
 *         already indicates a failure on entry.
 */
U_CAPI int32_t U_EXPORT2
ucnv_fromUChars(UConverter *cnv,
                char *dest, int32_t destCapacity,
                const UChar *src, int32_t srcLength,
                UErrorCode *pErrorCode) {
    char *destStart, *destEnd;
    const UChar *srcEnd;
    int32_t total;

    /* argument checks */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if( cnv==NULL ||
        destCapacity<0 || (dest==NULL && destCapacity>0) ||
        srcLength<-1 || (src==NULL && srcLength!=0)
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* start from a clean from-Unicode state */
    ucnv_resetFromUnicode(cnv);
    destStart=dest;

    if(srcLength==-1) {
        srcLength=u_strlen(src);
    }
    if(srcLength<=0) {
        /* empty input: only terminate the (empty) output */
        return u_terminateChars(destStart, destCapacity, 0, pErrorCode);
    }

    srcEnd=src+srcLength;
    destEnd=dest+destCapacity;

    /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
    if(destEnd<dest || (destEnd==NULL && dest!=NULL)) {
        destEnd=(char *)U_MAX_PTR(dest);
    }

    /* the real conversion into the caller's buffer */
    ucnv_fromUnicode(cnv, &dest, destEnd, &src, srcEnd, NULL, TRUE, pErrorCode);
    total=(int32_t)(dest-destStart);

    /* on overflow, keep converting into a scratch buffer just to count */
    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
        char scratch[1024];
        const char *scratchEnd=scratch+sizeof(scratch);

        do {
            dest=scratch;
            *pErrorCode=U_ZERO_ERROR;
            ucnv_fromUnicode(cnv, &dest, scratchEnd, &src, srcEnd, NULL, TRUE, pErrorCode);
            total+=(int32_t)(dest-scratch);
        } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
    }

    return u_terminateChars(destStart, destCapacity, total, pErrorCode);
}
/*
 * One-shot conversion of a charset byte string to UTF-16.
 *
 * The converter's to-Unicode state is reset first. srcLength==-1 means
 * that src is NUL-terminated (single-byte NUL, measured with uprv_strlen).
 * If the output does not fit into dest, the conversion continues into a
 * scratch buffer so that the full required length can still be returned
 * ("preflighting").
 *
 * @return the output length in UChars as reported by u_terminateUChars(),
 *         which is also given the chance to NUL-terminate dest and adjust
 *         *pErrorCode; 0 if the arguments are invalid or *pErrorCode
 *         already indicates a failure on entry.
 */
U_CAPI int32_t U_EXPORT2
ucnv_toUChars(UConverter *cnv,
              UChar *dest, int32_t destCapacity,
              const char *src, int32_t srcLength,
              UErrorCode *pErrorCode) {
    UChar *destStart, *destEnd;
    const char *srcEnd;
    int32_t total;

    /* argument checks */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if( cnv==NULL ||
        destCapacity<0 || (dest==NULL && destCapacity>0) ||
        srcLength<-1 || (src==NULL && srcLength!=0)
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* start from a clean to-Unicode state */
    ucnv_resetToUnicode(cnv);
    destStart=dest;

    if(srcLength==-1) {
        srcLength=(int32_t)uprv_strlen(src);
    }
    if(srcLength<=0) {
        /* empty input: only terminate the (empty) output */
        return u_terminateUChars(destStart, destCapacity, 0, pErrorCode);
    }

    srcEnd=src+srcLength;
    destEnd=dest+destCapacity;

    /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
    if(destEnd<dest || (destEnd==NULL && dest!=NULL)) {
        destEnd=(UChar *)U_MAX_PTR(dest);
    }

    /* the real conversion into the caller's buffer */
    ucnv_toUnicode(cnv, &dest, destEnd, &src, srcEnd, NULL, TRUE, pErrorCode);
    total=(int32_t)(dest-destStart);

    /* on overflow, keep converting into a scratch buffer just to count */
    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
        UChar scratch[1024];
        const UChar *scratchEnd=scratch+sizeof(scratch)/U_SIZEOF_UCHAR;

        do {
            dest=scratch;
            *pErrorCode=U_ZERO_ERROR;
            ucnv_toUnicode(cnv, &dest, scratchEnd, &src, srcEnd, NULL, TRUE, pErrorCode);
            total+=(int32_t)(dest-scratch);
        } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
    }

    return u_terminateUChars(destStart, destCapacity, total, pErrorCode);
}
/* ucnv_getNextUChar() ------------------------------------------------------ */
/*
 * Convert the next character from the source bytes and return it as a
 * single code point.
 *
 * Output still pending in the converter's UChar overflow buffer is returned
 * first. Otherwise the implementation's native getNextUChar() is used when
 * one exists and the converter is at a character boundary
 * (cnv->toULength==0); in all other cases, and for error/callback handling,
 * the function falls back to _toUnicodeWithCallback() with a tiny local
 * target buffer. A lead surrogate followed by a trail surrogate is combined
 * into one supplementary code point; any UChars produced beyond the
 * returned character are stored in the converter's overflow buffer.
 *
 * *source is advanced past the consumed bytes.
 *
 * Returns 0xffff ("no output") together with an error code when
 * - err is NULL or already a failure on entry (err is not modified),
 * - an argument is invalid (U_ILLEGAL_ARGUMENT_ERROR),
 * - the conversion failed, or
 * - no output was produced (U_INDEX_OUTOFBOUNDS_ERROR).
 */
U_CAPI UChar32 U_EXPORT2
ucnv_getNextUChar(UConverter *cnv,
const char **source, const char *sourceLimit,
UErrorCode *err) {
UConverterToUnicodeArgs args;
UChar buffer[U16_MAX_LENGTH];
const char *s;
UChar32 c;
int32_t i, length;
/* check parameters */
if(err==NULL || U_FAILURE(*err)) {
return 0xffff;
}
if(cnv==NULL || source==NULL) {
*err=U_ILLEGAL_ARGUMENT_ERROR;
return 0xffff;
}
s=*source;
if(sourceLimit<s) {
*err=U_ILLEGAL_ARGUMENT_ERROR;
return 0xffff;
}
/*
* Make sure that the buffer sizes do not exceed the number range for
* int32_t because some functions use the size (in units or bytes)
* rather than comparing pointers, and because offsets are int32_t values.
*
* size_t is guaranteed to be unsigned and large enough for the job.
*
* Return with an error instead of adjusting the limits because we would
* not be able to maintain the semantics that either the source must be
* consumed or the target filled (unless an error occurs).
* An adjustment would be sourceLimit=s+0x7fffffff; for example.
*/
if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) {
*err=U_ILLEGAL_ARGUMENT_ERROR;
return 0xffff;
}
/* c<0 below means "nothing was taken from the overflow buffer" */
c=U_SENTINEL;
/* flush the target overflow buffer */
if(cnv->UCharErrorBufferLength>0) {
UChar *overflow;
overflow=cnv->UCharErrorBuffer;
i=0;
length=cnv->UCharErrorBufferLength;
/* read one code point (possibly a surrogate pair) from the overflow buffer */
U16_NEXT(overflow, i, length, c);
/* move the remaining overflow contents up to the beginning */
if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) {
uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i,
cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
}
if(!U16_IS_LEAD(c) || i<length) {
return c;
}
/*
* Continue if the overflow buffer contained only a lead surrogate,
* in case the converter outputs single surrogates from complete
* input sequences.
*/
}
/*
* flush==TRUE is implied for ucnv_getNextUChar()
*
* do not simply return even if s==sourceLimit because the converter may
* not have seen flush==TRUE before
*/
/* prepare the converter arguments */
args.converter=cnv;
args.flush=TRUE;
args.offsets=NULL;
args.source=s;
args.sourceLimit=sourceLimit;
args.target=buffer;
/* room for exactly one UChar at first; widened to two below if a trail surrogate is needed */
args.targetLimit=buffer+1;
args.size=sizeof(args);
if(c<0) {
/*
* call the native getNextUChar() implementation if we are
* at a character boundary (toULength==0)
*
* unlike with _toUnicode(), getNextUChar() implementations must set
* U_TRUNCATED_CHAR_FOUND for truncated input,
* in addition to setting toULength/toUBytes[]
*/
if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) {
c=cnv->sharedData->impl->getNextUChar(&args, err);
*source=s=args.source;
if(*err==U_INDEX_OUTOFBOUNDS_ERROR) {
/* reset the converter without calling the callback function */
_reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
return 0xffff; /* no output */
} else if(U_SUCCESS(*err) && c>=0) {
return c;
/*
* else fall through to use _toUnicode() because
* UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all
* U_FAILURE: call _toUnicode() for callback handling (do not output c)
*/
}
}
/* convert to one UChar in buffer[0], or handle getNextUChar() errors */
_toUnicodeWithCallback(&args, err);
/* an overflow of the one-UChar target is expected; extra output sits in the overflow buffer */
if(*err==U_BUFFER_OVERFLOW_ERROR) {
*err=U_ZERO_ERROR;
}
i=0;
length=(int32_t)(args.target-buffer);
} else {
/* write the lead surrogate from the overflow buffer */
buffer[0]=(UChar)c;
args.target=buffer+1;
i=0;
length=1;
}
/* buffer contents starts at i and ends before length */
if(U_FAILURE(*err)) {
c=0xffff; /* no output */
} else if(length==0) {
/* no input or only state changes */
*err=U_INDEX_OUTOFBOUNDS_ERROR;
/* no need to reset explicitly because _toUnicodeWithCallback() did it */
c=0xffff; /* no output */
} else {
c=buffer[0];
i=1;
if(!U16_IS_LEAD(c)) {
/* consume c=buffer[0], done */
} else {
/* got a lead surrogate, see if a trail surrogate follows */
UChar c2;
if(cnv->UCharErrorBufferLength>0) {
/* got overflow output from the conversion */
if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) {
/* got a trail surrogate, too */
c=U16_GET_SUPPLEMENTARY(c, c2);
/* move the remaining overflow contents up to the beginning */
if((--cnv->UCharErrorBufferLength)>0) {
uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1,
cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
}
} else {
/* c is an unpaired lead surrogate, just return it */
}
} else if(args.source<sourceLimit) {
/* convert once more, to buffer[1] */
args.targetLimit=buffer+2;
_toUnicodeWithCallback(&args, err);
if(*err==U_BUFFER_OVERFLOW_ERROR) {
*err=U_ZERO_ERROR;
}
length=(int32_t)(args.target-buffer);
if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) {
/* got a trail surrogate, too */
c=U16_GET_SUPPLEMENTARY(c, c2);
i=2;
}
}
}
}
/*
* move leftover output from buffer[i..length[
* into the beginning of the overflow buffer
*/
if(i<length) {
/* move further overflow back */
/* delta = number of leftover UChars in buffer[] to put in front of the existing overflow */
int32_t delta=length-i;
if((length=cnv->UCharErrorBufferLength)>0) {
uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer,
length*U_SIZEOF_UCHAR);
}
cnv->UCharErrorBufferLength=(int8_t)(length+delta);
cnv->UCharErrorBuffer[0]=buffer[i++];
if(delta>1) {
cnv->UCharErrorBuffer[1]=buffer[i];
}
}
*source=args.source;
return c;
}
/* ucnv_convert() and siblings ---------------------------------------------- */
/*
 * Convert from one charset (sourceCnv) to another (targetCnv), normally by
 * way of a UTF-16 pivot buffer.
 *
 * Arguments as used by the code below:
 * - targetCnv, sourceCnv, source, *source, target, *target and targetLimit
 *   must not be NULL; otherwise U_ILLEGAL_ARGUMENT_ERROR is set.
 * - sourceLimit may be NULL, in which case the source is treated as a
 *   single-byte-NUL-terminated string.
 * - pivotStart may be NULL only together with flush==TRUE; a local
 *   stack buffer of CHUNK_SIZE UChars is then used as the pivot and the
 *   other pivot arguments are ignored. With a caller-supplied pivot buffer,
 *   pivotSource/pivotTarget (and what they point to) and pivotLimit must be
 *   non-NULL and pivotStart<pivotLimit.
 * - reset: if TRUE, both converters' relevant states are reset and the pivot
 *   pointers are set back to pivotStart; if FALSE, pending output in the
 *   targetCnv overflow buffer is written out first.
 * - flush: passed to the toUnicode side; the fromUnicode side is flushed
 *   once toUnicode has processed the end of the input.
 *
 * If one side is UTF-8 and the other converter's implementation provides a
 * direct fromUTF8/toUTF8 function, that function is used where possible and
 * the pivot is limited to 32 UChars, serving only error handling,
 * partial matches and replay.
 *
 * On return, *source and *target reflect the progress made. If flush is set
 * and the conversion succeeded, the output is NUL-terminated when there is
 * room; otherwise *pErrorCode is set to U_STRING_NOT_TERMINATED_WARNING.
 */
U_CAPI void U_EXPORT2
ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
char **target, const char *targetLimit,
const char **source, const char *sourceLimit,
UChar *pivotStart, UChar **pivotSource,
UChar **pivotTarget, const UChar *pivotLimit,
UBool reset, UBool flush,
UErrorCode *pErrorCode) {
UChar pivotBuffer[CHUNK_SIZE];
const UChar *myPivotSource;
UChar *myPivotTarget;
const char *s;
char *t;
UConverterToUnicodeArgs toUArgs;
UConverterFromUnicodeArgs fromUArgs;
UConverterConvert convert;
/* error checking */
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
return;
}
if( targetCnv==NULL || sourceCnv==NULL ||
source==NULL || *source==NULL ||
target==NULL || *target==NULL || targetLimit==NULL
) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return;
}
s=*source;
t=*target;
if((sourceLimit!=NULL && sourceLimit<s) || targetLimit<t) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return;
}
/*
* Make sure that the buffer sizes do not exceed the number range for
* int32_t. See ucnv_toUnicode() for a more detailed comment.
*/
if(
(sourceLimit!=NULL && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) ||
((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t)
) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return;
}
if(pivotStart==NULL) {
if(!flush) {
/* streaming conversion requires an explicit pivot buffer */
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return;
}
/* use the stack pivot buffer */
/* redirect all pivot pointers to local variables; the caller's pivot arguments are not touched */
myPivotSource=myPivotTarget=pivotStart=pivotBuffer;
pivotSource=(UChar **)&myPivotSource;
pivotTarget=&myPivotTarget;
pivotLimit=pivotBuffer+CHUNK_SIZE;
} else if( pivotStart>=pivotLimit ||
pivotSource==NULL || *pivotSource==NULL ||
pivotTarget==NULL || *pivotTarget==NULL ||
pivotLimit==NULL
) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return;
}
if(sourceLimit==NULL) {
/* get limit of single-byte-NUL-terminated source string */
sourceLimit=uprv_strchr(*source, 0);
}
if(reset) {
ucnv_resetToUnicode(sourceCnv);
ucnv_resetFromUnicode(targetCnv);
*pivotSource=*pivotTarget=pivotStart;
} else if(targetCnv->charErrorBufferLength>0) {
/* output the targetCnv overflow buffer */
if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) {
/* U_BUFFER_OVERFLOW_ERROR */
return;
}
/* *target has moved, therefore stop using t */
if( !flush &&
targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget &&
sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit
) {
/* the fromUnicode overflow buffer is emptied and there is no new input: we are done */
return;
}
}
/* Is direct-UTF-8 conversion available? */
if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
targetCnv->sharedData->impl->fromUTF8!=NULL
) {
convert=targetCnv->sharedData->impl->fromUTF8;
} else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
sourceCnv->sharedData->impl->toUTF8!=NULL
) {
convert=sourceCnv->sharedData->impl->toUTF8;
} else {
convert=NULL;
}
/*
* If direct-UTF-8 conversion is available, then we use a smaller
* pivot buffer for error handling and partial matches
* so that we quickly return to direct conversion.
*
* 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH.
*
* We could reduce the pivot buffer size further, at the cost of
* buffer overflows from callbacks.
* The pivot buffer should not be smaller than the maximum number of
* fromUnicode extension table input UChars
* (for m:n conversion, see
* targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS])
* or 2 for surrogate pairs.
*
* Too small a buffer can cause thrashing between pivoting and direct
* conversion, with function call overhead outweighing the benefits
* of direct conversion.
*/
if(convert!=NULL && (pivotLimit-pivotStart)>32) {
pivotLimit=pivotStart+32;
}
/* prepare the converter arguments */
/*
* fromUArgs.source/sourceLimit and toUArgs.target are set inside the loop;
* fromUArgs.flush starts FALSE and is raised once toUnicode has seen the end of input
*/
fromUArgs.converter=targetCnv;
fromUArgs.flush=FALSE;
fromUArgs.offsets=NULL;
fromUArgs.target=*target;
fromUArgs.targetLimit=targetLimit;
fromUArgs.size=sizeof(fromUArgs);
toUArgs.converter=sourceCnv;
toUArgs.flush=flush;
toUArgs.offsets=NULL;
toUArgs.source=s;
toUArgs.sourceLimit=sourceLimit;
toUArgs.targetLimit=pivotLimit;
toUArgs.size=sizeof(toUArgs);
/*
* TODO: Consider separating this function into two functions,
* extracting exactly the conversion loop,
* for readability and to reduce the set of visible variables.
*
* Otherwise stop using s and t from here on.
*/
s=t=NULL;
/*
* conversion loop
*
* The sequence of steps in the loop may appear backward,
* but the principle is simple:
* In the chain of
* source - sourceCnv overflow - pivot - targetCnv overflow - target
* empty out later buffers before refilling them from earlier ones.
*
* The targetCnv overflow buffer is flushed out only once before the loop.
*/
for(;;) {
/*
* if(pivot not empty or error or replay or flush fromUnicode) {
* fromUnicode(pivot -> target);
* }
*
* For pivoting conversion; and for direct conversion for
* error callback handling and flushing the replay buffer.
*/
if( *pivotSource<*pivotTarget ||
U_FAILURE(*pErrorCode) ||
targetCnv->preFromULength<0 ||
fromUArgs.flush
) {
fromUArgs.source=*pivotSource;
fromUArgs.sourceLimit=*pivotTarget;
_fromUnicodeWithCallback(&fromUArgs, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
/* target overflow, or conversion error */
/* record how far into the pivot the fromUnicode side got, for the caller */
*pivotSource=(UChar *)fromUArgs.source;
break;
}
/*
* _fromUnicodeWithCallback() must have consumed the pivot contents
* (*pivotSource==*pivotTarget) since it returned with U_SUCCESS()
*/
}
/* The pivot buffer is empty; reset it so we start at pivotStart. */
*pivotSource=*pivotTarget=pivotStart;
/*
* if(sourceCnv overflow buffer not empty) {
* move(sourceCnv overflow buffer -> pivot);
* continue;
* }
*/
/* output the sourceCnv overflow buffer */
if(sourceCnv->UCharErrorBufferLength>0) {
if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) {
/* U_BUFFER_OVERFLOW_ERROR */
/* a full pivot is not an error here: the next iteration drains it */
*pErrorCode=U_ZERO_ERROR;
}
continue;
}
/*
* check for end of input and break if done
*
* Checking both flush and fromUArgs.flush ensures that the converters
* have been called with the flush flag set if the ucnv_convertEx()
* caller set it.
*/
if( toUArgs.source==sourceLimit &&
sourceCnv->preToULength>=0 && sourceCnv->toULength==0 &&
(!flush || fromUArgs.flush)
) {
/* done successfully */
break;
}
/*
* use direct conversion if available
* but not if continuing a partial match
* or flushing the toUnicode replay buffer
*/
if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) {
if(*pErrorCode==U_USING_DEFAULT_WARNING) {
/* remove a warning that may be set by this function */
*pErrorCode=U_ZERO_ERROR;
}
convert(&fromUArgs, &toUArgs, pErrorCode);
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
break;
} else if(U_FAILURE(*pErrorCode)) {
if(sourceCnv->toULength>0) {
/*
* Fall through to calling _toUnicodeWithCallback()
* for callback handling.
*
* The pivot buffer will be reset with
* *pivotSource=*pivotTarget=pivotStart;
* which indicates a toUnicode error to the caller
* (*pivotSource==pivotStart shows no pivot UChars consumed).
*/
} else {
/*
* Indicate a fromUnicode error to the caller
* (*pivotSource>pivotStart shows some pivot UChars consumed).
*/
*pivotSource=*pivotTarget=pivotStart+1;
/*
* Loop around to calling _fromUnicodeWithCallback()
* for callback handling.
*/
continue;
}
} else if(*pErrorCode==U_USING_DEFAULT_WARNING) {
/*
* No error, but the implementation requested to temporarily
* fall back to pivoting.
*/
*pErrorCode=U_ZERO_ERROR;
/*
* The following else branches are almost identical to the end-of-input
* handling in _toUnicodeWithCallback().
* Avoid calling it just for the end of input.
*/
} else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */
/*
* the entire input stream is consumed
* and there is a partial, truncated input sequence left
*/
/* inject an error and continue with callback handling */
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
} else {
/* input consumed */
if(flush) {
/* reset the converters without calling the callback functions */
_reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE);
_reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE);
}
/* done successfully */
break;
}
}
/*
* toUnicode(source -> pivot);
*
* For pivoting conversion; and for direct conversion for
* error callback handling, continuing partial matches
* and flushing the replay buffer.
*
* The pivot buffer is empty and reset.
*/
toUArgs.target=pivotStart; /* ==*pivotTarget */
/* toUArgs.targetLimit=pivotLimit; already set before the loop */
_toUnicodeWithCallback(&toUArgs, pErrorCode);
*pivotTarget=toUArgs.target;
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
/* pivot overflow: continue with the conversion loop */
*pErrorCode=U_ZERO_ERROR;
} else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) {
/* conversion error, or there was nothing left to convert */
break;
}
/*
* else:
* _toUnicodeWithCallback() wrote into the pivot buffer,
* continue with fromUnicode conversion.
*
* Set the fromUnicode flush flag if we flush and if toUnicode has
* processed the end of the input.
*/
if( flush && toUArgs.source==sourceLimit &&
sourceCnv->preToULength>=0 &&
sourceCnv->UCharErrorBufferLength==0
) {
fromUArgs.flush=TRUE;
}
}
/*
* The conversion loop is exited when one of the following is true:
* - the entire source text has been converted successfully to the target buffer
* - a target buffer overflow occurred
* - a conversion error occurred
*/
*source=toUArgs.source;
*target=fromUArgs.target;
/* terminate the target buffer if possible */
if(flush && U_SUCCESS(*pErrorCode)) {
if(*target!=targetLimit) {
**target=0;
/* the terminator fit after all, so drop a stale not-terminated warning */
if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
*pErrorCode=U_ZERO_ERROR;
}
} else {
*pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
}
}
}
/*
 * Shared implementation of ucnv_convert() and its siblings, with
 * preflighting.
 *
 * Converts source (sourceLength bytes, or NUL-terminated when
 * sourceLength<0) from inConverter's charset to outConverter's charset
 * through a local pivot buffer, always with flush==TRUE and reset==FALSE.
 * When the target is too small, or targetCapacity==0, the conversion is
 * continued into a scratch buffer so that the full output length can be
 * returned.
 *
 * @return the total output length in bytes
 */
static int32_t
ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter,
                     char *target, int32_t targetCapacity,
                     const char *source, int32_t sourceLength,
                     UErrorCode *pErrorCode) {
    UChar pivotBuffer[CHUNK_SIZE];
    UChar *pivotSource, *pivotTarget;
    const char *sourceLimit;
    int32_t total=0;

    /* locate the end of the input */
    sourceLimit= sourceLength<0 ? uprv_strchr(source, 0) : source+sourceLength;

    /* no input at all: just terminate the empty output */
    if(source==sourceLimit) {
        return u_terminateChars(target, targetCapacity, 0, pErrorCode);
    }

    pivotSource=pivotTarget=pivotBuffer;

    if(targetCapacity>0) {
        /* real conversion into the caller's buffer */
        char *out=target;
        ucnv_convertEx(outConverter, inConverter,
                       &out, target+targetCapacity,
                       &source, sourceLimit,
                       pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+CHUNK_SIZE,
                       FALSE,
                       TRUE,
                       pErrorCode);
        total=(int32_t)(out-target);
    }

    if(*pErrorCode!=U_BUFFER_OVERFLOW_ERROR && targetCapacity!=0) {
        /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */
        return total;
    }

    /*
     * The output buffer is exhausted, or we are only preflighting:
     * stop writing to it but keep converting into a throw-away buffer
     * in order to count the number of bytes that are required.
     */
    {
        char scratch[CHUNK_SIZE];
        do {
            char *out=scratch;
            *pErrorCode=U_ZERO_ERROR;
            ucnv_convertEx(outConverter, inConverter,
                           &out, scratch+CHUNK_SIZE,
                           &source, sourceLimit,
                           pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+CHUNK_SIZE,
                           FALSE,
                           TRUE,
                           pErrorCode);
            total+=(int32_t)(out-scratch);
        } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
    }

    /* done with preflighting, set warnings and errors as appropriate */
    return u_terminateChars(target, targetCapacity, total, pErrorCode);
}
/*
 * One-shot conversion between two named charsets.
 *
 * Opens two stack-allocated converters (fromConverterName for the input,
 * toConverterName for the output), runs ucnv_internalConvert() and closes
 * both converters again.
 *
 * @param sourceLength  number of input bytes, or -1 if source is NUL-terminated
 * @param pErrorCode    must be non-NULL and hold a success code on entry;
 *                      set to U_ILLEGAL_ARGUMENT_ERROR for bad arguments
 * @return the length reported by ucnv_internalConvert(), or 0 if nothing was
 *         converted because of an error
 */
U_CAPI int32_t U_EXPORT2
ucnv_convert(const char *toConverterName, const char *fromConverterName,
             char *target, int32_t targetCapacity,
             const char *source, int32_t sourceLength,
             UErrorCode *pErrorCode) {
    UConverter inStorage, outStorage; /* stack-allocated */
    UConverter *fromCnv, *toCnv;
    int32_t length;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* argument validation */
    if(source==NULL || sourceLength<-1 || targetCapacity<0) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    if(target==NULL && targetCapacity>0) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* nothing to convert: just terminate the output */
    if(sourceLength==0 || (sourceLength<0 && *source==0)) {
        return u_terminateChars(target, targetCapacity, 0, pErrorCode);
    }

    /* open the input converter, then the output converter */
    fromCnv=ucnv_createConverter(&inStorage, fromConverterName, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    toCnv=ucnv_createConverter(&outStorage, toConverterName, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        /* the input converter was opened successfully and must be released */
        ucnv_close(fromCnv);
        return 0;
    }

    length=ucnv_internalConvert(toCnv, fromCnv,
                                target, targetCapacity,
                                source, sourceLength,
                                pErrorCode);

    ucnv_close(fromCnv);
    ucnv_close(toCnv);

    return length;
}
/*
 * @internal
 * Common implementation of ucnv_toAlgorithmic() and ucnv_fromAlgorithmic().
 *
 * Opens a temporary, stack-allocated algorithmic converter of the requested
 * type and pairs it with the caller's converter:
 *   convertToAlgorithmic!=0:  cnv -> Unicode -> algorithmic
 *   convertToAlgorithmic==0:  algorithmic -> Unicode -> cnv
 * The direction of cnv that is about to be used is reset first.
 *
 * @return the length reported by ucnv_internalConvert(), or 0 on early error
 */
static int32_t
ucnv_convertAlgorithmic(UBool convertToAlgorithmic,
                        UConverterType algorithmicType,
                        UConverter *cnv,
                        char *target, int32_t targetCapacity,
                        const char *source, int32_t sourceLength,
                        UErrorCode *pErrorCode) {
    UConverter algoStorage; /* stack-allocated */
    UConverter *algoCnv, *inCnv, *outCnv;
    int32_t length;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    if( cnv==NULL || source==NULL || sourceLength<-1 ||
        targetCapacity<0 || (target==NULL && targetCapacity>0)
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* empty input: only terminate the output */
    if(sourceLength==0 || (sourceLength<0 && *source==0)) {
        return u_terminateChars(target, targetCapacity, 0, pErrorCode);
    }

    /* open the temporary algorithmic converter */
    algoCnv=ucnv_createAlgorithmicConverter(&algoStorage, algorithmicType,
                                            "", 0, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* reset the caller's converter for the direction used and pick the roles */
    if(!convertToAlgorithmic) {
        /* algo->Unicode->cnv */
        ucnv_resetFromUnicode(cnv);
        inCnv=algoCnv;
        outCnv=cnv;
    } else {
        /* cnv->Unicode->algo */
        ucnv_resetToUnicode(cnv);
        inCnv=cnv;
        outCnv=algoCnv;
    }

    length=ucnv_internalConvert(outCnv, inCnv,
                                target, targetCapacity,
                                source, sourceLength,
                                pErrorCode);

    ucnv_close(algoCnv);

    return length;
}
/*
 * Converts source from the charset of cnv to the charset of the algorithmic
 * converter type given by algorithmicType.
 *
 * Thin wrapper: forwards all arguments to ucnv_convertAlgorithmic() with
 * convertToAlgorithmic=TRUE (cnv -> Unicode -> algorithmic), which also
 * performs all argument and error-code checking.
 *
 * @return whatever ucnv_convertAlgorithmic() returns
 */
U_CAPI int32_t U_EXPORT2
ucnv_toAlgorithmic(UConverterType algorithmicType,
UConverter *cnv,
char *target, int32_t targetCapacity,
const char *source, int32_t sourceLength,
UErrorCode *pErrorCode) {
return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv,
target, targetCapacity,
source, sourceLength,
pErrorCode);
}
/*
 * Converts source from the charset of the algorithmic converter type given by
 * algorithmicType to the charset of cnv.
 *
 * Thin wrapper: forwards all arguments to ucnv_convertAlgorithmic() with
 * convertToAlgorithmic=FALSE (algorithmic -> Unicode -> cnv), which also
 * performs all argument and error-code checking.
 *
 * @return whatever ucnv_convertAlgorithmic() returns
 */
U_CAPI int32_t U_EXPORT2
ucnv_fromAlgorithmic(UConverter *cnv,
UConverterType algorithmicType,
char *target, int32_t targetCapacity,
const char *source, int32_t sourceLength,
UErrorCode *pErrorCode) {
return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv,
target, targetCapacity,
source, sourceLength,
pErrorCode);
}
/*
 * Returns the converter's type.
 *
 * The value normally comes straight from the conversionType field of the
 * converter's shared static data. When legacy conversion is compiled in and
 * that field says UCNV_MBCS, the answer is delegated to ucnv_MBCSGetType().
 *
 * converter is dereferenced without a NULL check.
 */
U_CAPI UConverterType U_EXPORT2
ucnv_getType(const UConverter* converter)
{
    const int8_t conversionType = converter->sharedData->staticData->conversionType;

#if !UCONFIG_NO_LEGACY_CONVERSION
    if(UCNV_MBCS == conversionType) {
        return ucnv_MBCSGetType(converter);
    }
#endif

    return (UConverterType)conversionType;
}
/*
 * Fills starters[] by calling the converter implementation's getStarters
 * function.
 *
 * Does nothing if err is NULL or already holds a failure code. Sets *err to
 * U_ILLEGAL_ARGUMENT_ERROR when the implementation provides no getStarters
 * function. converter is dereferenced without a NULL check.
 */
U_CAPI void U_EXPORT2
ucnv_getStarters(const UConverter* converter,
                 UBool starters[256],
                 UErrorCode* err)
{
    if (err == NULL || U_FAILURE(*err)) {
        return;
    }

    if (converter->sharedData->impl->getStarters == NULL) {
        /* this converter implementation has no starter information */
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    converter->sharedData->impl->getStarters(converter, starters, err);
}
static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv)
{
UErrorCode errorCode;
const char *name;
int32_t i;
if(cnv==NULL) {
return NULL;
}
errorCode=U_ZERO_ERROR;
name=ucnv_getName(cnv, &errorCode);
if(U_FAILURE(errorCode)) {
return NULL;
}
for(i=0; i<(int32_t)(sizeof(ambiguousConverters)/sizeof(UAmbiguousConverter)); ++i)
{
if(0==uprv_strcmp(name, ambiguousConverters[i].name))
{
return ambiguousConverters+i;
}
}
return NULL;
}
/*
 * Rewrites source in place: every UChar equal to the converter's variant5c
 * value (taken from its ambiguousConverters entry) becomes U+005C.
 *
 * Does nothing if cnv or source is NULL, sourceLength is not positive, or the
 * converter has no entry in the ambiguousConverters table.
 */
U_CAPI void U_EXPORT2
ucnv_fixFileSeparator(const UConverter *cnv,
                      UChar* source,
                      int32_t sourceLength) {
    const UAmbiguousConverter *ambiguous;
    UChar *p, *limit;
    UChar variant;

    if(cnv==NULL || source==NULL || sourceLength<=0) {
        return;
    }

    ambiguous=ucnv_getAmbiguous(cnv);
    if(ambiguous==NULL) {
        return;
    }

    variant=ambiguous->variant5c;
    limit=source+sourceLength;
    for(p=source; p<limit; ++p) {
        if(*p==variant) {
            *p=0x5c;
        }
    }
}
/*
 * Reports whether the converter has an entry in the ambiguousConverters table.
 *
 * @return non-zero if ucnv_getAmbiguous() finds an entry for cnv; zero
 *         otherwise, including when cnv is NULL (ucnv_getAmbiguous() returns
 *         NULL in that case)
 */
U_CAPI UBool U_EXPORT2
ucnv_isAmbiguous(const UConverter *cnv) {
return (UBool)(ucnv_getAmbiguous(cnv)!=NULL);
}
/*
 * Stores the caller's fallback setting on the converter.
 *
 * @param cnv           converter to modify; dereferenced without a NULL check
 * @param usesFallback  value copied into cnv->useFallback
 */
U_CAPI void U_EXPORT2
ucnv_setFallback(UConverter *cnv, UBool usesFallback)
{
cnv->useFallback = usesFallback;
}
/*
 * Returns the converter's current fallback setting.
 *
 * @param cnv  converter to query; dereferenced without a NULL check
 * @return the value of cnv->useFallback
 */
U_CAPI UBool U_EXPORT2
ucnv_usesFallback(const UConverter *cnv)
{
return cnv->useFallback;
}
/*
 * Copies the converter's invalidCharBuffer into errBytes.
 *
 * @param errBytes  destination for the bytes
 * @param len       in: capacity of errBytes; out: number of bytes stored in
 *                  the converter's invalidCharBuffer (invalidCharLength)
 * @param err       must be non-NULL and hold a success code on entry; set to
 *                  U_ILLEGAL_ARGUMENT_ERROR if any pointer argument is NULL,
 *                  or U_INDEX_OUTOFBOUNDS_ERROR if *len is too small
 *                  (in which case *len and errBytes are left unchanged)
 */
U_CAPI void U_EXPORT2
ucnv_getInvalidChars (const UConverter * converter,
                      char *errBytes,
                      int8_t * len,
                      UErrorCode * err)
{
    if (err == NULL || U_FAILURE(*err)) {
        return;
    }

    if (converter == NULL || errBytes == NULL || len == NULL) {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /* the caller's buffer must be able to hold all stored bytes */
    if (converter->invalidCharLength > *len) {
        *err = U_INDEX_OUTOFBOUNDS_ERROR;
        return;
    }

    *len = converter->invalidCharLength;
    if (*len > 0) {
        uprv_memcpy (errBytes, converter->invalidCharBuffer, *len);
    }
}
/*
 * Copies the converter's invalidUCharBuffer into errChars.
 *
 * @param errChars  destination for the UChars
 * @param len       in: capacity of errChars in UChars; out: number of UChars
 *                  stored in the converter's invalidUCharBuffer
 *                  (invalidUCharLength)
 * @param err       must be non-NULL and hold a success code on entry; set to
 *                  U_ILLEGAL_ARGUMENT_ERROR if any pointer argument is NULL,
 *                  or U_INDEX_OUTOFBOUNDS_ERROR if *len is too small
 *                  (in which case *len and errChars are left unchanged)
 */
U_CAPI void U_EXPORT2
ucnv_getInvalidUChars (const UConverter * converter,
                       UChar *errChars,
                       int8_t * len,
                       UErrorCode * err)
{
    if (err == NULL || U_FAILURE(*err)) {
        return;
    }

    if (converter == NULL || errChars == NULL || len == NULL) {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /* the caller's buffer must be able to hold all stored UChars */
    if (converter->invalidUCharLength > *len) {
        *err = U_INDEX_OUTOFBOUNDS_ERROR;
        return;
    }

    *len = converter->invalidUCharLength;
    if (*len > 0) {
        uprv_memcpy (errChars, converter->invalidUCharBuffer, sizeof(UChar) * (*len));
    }
}
#define SIG_MAX_LEN 5

/*
 * Detects a Unicode signature byte sequence (BOM) at the start of source.
 *
 * @param source           input bytes; must not be NULL
 * @param sourceLength     number of input bytes, or -1 if source is
 *                         NUL-terminated
 * @param signatureLength  optional output: length in bytes of the recognized
 *                         signature, or 0 if none was recognized; may be NULL
 * @param pErrorCode       must be non-NULL and hold a success code on entry;
 *                         set to U_ILLEGAL_ARGUMENT_ERROR for bad arguments
 * @return the name of the detected charset ("UTF-16BE", "UTF-16LE",
 *         "UTF-32BE", "UTF-32LE", "UTF-8", "SCSU", "BOCU-1", "UTF-7" or
 *         "UTF-EBCDIC"), or NULL if no signature was recognized or on error
 */
U_CAPI const char* U_EXPORT2
ucnv_detectUnicodeSignature( const char* source,
                             int32_t sourceLength,
                             int32_t* signatureLength,
                             UErrorCode* pErrorCode) {
    /*
     * Working copy of the first bytes, pre-filled with 0xa5 so that positions
     * beyond a short input can never match a signature byte.
     */
    char sig[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' };
    int32_t ignoredLength;
    int32_t copied;

    if((pErrorCode==NULL) || U_FAILURE(*pErrorCode)){
        return NULL;
    }

    if(source == NULL || sourceLength < -1){
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    /* let the code below store the length unconditionally */
    if(signatureLength == NULL) {
        signatureLength = &ignoredLength;
    }

    if(sourceLength==-1){
        sourceLength=(int32_t)uprv_strlen(source);
    }

    /* copy at most SIG_MAX_LEN bytes of input */
    for(copied=0; copied<sourceLength && copied<SIG_MAX_LEN; ++copied){
        sig[copied]=source[copied];
    }

    /* FE FF */
    if(sig[0] == '\xFE' && sig[1] == '\xFF') {
        *signatureLength=2;
        return "UTF-16BE";
    }

    /* FF FE, optionally followed by 00 00 */
    if(sig[0] == '\xFF' && sig[1] == '\xFE') {
        if(sig[2] == '\x00' && sig[3] =='\x00') {
            *signatureLength=4;
            return "UTF-32LE";
        }
        *signatureLength=2;
        return "UTF-16LE";
    }

    /* EF BB BF */
    if(sig[0] == '\xEF' && sig[1] == '\xBB' && sig[2] == '\xBF') {
        *signatureLength=3;
        return "UTF-8";
    }

    /* 00 00 FE FF */
    if(sig[0] == '\x00' && sig[1] == '\x00' &&
       sig[2] == '\xFE' && sig[3] == '\xFF') {
        *signatureLength=4;
        return "UTF-32BE";
    }

    /* 0E FE FF */
    if(sig[0] == '\x0E' && sig[1] == '\xFE' && sig[2] == '\xFF') {
        *signatureLength=3;
        return "SCSU";
    }

    /* FB EE 28 */
    if(sig[0] == '\xFB' && sig[1] == '\xEE' && sig[2] == '\x28') {
        *signatureLength=3;
        return "BOCU-1";
    }

    /*
     * UTF-7: Initial U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v/
     * depending on the second UTF-16 code unit.
     * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF
     * if it occurs.
     */
    if(sig[0] == '\x2B' && sig[1] == '\x2F' && sig[2] == '\x76') {
        /* so far we have +/v */
        if(sig[3] == '\x38' && sig[4] == '\x2D') {
            /* 5 bytes +/v8- */
            *signatureLength=5;
            return "UTF-7";
        }
        switch(sig[3]) {
        case '\x38':
        case '\x39':
        case '\x2B':
        case '\x2F':
            /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */
            *signatureLength=4;
            return "UTF-7";
        default:
            /* +/v followed by something else: not a signature */
            break;
        }
    }

    /* DD 73 66 73 */
    if(sig[0] == '\xDD' && sig[1] == '\x73' && sig[2] == '\x66' && sig[3] == '\x73') {
        *signatureLength=4;
        return "UTF-EBCDIC";
    }

    /* no known Unicode signature byte sequence recognized */
    *signatureLength=0;
    return NULL;
}
/*
 * Returns the number of UChars held in the converter's from-Unicode state.
 *
 * The count is taken from the first state field that applies, in this order:
 *   - preFromUFirstCP >= 0: UTF-16 length of that code point plus preFromULength
 *   - preFromULength < 0:   the magnitude of preFromULength
 *   - fromUChar32 > 0:      1
 * and is 0 if none applies.
 *
 * @return the pending count, or -1 if status is NULL or already a failure,
 *         or if cnv is NULL (which also sets U_ILLEGAL_ARGUMENT_ERROR)
 */
U_CAPI int32_t U_EXPORT2
ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status)
{
    int32_t pending = 0;

    if(status == NULL || U_FAILURE(*status)){
        return -1;
    }
    if(cnv == NULL){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return -1;
    }

    if(cnv->preFromUFirstCP >= 0){
        pending = U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength;
    }else if(cnv->preFromULength < 0){
        pending = -cnv->preFromULength;
    }else if(cnv->fromUChar32 > 0){
        pending = 1;
    }

    return pending;
}
/*
 * Returns the number of input units held in the converter's to-Unicode state.
 *
 * If preToULength is non-zero its magnitude is returned; otherwise toULength
 * is returned when positive, and 0 when not.
 *
 * @return the pending count, or -1 if status is NULL or already a failure,
 *         or if cnv is NULL (which also sets U_ILLEGAL_ARGUMENT_ERROR)
 */
U_CAPI int32_t U_EXPORT2
ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){
    if(status == NULL || U_FAILURE(*status)){
        return -1;
    }
    if(cnv == NULL){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return -1;
    }

    if(cnv->preToULength != 0){
        /* the sign is dropped, only the count matters here */
        return cnv->preToULength < 0 ? -cnv->preToULength : cnv->preToULength;
    }

    return cnv->toULength > 0 ? cnv->toULength : 0;
}
/*
 * Reports whether the converter's type is one of the fixed-width types.
 *
 * The types treated as fixed width are UCNV_SBCS, UCNV_DBCS, UCNV_UTF32,
 * UCNV_UTF32_BigEndian, UCNV_UTF32_LittleEndian and UCNV_US_ASCII, as
 * reported by ucnv_getType(); every other type yields FALSE.
 *
 * @param cnv     converter to query; NULL sets U_ILLEGAL_ARGUMENT_ERROR
 * @param status  in/out error code; if it is NULL or already holds a failure
 *                code, the function returns FALSE without doing anything
 * @return TRUE for a fixed-width converter type, FALSE otherwise or on error
 */
U_CAPI UBool U_EXPORT2
ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){
    /*
     * Check the pointer before dereferencing it, like the other functions in
     * this file that take a UErrorCode pointer.
     */
    if (status == NULL || U_FAILURE(*status)) {
        return FALSE;
    }

    if (cnv == NULL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return FALSE;
    }

    switch (ucnv_getType(cnv)) {
        case UCNV_SBCS:
        case UCNV_DBCS:
        case UCNV_UTF32_BigEndian:
        case UCNV_UTF32_LittleEndian:
        case UCNV_UTF32:
        case UCNV_US_ASCII:
            return TRUE;
        default:
            return FALSE;
    }
}
#endif
/*
* Hey, Emacs, please set the following:
*
* Local Variables:
* indent-tabs-mode: nil
* End:
*
*/