mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
805dd78c93
Bug 924839 - Remove a patch already part of ICU 52.1. See http://bugs.icu-project.org/trac/ticket/10283 but also note the relevant code was removed completely upstream. r=glandium * * * Bug 924839 - Remove another patch already part of ICU 52.1. See http://bugs.icu-project.org/trac/ticket/10290 for that. r=gaston * * * Bug 924839 - Remove another patch already in ICU 52.1. See http://bugs.icu-project.org/trac/ticket/10045 for more. r=Norbert * * * Bug 924839 - Remove another patch already applied upstream. See http://bugs.icu-project.org/trac/changeset/32937 for more. r=gaston * * * Bug 924839 - Update the ICU update script to update to 52.1, *without* applying any of our local patches. r=glandium * * * Bug 924839 - Make the ICU update script only do updating within intl/icu/source and nowhere else. r=glandium * * * Bug 924839 - Implement the changes that would be made by |cd intl/; ./update-icu.sh http://source.icu-project.org/repos/icu/icu/tags/release-52-1/;|, run with the prior changesets' changes made (thus not applying any of our local patches). These changes don't actually work without subsequent adjustments, but this provides a codebase upon which those adjustments can be made, for the purpose of generating local patches to be kept in intl/icu-patches/. rs=the-usual-suspects * * * Bug 924839 - Update the bug 899722 local patch to make runConfigureICU not override CC/CXX on BSD systems. r=gaston * * * Bug 924839 - Update the bug 724533 patch that makes ICU builds with MozillaBuild on Windows. r=glandium * * * Bug 924839 - Import an upstream patch fixing the genrb tool to properly handle the -R (--omitCollationRules) option. See http://bugs.icu-project.org/trac/ticket/10043 for the original bug report and a link to the ultimate upstream landing. r=Norbert * * * Bug 924839 - Import the upstream fix for http://bugs.icu-project.org/trac/ticket/10486 so that ICU with -DU_USING_ICU_NAMESPACE=0 will compile on Windows. 
r=Norbert * * * Bug 924839 - Adjust the update script to update ICU, then to apply all local patches (rather than skipping the second step). Thus if the update script is properly run, now, the final result should be no changes at all to the tree. NOT REVIEWED YET * * * Bug 924839 - Update jstests that depend on CLDR locale data to match CLDR 24. r=Norbert
1385 lines
51 KiB
C++
1385 lines
51 KiB
C++
/*
|
|
*******************************************************************************
|
|
* Copyright (C) 1996-2013, International Business Machines
|
|
* Corporation and others. All Rights Reserved.
|
|
*******************************************************************************
|
|
* file name: ucol_res.cpp
|
|
* encoding: US-ASCII
|
|
* tab size: 8 (not used)
|
|
* indentation:4
|
|
*
|
|
* Description:
|
|
* This file contains dependencies that the collation run-time doesn't normally
|
|
* need. This mainly contains resource bundle usage and collation meta information
|
|
*
|
|
* Modification history
|
|
* Date Name Comments
|
|
* 1996-1999 various members of ICU team maintained C API for collation framework
|
|
* 02/16/2001 synwee Added internal method getPrevSpecialCE
|
|
* 03/01/2001 synwee Added maxexpansion functionality.
|
|
* 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant
|
|
* 12/08/2004 grhoten Split part of ucol.cpp into ucol_res.cpp
|
|
*/
|
|
|
|
#include "unicode/utypes.h"
|
|
|
|
#if !UCONFIG_NO_COLLATION
|
|
#include "unicode/uloc.h"
|
|
#include "unicode/coll.h"
|
|
#include "unicode/tblcoll.h"
|
|
#include "unicode/caniter.h"
|
|
#include "unicode/uscript.h"
|
|
#include "unicode/ustring.h"
|
|
|
|
#include "ucol_bld.h"
|
|
#include "ucol_imp.h"
|
|
#include "ucol_tok.h"
|
|
#include "ucol_elm.h"
|
|
#include "uresimp.h"
|
|
#include "ustr_imp.h"
|
|
#include "cstring.h"
|
|
#include "umutex.h"
|
|
#include "ucln_in.h"
|
|
#include "ustrenum.h"
|
|
#include "putilimp.h"
|
|
#include "utracimp.h"
|
|
#include "cmemory.h"
|
|
#include "uassert.h"
|
|
#include "uenumimp.h"
|
|
#include "ulist.h"
|
|
|
|
U_NAMESPACE_USE
|
|
|
|
static void ucol_setReorderCodesFromParser(UCollator *coll, UColTokenParser *parser, UErrorCode *status);
|
|
|
|
// static UCA. There is only one. Collators don't use it.
|
|
// It is referenced only in ucol_initUCA and ucol_cleanup
|
|
static UCollator* _staticUCA = NULL;
|
|
static icu::UInitOnce gStaticUCAInitOnce = U_INITONCE_INITIALIZER;
|
|
// static pointer to udata memory. Inited in ucol_initUCA
|
|
// used for cleanup in ucol_cleanup
|
|
static UDataMemory* UCA_DATA_MEM = NULL;
|
|
|
|
U_CDECL_BEGIN
|
|
/**
 * Cleanup hook registered by ucol_initStaticUCA().
 * Closes the UCA data memory and the static UCA collator when they exist,
 * clears both pointers and resets the init-once guard so that a later
 * ucol_initUCA() call initializes again.
 * @return always TRUE
 */
static UBool U_CALLCONV
ucol_res_cleanup(void)
{
    if (UCA_DATA_MEM != NULL) {
        udata_close(UCA_DATA_MEM);
    }
    UCA_DATA_MEM = NULL;

    if (_staticUCA != NULL) {
        ucol_close(_staticUCA);
    }
    _staticUCA = NULL;

    gStaticUCAInitOnce.reset();
    return TRUE;
}
|
|
|
|
/**
 * Acceptance callback passed to udata_openChoice() for the UCA data file.
 * The data is accepted when its UDataInfo is at least 20 bytes, matches this
 * platform's endianness and charset family, carries the "UCol" data format
 * and has a compatible format version.
 * @param pInfo data header to inspect
 * @return TRUE when the data is usable, FALSE otherwise
 */
static UBool U_CALLCONV
isAcceptableUCA(void * /*context*/,
                const char * /*type*/, const char * /*name*/,
                const UDataInfo *pInfo)
{
    /* context, type & name are intentionally not used */
    if (pInfo->size < 20) {
        return FALSE;
    }
    if (pInfo->isBigEndian != U_IS_BIG_ENDIAN ||
        pInfo->charsetFamily != U_CHARSET_FAMILY) {
        return FALSE;
    }
    /* dataFormat="UCol" */
    if (pInfo->dataFormat[0] != UCA_DATA_FORMAT_0 ||
        pInfo->dataFormat[1] != UCA_DATA_FORMAT_1 ||
        pInfo->dataFormat[2] != UCA_DATA_FORMAT_2 ||
        pInfo->dataFormat[3] != UCA_DATA_FORMAT_3) {
        return FALSE;
    }
    if (pInfo->formatVersion[0] != UCA_FORMAT_VERSION_0) {
        return FALSE;
    }
#if UCA_FORMAT_VERSION_1!=0
    if (pInfo->formatVersion[1] < UCA_FORMAT_VERSION_1) {
        return FALSE;
    }
#endif
    // formatVersion[2] and formatVersion[3] are deliberately not compared:
    // requiring an exact match there would be too harsh.
    //
    // Note: In ICU 51 and earlier, we used to check that the UCA data version
    // (pInfo->dataVersion) matches the UCD version (u_getUnicodeVersion()),
    // but that complicated version updates, and a mismatch is "only" a
    // problem for handling canonical equivalence. It need not be a fatal error.
    return TRUE;
}
|
|
U_CDECL_END
|
|
|
|
/**
 * Init-once worker behind ucol_initUCA(): registers the cleanup hook, opens
 * the UCA data file and builds the static UCA collator from it.
 * On success _staticUCA and UCA_DATA_MEM are set; on failure both stay NULL
 * and the data handle is closed again.
 * @param status receives any error from opening the data or the collator
 */
static void U_CALLCONV ucol_initStaticUCA(UErrorCode &status) {
    U_ASSERT(_staticUCA == NULL);
    U_ASSERT(UCA_DATA_MEM == NULL);
    ucln_i18n_registerCleanup(UCLN_I18N_UCOL_RES, ucol_res_cleanup);

    UDataMemory *ucaData = udata_openChoice(U_ICUDATA_COLL, UCA_DATA_TYPE, UCA_DATA_NAME,
                                            isAcceptableUCA, NULL, &status);
    if (U_FAILURE(status)) {
        udata_close(ucaData);
        return;
    }

    const UCATableHeader *image = (const UCATableHeader *)udata_getMemory(ucaData);
    _staticUCA = ucol_initCollator(image, NULL, NULL, &status);
    if (U_FAILURE(status)) {
        // Could not build the collator: release whatever was created.
        ucol_close(_staticUCA);
        _staticUCA = NULL;
        udata_close(ucaData);
        return;
    }

    // Initialize variables for implicit generation
    uprv_uca_initImplicitConstants(&status);
    UCA_DATA_MEM = ucaData;
}
|
|
|
|
|
|
/* do not close UCA returned by ucol_initUCA! */
|
|
UCollator *
|
|
ucol_initUCA(UErrorCode *status) {
|
|
umtx_initOnce(gStaticUCAInitOnce, &ucol_initStaticUCA, *status);
|
|
return _staticUCA;
|
|
}
|
|
|
|
/**
 * Drops this file's references to the static UCA collator and its data
 * WITHOUT closing either of them, and resets the init-once guard so the next
 * ucol_initUCA() call initializes from scratch.
 */
U_CAPI void U_EXPORT2
ucol_forgetUCA(void)
{
    UCA_DATA_MEM = NULL;
    _staticUCA = NULL;
    gStaticUCAInitOnce.reset();
}
|
|
|
|
/****************************************************************************/
|
|
/* Following are the open/close functions */
|
|
/* */
|
|
/****************************************************************************/
|
|
/**
 * Builds a collator from the "Sequence" rule string stored in a collation
 * resource element, using default strength and normalization.
 * @param collElem collation resource that holds the "Sequence" string
 * @param status   in/out error code
 * @return whatever ucol_openRules() returns for those rules
 */
static UCollator*
tryOpeningFromRules(UResourceBundle *collElem, UErrorCode *status) {
    int32_t sequenceLength = 0;
    const UChar *sequence = ures_getStringByKey(collElem, "Sequence", &sequenceLength, status);
    return ucol_openRules(sequence, sequenceLength, UCOL_DEFAULT, UCOL_DEFAULT, NULL, status);
}
|
|
|
|
|
|
// API in ucol_imp.h
|
|
|
|
/**
 * Opens a collator for a locale directly from the collation resource bundles.
 *
 * The collation type is taken from the locale's "collation" keyword, or from
 * the bundle's "default" entry when the keyword is absent or is "default".
 * The collator is then built from, in order of preference:
 *  - the "%%CollationBin" binary image, when it matches the UCA/UCD/builder
 *    versions of the static UCA;
 *  - the "Sequence" rule string (no binary, or a version mismatch);
 *  - the UCA itself, with U_USING_DEFAULT_WARNING, when no tailoring exists.
 *
 * @param loc    locale ID; NULL is passed through to the resource functions
 * @param status in/out error code
 * @return a new collator owned by the caller, or NULL on failure
 */
U_CFUNC UCollator*
ucol_open_internal(const char *loc,
                   UErrorCode *status)
{
    UErrorCode intStatus = U_ZERO_ERROR;
    const UCollator* UCA = ucol_initUCA(status);

    /* New version */
    if(U_FAILURE(*status)) return 0;

    UCollator *result = NULL;
    UResourceBundle *b = ures_open(U_ICUDATA_COLL, loc, status);

    /* we try to find stuff from keyword */
    UResourceBundle *collations = ures_getByKey(b, "collations", NULL, status);
    UResourceBundle *collElem = NULL;
    char keyBuffer[256];
    // if there is a keyword, we pick it up and try to get elements
    if(!uloc_getKeywordValue(loc, "collation", keyBuffer, 256, status) ||
        !uprv_strcmp(keyBuffer,"default")) { /* Treat 'zz@collation=default' as 'zz'. */
        // no keyword. we try to find the default setting, which will give us the keyword value
        intStatus = U_ZERO_ERROR;
        // finding default value does not affect collation fallback status
        UResourceBundle *defaultColl = ures_getByKeyWithFallback(collations, "default", NULL, &intStatus);
        if(U_FAILURE(intStatus)) {
            // Release everything opened so far; the bundles used to leak here.
            ures_close(defaultColl);
            ures_close(collations);
            ures_close(b);
            *status = U_INTERNAL_PROGRAM_ERROR;
            return NULL;
        }
        int32_t defaultKeyLen = 0;
        const UChar *defaultKey = ures_getString(defaultColl, &defaultKeyLen, &intStatus);
        if(defaultKey == NULL) {
            // No string available: continue with an empty key.
            defaultKeyLen = 0;
        }
        if(defaultKeyLen >= (int32_t)sizeof(keyBuffer)) {
            // The key plus its terminator would not fit into keyBuffer.
            ures_close(defaultColl);
            ures_close(collations);
            ures_close(b);
            *status = U_BUFFER_OVERFLOW_ERROR;
            return NULL;
        }
        u_UCharsToChars(defaultKey, keyBuffer, defaultKeyLen);
        keyBuffer[defaultKeyLen] = 0;
        ures_close(defaultColl);
    }
    collElem = ures_getByKeyWithFallback(collations, keyBuffer, collations, status);
    collations = NULL; // We just reused the collations object as collElem.

    UResourceBundle *binary = NULL;
    UResourceBundle *reorderRes = NULL;

    if(*status == U_MISSING_RESOURCE_ERROR) { /* We didn't find the tailoring data, we fallback to the UCA */
        *status = U_USING_DEFAULT_WARNING;
        result = ucol_initCollator(UCA->image, result, UCA, status);
        if (U_FAILURE(*status)) {
            goto clean;
        }
        // if we use UCA, real locale is root
        ures_close(b);
        b = ures_open(U_ICUDATA_COLL, "", status);
        ures_close(collElem);
        collElem = ures_open(U_ICUDATA_COLL, "", status);
        if(U_FAILURE(*status)) {
            goto clean;
        }
        result->hasRealData = FALSE;
    } else if(U_SUCCESS(*status)) {
        intStatus = U_ZERO_ERROR;

        binary = ures_getByKey(collElem, "%%CollationBin", NULL, &intStatus);

        if(intStatus == U_MISSING_RESOURCE_ERROR) { /* we didn't find the binary image, we should use the rules */
            binary = NULL;
            result = tryOpeningFromRules(collElem, status);
            if(U_FAILURE(*status)) {
                goto clean;
            }
        } else if(U_SUCCESS(intStatus)) { /* otherwise, we'll pick a collation data that exists */
            int32_t len = 0;
            const uint8_t *inData = ures_getBinary(binary, &len, status);
            if(U_FAILURE(*status)) {
                goto clean;
            }
            UCATableHeader *colData = (UCATableHeader *)inData;
            if(uprv_memcmp(colData->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo)) != 0 ||
                uprv_memcmp(colData->UCDVersion, UCA->image->UCDVersion, sizeof(UVersionInfo)) != 0 ||
                colData->version[0] != UCOL_BUILDER_VERSION)
            {
                // The binary was built for other data: rebuild from the rules.
                *status = U_DIFFERENT_UCA_VERSION;
                result = tryOpeningFromRules(collElem, status);
                if(U_FAILURE(*status)) {
                    // result is dereferenced below, so a failed rebuild must stop here.
                    goto clean;
                }
            } else {
                if(U_FAILURE(*status)){
                    goto clean;
                }
                if((uint32_t)len > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) {
                    result = ucol_initCollator((const UCATableHeader *)inData, result, UCA, status);
                    if(U_FAILURE(*status)){
                        goto clean;
                    }
                    result->hasRealData = TRUE;
                } else {
                    // The image carries only options: run on the UCA data with them.
                    result = ucol_initCollator(UCA->image, result, UCA, status);
                    ucol_setOptionsFromHeader(result, (UColOptionSet *)(inData+((const UCATableHeader *)inData)->options), status);
                    if(U_FAILURE(*status)){
                        goto clean;
                    }
                    result->hasRealData = FALSE;
                }
                result->freeImageOnClose = FALSE;

                reorderRes = ures_getByKey(collElem, "%%ReorderCodes", NULL, &intStatus);
                if (U_SUCCESS(intStatus)) {
                    int32_t reorderCodesLen = 0;
                    const int32_t* reorderCodes = ures_getIntVector(reorderRes, &reorderCodesLen, status);
                    if (reorderCodesLen > 0) {
                        ucol_setReorderCodes(result, reorderCodes, reorderCodesLen, status);
                        if (U_FAILURE(*status)) {
                            goto clean;
                        }
                        // copy the reorder codes into the default reorder codes
                        int32_t *defaultCodes = (int32_t*) uprv_malloc(result->reorderCodesLength * sizeof(int32_t));
                        if (defaultCodes == NULL) {
                            *status = U_MEMORY_ALLOCATION_ERROR;
                            goto clean;
                        }
                        uprv_memcpy(defaultCodes, result->reorderCodes, result->reorderCodesLength * sizeof(int32_t));
                        result->defaultReorderCodes = defaultCodes;
                        result->defaultReorderCodesLength = result->reorderCodesLength;
                        result->freeDefaultReorderCodesOnClose = TRUE;
                    }
                    if (U_FAILURE(*status)) {
                        goto clean;
                    }
                }
            }

        } else { // !U_SUCCESS(binaryStatus)
            if(U_SUCCESS(*status)) {
                *status = intStatus; // propagate underlying error
            }
            goto clean;
        }
        intStatus = U_ZERO_ERROR;
        // A collator built by ucol_openRulesForImport() owns a heap copy of its
        // rules (freeRulesOnClose == TRUE). Release that copy before pointing
        // at the bundle-owned string, otherwise it is leaked.
        if(result->freeRulesOnClose && result->rules != NULL) {
            uprv_free((UChar *)result->rules);
        }
        result->rules = ures_getStringByKey(collElem, "Sequence", &result->rulesLength, &intStatus);
        result->freeRulesOnClose = FALSE;
    } else { /* There is another error, and we're just gonna clean up */
        goto clean;
    }

    intStatus = U_ZERO_ERROR;
    result->ucaRules = ures_getStringByKey(b,"UCARules",NULL,&intStatus);

    if(loc == NULL) {
        loc = ures_getLocaleByType(b, ULOC_ACTUAL_LOCALE, status);
    }
    result->requestedLocale = uprv_strdup(loc);
    /* test for NULL */
    if (result->requestedLocale == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        goto clean;
    }
    loc = ures_getLocaleByType(collElem, ULOC_ACTUAL_LOCALE, status);
    result->actualLocale = uprv_strdup(loc);
    /* test for NULL */
    if (result->actualLocale == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        goto clean;
    }
    loc = ures_getLocaleByType(b, ULOC_ACTUAL_LOCALE, status);
    result->validLocale = uprv_strdup(loc);
    /* test for NULL */
    if (result->validLocale == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        goto clean;
    }

    ures_close(b);
    ures_close(collElem);
    ures_close(binary);
    ures_close(reorderRes);
    return result;

clean:
    ures_close(b);
    ures_close(collElem);
    ures_close(binary);
    ures_close(reorderRes);
    ucol_close(result);
    return NULL;
}
|
|
|
|
/**
 * Opens a collator for a locale.
 * When the service layer is compiled in, Collator::createUCollator() is
 * tried first; if that yields nothing (or the service layer is compiled
 * out), the collator is opened via ucol_open_internal().
 * @param loc    locale ID
 * @param status in/out error code
 * @return the new collator, or NULL
 */
U_CAPI UCollator*
ucol_open(const char *loc,
          UErrorCode *status)
{
    U_NAMESPACE_USE

    UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN);
    UTRACE_DATA1(UTRACE_INFO, "locale = \"%s\"", loc);
    UCollator *coll = NULL;

#if !UCONFIG_NO_SERVICE
    coll = Collator::createUCollator(loc, status);
#endif
    if (coll == NULL) {
        coll = ucol_open_internal(loc, status);
    }
    UTRACE_EXIT_PTR_STATUS(coll, *status);
    return coll;
}
|
|
|
|
|
|
/**
 * Builds a collator from a rule string, resolving imports through importFunc.
 *
 * @param rules             rule string; must not be NULL
 * @param rulesLength       length of rules, or -1 when NUL-terminated
 * @param normalizationMode UCOL_OFF, UCOL_ON or UCOL_DEFAULT
 * @param strength          strength attribute applied to the new collator
 * @param parseError        receives parse error details; may be NULL
 * @param importFunc        callback handed to the rule tokenizer
 * @param context           opaque pointer handed to the rule tokenizer
 * @param status            in/out error code; must not be NULL
 * @return the new collator, or NULL when it could not be built
 */
UCollator*
ucol_openRulesForImport( const UChar        *rules,
                         int32_t            rulesLength,
                         UColAttributeValue normalizationMode,
                         UCollationStrength strength,
                         UParseError        *parseError,
                         GetCollationRulesFunction  importFunc,
                         void* context,
                         UErrorCode         *status)
{
    UColTokenParser src;
    UColAttributeValue norm;
    UParseError localParseError;

    if (status == NULL || U_FAILURE(*status)) {
        return 0;
    }
    if (rules == NULL || rulesLength < -1) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    if (rulesLength == -1) {
        rulesLength = u_strlen(rules);
    }
    if (parseError == NULL) {
        // The tokenizer always gets somewhere to write error details.
        parseError = &localParseError;
    }
    if (normalizationMode != UCOL_OFF &&
        normalizationMode != UCOL_ON &&
        normalizationMode != UCOL_DEFAULT) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    norm = normalizationMode;

    UCollator *result = NULL;
    UCATableHeader *table = NULL;
    UCollator *UCA = ucol_initUCA(status);
    if (U_FAILURE(*status)) {
        return NULL;
    }

    ucol_tok_initTokenList(&src, rules, rulesLength, UCA, importFunc, context, status);
    ucol_tok_assembleTokenList(&src, parseError, status);

    if (U_FAILURE(*status)) {
        /* if status is U_ILLEGAL_ARGUMENT_ERROR, src->current points at the offending option */
        /* if status is U_INVALID_FORMAT_ERROR, src->current points after the problematic part of the rules */
        /* so something might be done here... or on lower level */
#ifdef UCOL_DEBUG
        if(*status == U_ILLEGAL_ARGUMENT_ERROR) {
            fprintf(stderr, "bad option starting at offset %i\n", (int)(src.current-src.source));
        } else {
            fprintf(stderr, "invalid rule just before offset %i\n", (int)(src.current-src.source));
        }
#endif
        goto cleanup;
    }

    if (src.resultLen > 0 || src.removeSet != NULL) {
        /* There are rules (or contractions to remove): build a real tailoring. */
        table = ucol_assembleTailoringTable(&src, status);
        if (U_FAILURE(*status)) {
            goto cleanup;
        }
        // builder version
        table->version[0] = UCOL_BUILDER_VERSION;
        // no tailoring information on this level
        table->version[1] = table->version[2] = table->version[3] = 0;
        // set UCD version
        u_getUnicodeVersion(table->UCDVersion);
        // set UCA version
        uprv_memcpy(table->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo));
        result = ucol_initCollator(table, 0, UCA, status);
        if (U_FAILURE(*status)) {
            goto cleanup;
        }
        result->hasRealData = TRUE;
        result->freeImageOnClose = TRUE;
    } else {
        /* No rules but no error either: only options, so run on the UCA image. */
        result = ucol_initCollator(UCA->image, 0, UCA, status);
        if (U_FAILURE(*status)) {
            goto cleanup;
        }
        // Give the collator its own copy of the parsed options.
        UColOptionSet *ownedOptions = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet));
        if (ownedOptions == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto cleanup;
        }
        uprv_memcpy(ownedOptions, src.opts, sizeof(UColOptionSet));
        ucol_setOptionsFromHeader(result, ownedOptions, status);
        result->freeOptionsOnClose = TRUE;
        result->hasRealData = FALSE;
        result->freeImageOnClose = FALSE;
    }

    ucol_setReorderCodesFromParser(result, &src, status);
    if (U_FAILURE(*status)) {
        goto cleanup;
    }

    result->dataVersion[0] = UCOL_BUILDER_VERSION;
    if (rulesLength > 0) {
        // Keep a NUL-terminated private copy of the rule text.
        UChar *rulesCopy = (UChar *)uprv_malloc((rulesLength+1)*U_SIZEOF_UCHAR);
        if (rulesCopy == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto cleanup;
        }
        uprv_memcpy(rulesCopy, rules, rulesLength*U_SIZEOF_UCHAR);
        rulesCopy[rulesLength] = 0;
        result->rules = rulesCopy;
        result->rulesLength = rulesLength;
        result->freeRulesOnClose = TRUE;
    }
    result->ucaRules = NULL;
    result->actualLocale = NULL;
    result->validLocale = NULL;
    result->requestedLocale = NULL;
    ucol_buildPermutationTable(result, status);
    ucol_setAttribute(result, UCOL_STRENGTH, strength, status);
    ucol_setAttribute(result, UCOL_NORMALIZATION_MODE, norm, status);

    ucol_tok_closeTokenList(&src);
    return result;

cleanup:
    // Closing the collator releases what it owns; a table that never made it
    // into a collator has to be freed by hand.
    if (result != NULL) {
        ucol_close(result);
    } else if (table != NULL) {
        uprv_free(table);
    }
    ucol_tok_closeTokenList(&src);
    return NULL;
}
|
|
|
|
/**
 * Builds a collator from a rule string.
 * Thin wrapper over ucol_openRulesForImport() that supplies
 * ucol_tok_getRulesFromBundle as the import callback and no context.
 */
U_CAPI UCollator* U_EXPORT2
ucol_openRules( const UChar        *rules,
                int32_t            rulesLength,
                UColAttributeValue normalizationMode,
                UCollationStrength strength,
                UParseError        *parseError,
                UErrorCode         *status)
{
    return ucol_openRulesForImport(rules, rulesLength,
                                   normalizationMode, strength,
                                   parseError,
                                   ucol_tok_getRulesFromBundle, NULL,
                                   status);
}
|
|
|
|
/**
 * Copies a collator's rules into a caller buffer.
 * With UCOL_FULL_RULES the collator's UCA rules (if it has any) come first,
 * followed by the tailoring rules; otherwise only the tailoring rules are
 * copied. The copy is truncated to bufferLen.
 * @param coll      collator to query
 * @param delta     which rules to return
 * @param buffer    destination; may be NULL
 * @param bufferLen capacity of buffer in UChars
 * @return the value returned by u_terminateUChars() for the full length
 *         (tailoring length plus UCA length)
 */
U_CAPI int32_t U_EXPORT2
ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen) {
    UErrorCode errorCode = U_ZERO_ERROR;

    int32_t tailoringLen = 0;
    const UChar *tailoring = ucol_getRules(coll, &tailoringLen);

    /* For full rules, the UCA rules are placed in front of the tailoring. */
    const UChar *ucaText = NULL;
    int32_t ucaLen = 0;
    if (delta == UCOL_FULL_RULES && coll->ucaRules != NULL) {
        ucaText = coll->ucaRules;
        ucaLen = u_strlen(ucaText);
    }

    if (buffer != NULL && bufferLen > 0) {
        *buffer = 0;
        if (ucaLen > 0) {
            u_memcpy(buffer, ucaText, uprv_min(ucaLen, bufferLen));
        }
        if (tailoringLen > 0 && bufferLen > ucaLen) {
            u_memcpy(buffer + ucaLen, tailoring, uprv_min(tailoringLen, bufferLen - ucaLen));
        }
    }
    return u_terminateUChars(buffer, bufferLen, tailoringLen + ucaLen, &errorCode);
}
|
|
|
|
static const UChar _NUL = 0;
|
|
|
|
U_CAPI const UChar* U_EXPORT2
|
|
ucol_getRules( const UCollator *coll,
|
|
int32_t *length)
|
|
{
|
|
if(coll->rules != NULL) {
|
|
*length = coll->rulesLength;
|
|
return coll->rules;
|
|
}
|
|
else {
|
|
*length = 0;
|
|
return &_NUL;
|
|
}
|
|
}
|
|
|
|
/**
 * Tests whether two collators are equivalent.
 *
 * The collators are equal when they are the same object, or when all
 * attributes and reorder codes match and either the rule strings are
 * identical or the token lists parsed from the two rule strings describe the
 * same resets, tailored elements and expansions.
 *
 * If either rule string cannot be tokenized the collators are reported as
 * not equal: unparsable rules cannot be shown to be equivalent.
 *
 * @param source first collator
 * @param target second collator
 * @return TRUE when the collators are equivalent, FALSE otherwise
 */
U_CAPI UBool U_EXPORT2
ucol_equals(const UCollator *source, const UCollator *target) {
    UErrorCode status = U_ZERO_ERROR;
    // if pointers are equal, collators are equal
    if(source == target) {
        return TRUE;
    }
    int32_t i = 0, j = 0;
    // if any of attributes are different, collators are not equal
    for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
        if(ucol_getAttribute(source, (UColAttribute)i, &status) != ucol_getAttribute(target, (UColAttribute)i, &status) || U_FAILURE(status)) {
            return FALSE;
        }
    }
    if (source->reorderCodesLength != target->reorderCodesLength){
        return FALSE;
    }
    for (i = 0; i < source->reorderCodesLength; i++) {
        if(source->reorderCodes[i] != target->reorderCodes[i]) {
            return FALSE;
        }
    }

    int32_t sourceRulesLen = 0, targetRulesLen = 0;
    const UChar *sourceRules = ucol_getRules(source, &sourceRulesLen);
    const UChar *targetRules = ucol_getRules(target, &targetRulesLen);

    if(sourceRulesLen == targetRulesLen && u_strncmp(sourceRules, targetRules, sourceRulesLen) == 0) {
        // all the attributes are equal and the rules are equal - collators are equal
        return(TRUE);
    }
    // hard part, need to construct tree from rules and see if they yield the same tailoring
    UBool result = TRUE;
    UParseError parseError;
    UColTokenParser sourceParser, targetParser;
    int32_t sourceListLen = 0, targetListLen = 0;
    ucol_tok_initTokenList(&sourceParser, sourceRules, sourceRulesLen, source->UCA, ucol_tok_getRulesFromBundle, NULL, &status);
    ucol_tok_initTokenList(&targetParser, targetRules, targetRulesLen, target->UCA, ucol_tok_getRulesFromBundle, NULL, &status);
    sourceListLen = ucol_tok_assembleTokenList(&sourceParser, &parseError, &status);
    targetListLen = ucol_tok_assembleTokenList(&targetParser, &parseError, &status);

    if(U_FAILURE(status)) {
        // Tokenizing failed: the list lengths are meaningless, so do not let
        // two empty lists pass as "equal".
        result = FALSE;
    } else if(sourceListLen != targetListLen) {
        // different number of resets
        result = FALSE;
    } else {
        UColToken *sourceReset = NULL, *targetReset = NULL;
        UChar *sourceResetString = NULL, *targetResetString = NULL;
        int32_t sourceStringLen = 0, targetStringLen = 0;
        // Token text is stored packed: the low 24 bits are the offset into
        // the parser's source, the high 8 bits are the length.
        for(i = 0; i < sourceListLen; i++) {
            sourceReset = sourceParser.lh[i].reset;
            sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF);
            sourceStringLen = sourceReset->source >> 24;
            // Look for the target list anchored at the same reset string.
            for(j = 0; j < sourceListLen; j++) {
                targetReset = targetParser.lh[j].reset;
                targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF);
                targetStringLen = targetReset->source >> 24;
                if(sourceStringLen == targetStringLen && (u_strncmp(sourceResetString, targetResetString, sourceStringLen) == 0)) {
                    // Same anchor: walk both element lists in lock step.
                    sourceReset = sourceParser.lh[i].first;
                    targetReset = targetParser.lh[j].first;
                    while(sourceReset != NULL && targetReset != NULL) {
                        sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF);
                        sourceStringLen = sourceReset->source >> 24;
                        targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF);
                        targetStringLen = targetReset->source >> 24;
                        if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) {
                            result = FALSE;
                            goto returnResult;
                        }
                        // probably also need to check the expansions
                        if(sourceReset->expansion) {
                            if(!targetReset->expansion) {
                                result = FALSE;
                                goto returnResult;
                            } else {
                                // compare expansions
                                sourceResetString = sourceParser.source+(sourceReset->expansion& 0xFFFFFF);
                                sourceStringLen = sourceReset->expansion >> 24;
                                targetResetString = targetParser.source+(targetReset->expansion & 0xFFFFFF);
                                targetStringLen = targetReset->expansion >> 24;
                                if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) {
                                    result = FALSE;
                                    goto returnResult;
                                }
                            }
                        } else {
                            if(targetReset->expansion) {
                                result = FALSE;
                                goto returnResult;
                            }
                        }
                        sourceReset = sourceReset->next;
                        targetReset = targetReset->next;
                    }
                    if(sourceReset != targetReset) { // at least one is not NULL
                        // there are more tailored elements in one list
                        result = FALSE;
                        goto returnResult;
                    }

                    break;
                }
            }
            // couldn't find the reset anchor, so the collators are not equal
            if(j == sourceListLen) {
                result = FALSE;
                goto returnResult;
            }
        }
    }

returnResult:
    ucol_tok_closeTokenList(&sourceParser);
    ucol_tok_closeTokenList(&targetParser);
    return result;

}
|
|
|
|
/**
 * Writes the display name of a collator locale into a caller buffer.
 * @param objLoc       locale whose collator name is wanted
 * @param dispLoc      locale in which the name is displayed
 * @param result       destination buffer; NULL with resultLength 0 preflights
 * @param resultLength capacity of result in UChars
 * @param status       in/out error code
 * @return -1 when status already indicates failure, otherwise the value
 *         returned by UnicodeString::extract()
 */
U_CAPI int32_t U_EXPORT2
ucol_getDisplayName(    const    char        *objLoc,
                    const    char        *dispLoc,
                    UChar             *result,
                    int32_t         resultLength,
                    UErrorCode        *status)
{
    U_NAMESPACE_USE

    if (U_FAILURE(*status)) {
        return -1;
    }

    UnicodeString name;
    if (result != NULL || resultLength != 0) {
        // Alias the destination buffer; for pure preflighting (NULL, 0) the
        // string stays an empty dummy.
        name.setTo(result, 0, resultLength);
    }
    const Locale objectLocale(objLoc);
    const Locale displayLocale(dispLoc);
    Collator::getDisplayName(objectLocale, displayLocale, name);
    return name.extract(result, resultLength, *status);
}
|
|
|
|
/**
 * Returns the name of the index-th locale for which collation is available.
 * @param index zero-based position in the list of available locales
 * @return the locale name, or NULL when index is out of range (negative or
 *         not below the number of available locales) or no list exists
 */
U_CAPI const char* U_EXPORT2
ucol_getAvailable(int32_t index)
{
    int32_t count = 0;
    const Locale *loc = Collator::getAvailableLocales(count);
    // Reject negative indexes too: they would read before the array.
    if (loc != NULL && index >= 0 && index < count) {
        return loc[index].getName();
    }
    return NULL;
}
|
|
|
|
/**
 * Returns the number of locales for which collation is available, as
 * reported by Collator::getAvailableLocales().
 */
U_CAPI int32_t U_EXPORT2
ucol_countAvailable()
{
    int32_t localeCount = 0;
    // Only the count is needed; the returned array pointer is ignored.
    Collator::getAvailableLocales(localeCount);
    return localeCount;
}
|
|
|
|
#if !UCONFIG_NO_SERVICE
|
|
/**
 * Opens a UEnumeration over the available collation locales.
 * This is a wrapper over Collator::getAvailableLocales().
 * @param status in/out error code; set to U_MEMORY_ALLOCATION_ERROR when no
 *               string enumeration could be obtained
 * @return the enumeration, or NULL on failure
 */
U_CAPI UEnumeration* U_EXPORT2
ucol_openAvailableLocales(UErrorCode *status) {
    U_NAMESPACE_USE

    if (U_FAILURE(*status)) {
        return NULL;
    }
    StringEnumeration *locales = icu::Collator::getAvailableLocales();
    if (locales != NULL) {
        return uenum_openFromStringEnumeration(locales, status);
    }
    *status = U_MEMORY_ALLOCATION_ERROR;
    return NULL;
}
|
|
#endif
|
|
|
|
// Note: KEYWORDS[0] != RESOURCE_NAME - alan
// (the resource is named "collations", the locale keyword is "collation")

// Name of the resource table that holds the collation types.
static const char RESOURCE_NAME[] = "collations";

// Locale keywords understood by the collation service.
static const char* const KEYWORDS[] = { "collation" };

// Number of entries in KEYWORDS.
#define KEYWORD_COUNT (sizeof(KEYWORDS)/sizeof(KEYWORDS[0]))
|
|
|
|
/**
 * Opens an enumeration over the locale keywords relevant to collation
 * (the entries of KEYWORDS).
 * @param status in/out error code
 * @return the enumeration, or NULL when status already indicates failure
 */
U_CAPI UEnumeration* U_EXPORT2
ucol_getKeywords(UErrorCode *status) {
    if (U_FAILURE(*status)) {
        return NULL;
    }
    return uenum_openCharStringsEnumeration(KEYWORDS, KEYWORD_COUNT, status);
}
|
|
|
|
/**
 * Opens an enumeration over all values of a collation keyword.
 * Only the single keyword in KEYWORDS[0] is accepted; this is hard-coded and
 * has to be revisited if another collation keyword is ever added.
 * @param keyword must equal KEYWORDS[0]
 * @param status  in/out error code; U_ILLEGAL_ARGUMENT_ERROR for a NULL or
 *                unknown keyword
 * @return the enumeration, or NULL on failure
 */
U_CAPI UEnumeration* U_EXPORT2
ucol_getKeywordValues(const char *keyword, UErrorCode *status) {
    if (U_FAILURE(*status)) {
        return NULL;
    }
    const UBool isCollationKeyword =
        keyword != NULL && uprv_strcmp(keyword, KEYWORDS[0]) == 0;
    if (!isCollationKeyword) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }
    return ures_getKeywordValues(U_ICUDATA_COLL, RESOURCE_NAME, status);
}
|
|
|
|
// Template copied into every enumeration created by
// ucol_getKeywordValuesForLocale(); that function then stores its result
// list in the copy's context field.
// NOTE(review): the first two members are presumably the base context and
// the context pointer - confirm against the UEnumeration declaration.
static const UEnumeration defaultKeywordValues = {
    NULL,
    NULL,
    ulist_close_keyword_values_iterator,    // close
    ulist_count_keyword_values,             // count
    uenum_unextDefault,                     // next as UChars
    ulist_next_keyword_value,               // next as chars
    ulist_reset_keyword_values_iterator     // reset
};
|
|
|
|
#include <stdio.h>
|
|
|
|
/**
 * Opens an enumeration over the collation types available for a locale.
 *
 * The locale's collation bundle and each of its parents up to root are
 * scanned. The first "default" collation found is placed at the front of the
 * list; every other key found under "collations" follows, without duplicates.
 *
 * @param locale locale ID; only its base name is used
 * @param status in/out error code
 * @return the enumeration, or NULL on failure (the unnamed key and
 *         commonlyUsed parameters are ignored)
 */
U_CAPI UEnumeration* U_EXPORT2
ucol_getKeywordValuesForLocale(const char* /*key*/, const char* locale,
                               UBool /*commonlyUsed*/, UErrorCode* status) {
    /* Get the locale base name. */
    char localeBuffer[ULOC_FULLNAME_CAPACITY] = "";
    uloc_getBaseName(locale, localeBuffer, sizeof(localeBuffer), status);

    /* Create the 2 lists
     * -values is the temp location for the keyword values
     * -results hold the actual list used by the UEnumeration object
     */
    UList *values = ulist_createEmptyList(status);
    UList *results = ulist_createEmptyList(status);
    UEnumeration *en = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
    if (U_FAILURE(*status) || en == NULL) {
        if (en == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
        } else {
            uprv_free(en);
        }
        ulist_deleteList(values);
        ulist_deleteList(results);
        return NULL;
    }

    memcpy(en, &defaultKeywordValues, sizeof(UEnumeration));
    en->context = results;

    /* Open the resource bundle for collation with the given locale. */
    UResourceBundle bundle, collations, collres, defres;
    ures_initStackObject(&bundle);
    ures_initStackObject(&collations);
    ures_initStackObject(&collres);
    ures_initStackObject(&defres);

    ures_openFillIn(&bundle, U_ICUDATA_COLL, localeBuffer, status);

    while (U_SUCCESS(*status)) {
        ures_getByKey(&bundle, RESOURCE_NAME, &collations, status);
        ures_resetIterator(&collations);
        while (U_SUCCESS(*status) && ures_hasNext(&collations)) {
            ures_getNextResource(&collations, &collres, status);
            if (U_FAILURE(*status)) {
                // collres is not usable; the failure is reported after the loops.
                break;
            }
            const char *key = ures_getKey(&collres);
            if (key == NULL) {
                // An entry without a key cannot name a collation type.
                continue;
            }
            /* If the key is default, get the string and store it in results list only
             * if results list is empty.
             */
            if (uprv_strcmp(key, "default") == 0) {
                if (ulist_getListSize(results) == 0) {
                    char *defcoll = (char *)uprv_malloc(sizeof(char) * ULOC_KEYWORDS_CAPACITY);
                    int32_t defcollLength = ULOC_KEYWORDS_CAPACITY;
                    if (defcoll == NULL) {
                        *status = U_MEMORY_ALLOCATION_ERROR;
                        break;
                    }

                    ures_getNextResource(&collres, &defres, status);
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
                    /* optimize - use the utf-8 string */
                    ures_getUTF8String(&defres, defcoll, &defcollLength, TRUE, status);
#else
                    {
                        const UChar* defString = ures_getString(&defres, &defcollLength, status);
                        if(U_SUCCESS(*status)) {
                            if(defcollLength+1 > ULOC_KEYWORDS_CAPACITY) {
                                *status = U_BUFFER_OVERFLOW_ERROR;
                            } else {
                                u_UCharsToChars(defString, defcoll, defcollLength+1);
                            }
                        }
                    }
#endif

                    if (U_SUCCESS(*status)) {
                        // The list is asked to delete defcoll when it is destroyed.
                        ulist_addItemBeginList(results, defcoll, TRUE, status);
                    } else {
                        // Never handed to the list, so release it here.
                        uprv_free(defcoll);
                    }
                }
            } else {
                ulist_addItemEndList(values, key, FALSE, status);
            }
        }

        /* If the locale is "" this is root so exit. */
        if (uprv_strlen(localeBuffer) == 0) {
            break;
        }
        /* Get the parent locale and open a new resource bundle. */
        uloc_getParent(localeBuffer, localeBuffer, sizeof(localeBuffer), status);
        ures_openFillIn(&bundle, U_ICUDATA_COLL, localeBuffer, status);
    }

    ures_close(&defres);
    ures_close(&collres);
    ures_close(&collations);
    ures_close(&bundle);

    if (U_SUCCESS(*status)) {
        /* Append every collected key that is not already in the results. */
        char *value = NULL;
        ulist_resetList(values);
        while ((value = (char *)ulist_getNext(values)) != NULL) {
            if (!ulist_containsString(results, value, (int32_t)uprv_strlen(value))) {
                ulist_addItemEndList(results, value, FALSE, status);
                if (U_FAILURE(*status)) {
                    break;
                }
            }
        }
    }

    ulist_deleteList(values);

    if (U_FAILURE(*status)){
        uenum_close(en);
        en = NULL;
    } else {
        ulist_resetList(results);
    }

    return en;
}
|
|
|
|
/**
 * Finds the locale that is functionally equivalent, for collation, to the
 * given locale and keyword; forwards to ures_getFunctionalEquivalent() on
 * the collation data.
 * N.B.: the resource name is "collations" but the keyword is "collation".
 */
U_CAPI int32_t U_EXPORT2
ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
                             const char* keyword, const char* locale,
                             UBool* isAvailable, UErrorCode* status)
{
    return ures_getFunctionalEquivalent(result, resultCapacity,
                                        U_ICUDATA_COLL, "collations",
                                        keyword, locale,
                                        isAvailable, TRUE, status);
}
|
|
|
|
/* returns the locale name the collation data comes from */
|
|
/*
 * Thin forwarding wrapper: every argument is handed unchanged to
 * ucol_getLocaleByType() and its result is returned as-is.
 */
U_CAPI const char * U_EXPORT2
ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
    const char *localeName = ucol_getLocaleByType(coll, type, status);
    return localeName;
}
|
|
|
|
/*
 * Returns one of the locale names recorded on the collator.
 *
 * coll   - collator to query; NULL is rejected with U_ILLEGAL_ARGUMENT_ERROR
 * type   - ULOC_ACTUAL_LOCALE, ULOC_VALID_LOCALE or ULOC_REQUESTED_LOCALE;
 *          any other value sets U_ILLEGAL_ARGUMENT_ERROR
 * status - ICU error code; nothing is done if it is NULL or already a failure
 *
 * Returns the stored locale string (may be NULL if the field is unset), or
 * NULL on error. When the collator has a delegate, the answer comes from the
 * delegate's getLocale() instead of the collator's own fields.
 */
U_CAPI const char * U_EXPORT2
ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
    const char *result = NULL;
    if(status == NULL || U_FAILURE(*status)) {
        return NULL;
    }
    // Reject a missing collator up front instead of dereferencing it below.
    if(coll == NULL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }
    UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE);
    UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll);

    if(coll->delegate!=NULL) {
        // NOTE(review): if getLocale() returns its object by value, the
        // pointer obtained from getName() refers to a temporary -- confirm
        // that it remains valid for the caller.
        result = ((const Collator*)coll->delegate)->getLocale(type, *status).getName();
        // Every UTRACE_ENTRY needs a matching exit, including on this path.
        // The string itself is deliberately not traced here.
        UTRACE_EXIT_STATUS(*status);
        return result;
    }

    switch(type) {
    case ULOC_ACTUAL_LOCALE:
        result = coll->actualLocale;
        break;
    case ULOC_VALID_LOCALE:
        result = coll->validLocale;
        break;
    case ULOC_REQUESTED_LOCALE:
        result = coll->requestedLocale;
        break;
    default:
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        break;
    }
    UTRACE_DATA1(UTRACE_INFO, "result = %s", result);
    UTRACE_EXIT_STATUS(*status);
    return result;
}
|
|
|
|
/*
 * Replaces the three locale strings stored on the collator.
 *
 * For each of valid / requested / actual (in that order) the string currently
 * held by the collator is released with uprv_free() and the supplied pointer
 * is stored in its place, so the collator takes over the new strings.
 * Does nothing when coll is NULL.
 */
U_CFUNC void U_EXPORT2
ucol_setReqValidLocales(UCollator *coll, char *requestedLocaleToAdopt, char *validLocaleToAdopt, char *actualLocaleToAdopt)
{
    if (coll == NULL) {
        return;
    }

    // valid locale
    if (coll->validLocale != NULL) {
        uprv_free(coll->validLocale);
    }
    coll->validLocale = validLocaleToAdopt;

    // requested locale (should always be present)
    if (coll->requestedLocale != NULL) {
        uprv_free(coll->requestedLocale);
    }
    coll->requestedLocale = requestedLocaleToAdopt;

    // actual locale
    if (coll->actualLocale != NULL) {
        uprv_free(coll->actualLocale);
    }
    coll->actualLocale = actualLocaleToAdopt;
}
|
|
|
|
/*
 * Builds the set of strings tailored by the collator's rules.
 *
 * The rule string is tokenized; for every token that is not a reset, each
 * canonically equivalent form produced by CanonicalIterator that is not
 * rejected by the FCD quick check is added to the set.
 *
 * coll   - collator to inspect; NULL (or a collator without UCA) sets
 *          U_ILLEGAL_ARGUMENT_ERROR
 * status - ICU error code; nothing is done if it is NULL or already a failure.
 *          Set to U_MEMORY_ALLOCATION_ERROR if the set cannot be allocated.
 *
 * Returns a newly allocated set owned by the caller, or NULL on argument or
 * allocation failure.
 */
U_CAPI USet * U_EXPORT2
ucol_getTailoredSet(const UCollator *coll, UErrorCode *status)
{
    U_NAMESPACE_USE

    if(status == NULL || U_FAILURE(*status)) {
        return NULL;
    }
    if(coll == NULL || coll->UCA == NULL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }
    UParseError parseError;
    UColTokenParser src;
    int32_t rulesLen = 0;
    const UChar *rules = ucol_getRules(coll, &rulesLen);
    UBool startOfRules = TRUE;
    // we internally use the C++ class, for the following reasons:
    // 1. we need to utilize canonical iterator, which is a C++ only class
    // 2. canonical iterator returns UnicodeStrings - USet cannot take them
    // 3. USet is internally really UnicodeSet, C is just a wrapper
    UnicodeSet *tailored = new UnicodeSet();
    // Allocation can fail; bail out before the set is dereferenced below.
    // Nothing else has been acquired yet, so there is nothing to clean up.
    if(tailored == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    UnicodeString pattern;
    UnicodeString empty;
    CanonicalIterator it(empty, *status);

    // The idea is to tokenize the rule set. For each non-reset token,
    // we add all the canonically equivalent FCD sequences
    ucol_tok_initTokenList(&src, rules, rulesLen, coll->UCA, ucol_tok_getRulesFromBundle, NULL, status);
    while (ucol_tok_parseNextToken(&src, startOfRules, &parseError, status) != NULL) {
        startOfRules = FALSE;
        if(src.parsedToken.strength != UCOL_TOK_RESET) {
            const UChar *stuff = src.source+(src.parsedToken.charsOffset);
            it.setSource(UnicodeString(stuff, src.parsedToken.charsLen), *status);
            // A bogus string marks the end of the iteration.
            for(pattern = it.next(); !pattern.isBogus(); pattern = it.next()) {
                if(Normalizer::quickCheck(pattern, UNORM_FCD, *status) != UNORM_NO) {
                    tailored->add(pattern);
                }
            }
        }
    }
    ucol_tok_closeTokenList(&src);
    return (USet *)tailored;
}
|
|
|
|
/*
|
|
* Collation Reordering
|
|
*/
|
|
|
|
/*
 * Copies the reorder codes collected by the rule parser into the collator,
 * both as its default reorder codes and as its current reorder codes.
 *
 * Does nothing if *status is already a failure or the parser holds no
 * reorder codes. Otherwise any reorder-code arrays the collator owns are
 * released and replaced by fresh copies of parser->reorderCodes.
 *
 * On allocation failure *status is set to U_MEMORY_ALLOCATION_ERROR. A length
 * field is only published once its array has been allocated, so the collator
 * never advertises a non-zero length together with a NULL array.
 */
void ucol_setReorderCodesFromParser(UCollator *coll, UColTokenParser *parser, UErrorCode *status) {
    if (U_FAILURE(*status)) {
        return;
    }

    if (parser->reorderCodesLength == 0 || parser->reorderCodes == NULL) {
        return;
    }

    // Drop the current reorder codes.
    coll->reorderCodesLength = 0;
    if (coll->reorderCodes != NULL && coll->freeReorderCodesOnClose == TRUE) {
        uprv_free(coll->reorderCodes);
    }
    coll->reorderCodes = NULL;
    coll->freeReorderCodesOnClose = FALSE;

    // Drop the current default reorder codes.
    if (coll->defaultReorderCodes != NULL && coll->freeDefaultReorderCodesOnClose == TRUE) {
        uprv_free(coll->defaultReorderCodes);
    }
    coll->defaultReorderCodes = NULL;
    coll->freeDefaultReorderCodesOnClose = FALSE;
    coll->defaultReorderCodesLength = 0;

    const int32_t codeCount = parser->reorderCodesLength;

    // Install the defaults; the length is set only after the copy exists.
    int32_t *defaultCopy = (int32_t*) uprv_malloc(codeCount * sizeof(int32_t));
    if (defaultCopy == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    uprv_memcpy(defaultCopy, parser->reorderCodes, codeCount * sizeof(int32_t));
    coll->defaultReorderCodes = defaultCopy;
    coll->defaultReorderCodesLength = codeCount;
    coll->freeDefaultReorderCodesOnClose = TRUE;

    // Install the active codes the same way.
    int32_t *activeCopy = (int32_t*) uprv_malloc(codeCount * sizeof(int32_t));
    if (activeCopy == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    uprv_memcpy(activeCopy, parser->reorderCodes, codeCount * sizeof(int32_t));
    coll->reorderCodes = activeCopy;
    coll->reorderCodesLength = codeCount;
    coll->freeReorderCodesOnClose = TRUE;
}
|
|
|
|
/*
|
|
* Data is stored in the reorder code to lead byte table as:
|
|
* index count - unsigned short (2 bytes) - number of index entries
|
|
* data size - unsigned short (2 bytes) - number of unsigned short data elements
|
|
* index[index count] - array of 2 unsigned shorts (4 bytes each entry)
|
|
* - reorder code, offset
|
|
* - index is sorted by reorder code
|
|
* - if an offset has the high bit set then it is not an offset but a single data entry
|
|
* once the high bit is stripped off
|
|
* data[data size] - array of unsigned short (2 bytes each entry)
|
|
* - the data is an unsigned short count followed by count number
|
|
* of lead bytes stored in an unsigned short
|
|
*/
|
|
/*
 * Looks up the lead bytes recorded for a reorder code in the collator image's
 * scriptToLeadByte table.
 *
 * uca             - collator whose image holds the table
 * reorderCode     - code to search for in the index
 * returnLeadBytes - output array receiving the lead bytes
 * returnCapacity  - number of entries returnLeadBytes can hold; a negative
 *                   value is treated as 0
 *
 * Returns the number of lead bytes written (never more than returnCapacity),
 * or 0 if the reorder code is not present in the index.
 */
U_CFUNC int U_EXPORT2
ucol_getLeadBytesForReorderCode(const UCollator *uca, int reorderCode, uint16_t* returnLeadBytes, int returnCapacity) {
    // A negative capacity must not be narrowed into the 16-bit count below,
    // where it would turn into a large positive copy length.
    if (returnCapacity < 0) {
        returnCapacity = 0;
    }

    const uint8_t *table = (const uint8_t *)uca->image + uca->image->scriptToLeadByte;
    const uint16_t reorderCodeIndexLength = *((const uint16_t *)table);
    // The index starts after the two uint16_t header fields.
    const uint16_t *reorderCodeIndex = (const uint16_t *)(table + 2 * sizeof(uint16_t));
    // The data area starts right after the index (two uint16_t per entry).
    const uint16_t *dataOffsetBase = reorderCodeIndex + 2 * reorderCodeIndexLength;

    // reorder code index is 2 uint16_t's - reorder code + offset
    for (int i = 0; i < reorderCodeIndexLength; i++) {
        if (reorderCode != reorderCodeIndex[i*2]) {
            continue;
        }

        const uint16_t dataOffset = reorderCodeIndex[(i*2) + 1];
        if ((dataOffset & 0x8000) == 0x8000) {
            // offset isn't offset but instead is a single data element
            if (returnCapacity >= 1) {
                returnLeadBytes[0] = dataOffset & ~0x8000;
                return 1;
            }
            return 0;
        }

        // The data entry is a count followed by that many lead bytes.
        uint16_t leadByteCount = *(dataOffsetBase + dataOffset);
        if (leadByteCount > returnCapacity) {
            leadByteCount = (uint16_t)returnCapacity;
        }
        uprv_memcpy(returnLeadBytes, dataOffsetBase + dataOffset + 1, leadByteCount * sizeof(uint16_t));
        return leadByteCount;
    }
    return 0;
}
|
|
|
|
/*
|
|
* Data is stored in the lead byte to reorder code table as:
|
|
* index count - unsigned short (2 bytes) - number of index entries
|
|
* data size - unsigned short (2 bytes) - number of unsigned short data elements
|
|
* index[index count] - array of unsigned short (2 bytes each entry)
|
|
* - index is sorted by lead byte
|
|
* - if an index has the high bit set then it is not an index but a single data entry
|
|
* once the high bit is stripped off
|
|
* data[data size] - array of unsigned short (2 bytes each entry)
|
|
* - the data is an unsigned short count followed by count number of reorder codes
|
|
*/
|
|
/*
 * Looks up the reorder codes recorded for a lead byte in the collator image's
 * leadByteToScript table.
 *
 * uca                - collator whose image holds the table
 * leadByte           - index into the table; values below 0 or at/above the
 *                      table's index count yield 0
 * returnReorderCodes - output array receiving the reorder codes
 * returnCapacity     - number of entries returnReorderCodes can hold; a
 *                      negative value is treated as 0
 *
 * Returns the number of reorder codes written (never more than
 * returnCapacity).
 */
U_CFUNC int U_EXPORT2
ucol_getReorderCodesForLeadByte(const UCollator *uca, int leadByte, int16_t* returnReorderCodes, int returnCapacity) {
    // A negative capacity must not be narrowed into the 16-bit count below,
    // where it would turn into a large positive copy length.
    if (returnCapacity < 0) {
        returnCapacity = 0;
    }

    const uint16_t *leadByteTable = (const uint16_t *)((const uint8_t *)uca->image + uca->image->leadByteToScript);
    const uint16_t leadByteIndexLength = *leadByteTable;
    // Reject indexes on either side of the table, not just past its end.
    if (leadByte < 0 || leadByte >= leadByteIndexLength) {
        return 0;
    }

    // The index starts after the two uint16_t header fields.
    const uint16_t leadByteIndex = *(leadByteTable + (2 + leadByte));

    if ((leadByteIndex & 0x8000) == 0x8000) {
        // offset isn't offset but instead is a single data element
        if (returnCapacity >= 1) {
            returnReorderCodes[0] = leadByteIndex & ~0x8000;
            return 1;
        }
        return 0;
    }

    // The data area follows the index; each entry is a count followed by
    // that many reorder codes.
    const uint16_t *reorderCodeData = leadByteTable + (2 + leadByteIndexLength) + leadByteIndex;
    uint16_t reorderCodeCount = *reorderCodeData;
    if (reorderCodeCount > returnCapacity) {
        reorderCodeCount = (uint16_t)returnCapacity;
    }
    uprv_memcpy(returnReorderCodes, reorderCodeData + 1, reorderCodeCount * sizeof(uint16_t));
    return reorderCodeCount;
}
|
|
|
|
// used to mark ignorable reorder code slots
|
|
// Sentinel defined as one past UCOL_REORDER_CODE_LIMIT. ucol_buildPermutationTable()
// writes it into the pre-filled slots of its working code list for codes in
// [UCOL_REORDER_CODE_FIRST, UCOL_REORDER_CODE_LIMIT) that the caller listed
// explicitly, and skips any slot holding it.
static const int32_t UCOL_REORDER_CODE_IGNORE = UCOL_REORDER_CODE_LIMIT + 1;
|
|
|
|
U_CFUNC void U_EXPORT2
|
|
ucol_buildPermutationTable(UCollator *coll, UErrorCode *status) {
|
|
uint16_t leadBytesSize = 256;
|
|
uint16_t leadBytes[256];
|
|
|
|
// The lowest byte that hasn't been assigned a mapping
|
|
int toBottom = 0x03;
|
|
// The highest byte that hasn't been assigned a mapping - don't include the special or trailing
|
|
int toTop = 0xe4;
|
|
|
|
// are we filling from the bottom?
|
|
bool fromTheBottom = true;
|
|
int32_t reorderCodesIndex = -1;
|
|
|
|
// lead bytes that have alread been assigned to the permutation table
|
|
bool newLeadByteUsed[256];
|
|
// permutation table slots that have already been filled
|
|
bool permutationSlotFilled[256];
|
|
|
|
// nothing to do
|
|
if(U_FAILURE(*status) || coll == NULL) {
|
|
return;
|
|
}
|
|
|
|
// clear the reordering
|
|
if (coll->reorderCodes == NULL || coll->reorderCodesLength == 0
|
|
|| (coll->reorderCodesLength == 1 && coll->reorderCodes[0] == UCOL_REORDER_CODE_NONE)) {
|
|
if (coll->leadBytePermutationTable != NULL) {
|
|
if (coll->freeLeadBytePermutationTableOnClose) {
|
|
uprv_free(coll->leadBytePermutationTable);
|
|
}
|
|
coll->leadBytePermutationTable = NULL;
|
|
coll->freeLeadBytePermutationTableOnClose = FALSE;
|
|
coll->reorderCodesLength = 0;
|
|
}
|
|
return;
|
|
}
|
|
|
|
// set reordering to the default reordering
|
|
if (coll->reorderCodes[0] == UCOL_REORDER_CODE_DEFAULT) {
|
|
if (coll->reorderCodesLength != 1) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return;
|
|
}
|
|
if (coll->freeReorderCodesOnClose == TRUE) {
|
|
uprv_free(coll->reorderCodes);
|
|
}
|
|
coll->reorderCodes = NULL;
|
|
coll->freeReorderCodesOnClose = FALSE;
|
|
|
|
if (coll->leadBytePermutationTable != NULL && coll->freeLeadBytePermutationTableOnClose == TRUE) {
|
|
uprv_free(coll->leadBytePermutationTable);
|
|
}
|
|
coll->leadBytePermutationTable = NULL;
|
|
coll->freeLeadBytePermutationTableOnClose = FALSE;
|
|
|
|
if (coll->defaultReorderCodesLength == 0) {
|
|
return;
|
|
}
|
|
|
|
coll->reorderCodes = (int32_t*)uprv_malloc(coll->defaultReorderCodesLength * sizeof(int32_t));
|
|
if (coll->reorderCodes == NULL) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
return;
|
|
}
|
|
coll->freeReorderCodesOnClose = TRUE;
|
|
coll->reorderCodesLength = coll->defaultReorderCodesLength;
|
|
uprv_memcpy(coll->reorderCodes, coll->defaultReorderCodes, coll->reorderCodesLength * sizeof(int32_t));
|
|
}
|
|
|
|
if (coll->leadBytePermutationTable == NULL) {
|
|
coll->leadBytePermutationTable = (uint8_t*)uprv_malloc(256*sizeof(uint8_t));
|
|
if (coll->leadBytePermutationTable == NULL) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
return;
|
|
}
|
|
coll->freeLeadBytePermutationTableOnClose = TRUE;
|
|
}
|
|
|
|
int32_t internalReorderCodesLength = coll->reorderCodesLength + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST);
|
|
LocalMemory<int32_t> internalReorderCodes((int32_t*)uprv_malloc(internalReorderCodesLength * sizeof(int32_t)));
|
|
if (internalReorderCodes.isNull()) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
if (coll->leadBytePermutationTable != NULL && coll->freeLeadBytePermutationTableOnClose == TRUE) {
|
|
uprv_free(coll->leadBytePermutationTable);
|
|
}
|
|
coll->leadBytePermutationTable = NULL;
|
|
coll->freeLeadBytePermutationTableOnClose = FALSE;
|
|
return;
|
|
}
|
|
|
|
// prefill the reordering codes with the leading entries
|
|
for (uint32_t codeIndex = 0; codeIndex < (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST); codeIndex++) {
|
|
internalReorderCodes[codeIndex] = UCOL_REORDER_CODE_FIRST + codeIndex;
|
|
}
|
|
for (int32_t codeIndex = 0; codeIndex < coll->reorderCodesLength; codeIndex++) {
|
|
uint32_t reorderCodesCode = coll->reorderCodes[codeIndex];
|
|
internalReorderCodes[codeIndex + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST)] = reorderCodesCode;
|
|
if (reorderCodesCode >= UCOL_REORDER_CODE_FIRST && reorderCodesCode < UCOL_REORDER_CODE_LIMIT) {
|
|
internalReorderCodes[reorderCodesCode - UCOL_REORDER_CODE_FIRST] = UCOL_REORDER_CODE_IGNORE;
|
|
}
|
|
}
|
|
|
|
for (int i = 0; i < 256; i++) {
|
|
if (i < toBottom || i > toTop) {
|
|
permutationSlotFilled[i] = true;
|
|
newLeadByteUsed[i] = true;
|
|
coll->leadBytePermutationTable[i] = i;
|
|
} else {
|
|
permutationSlotFilled[i] = false;
|
|
newLeadByteUsed[i] = false;
|
|
coll->leadBytePermutationTable[i] = 0;
|
|
}
|
|
}
|
|
|
|
/* Start from the front of the list and place each script we encounter at the
|
|
* earliest possible locatation in the permutation table. If we encounter
|
|
* UNKNOWN, start processing from the back, and place each script in the last
|
|
* possible location. At each step, we also need to make sure that any scripts
|
|
* that need to not be moved are copied to their same location in the final table.
|
|
*/
|
|
for (int reorderCodesCount = 0; reorderCodesCount < internalReorderCodesLength; reorderCodesCount++) {
|
|
reorderCodesIndex += fromTheBottom ? 1 : -1;
|
|
int32_t next = internalReorderCodes[reorderCodesIndex];
|
|
if (next == UCOL_REORDER_CODE_IGNORE) {
|
|
continue;
|
|
}
|
|
if (next == USCRIPT_UNKNOWN) {
|
|
if (fromTheBottom == false) {
|
|
// double turnaround
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
if (coll->leadBytePermutationTable != NULL && coll->freeLeadBytePermutationTableOnClose == TRUE) {
|
|
uprv_free(coll->leadBytePermutationTable);
|
|
}
|
|
coll->leadBytePermutationTable = NULL;
|
|
coll->freeLeadBytePermutationTableOnClose = FALSE;
|
|
coll->reorderCodesLength = 0;
|
|
return;
|
|
}
|
|
fromTheBottom = false;
|
|
reorderCodesIndex = internalReorderCodesLength;
|
|
continue;
|
|
}
|
|
|
|
uint16_t leadByteCount = ucol_getLeadBytesForReorderCode(coll->UCA, next, leadBytes, leadBytesSize);
|
|
if (fromTheBottom) {
|
|
for (int leadByteIndex = 0; leadByteIndex < leadByteCount; leadByteIndex++) {
|
|
// don't place a lead byte twice in the permutation table
|
|
if (permutationSlotFilled[leadBytes[leadByteIndex]]) {
|
|
// lead byte already used
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
if (coll->leadBytePermutationTable != NULL && coll->freeLeadBytePermutationTableOnClose == TRUE) {
|
|
uprv_free(coll->leadBytePermutationTable);
|
|
}
|
|
coll->leadBytePermutationTable = NULL;
|
|
coll->freeLeadBytePermutationTableOnClose = FALSE;
|
|
coll->reorderCodesLength = 0;
|
|
return;
|
|
}
|
|
|
|
coll->leadBytePermutationTable[leadBytes[leadByteIndex]] = toBottom;
|
|
newLeadByteUsed[toBottom] = true;
|
|
permutationSlotFilled[leadBytes[leadByteIndex]] = true;
|
|
toBottom++;
|
|
}
|
|
} else {
|
|
for (int leadByteIndex = leadByteCount - 1; leadByteIndex >= 0; leadByteIndex--) {
|
|
// don't place a lead byte twice in the permutation table
|
|
if (permutationSlotFilled[leadBytes[leadByteIndex]]) {
|
|
// lead byte already used
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
if (coll->leadBytePermutationTable != NULL && coll->freeLeadBytePermutationTableOnClose == TRUE) {
|
|
uprv_free(coll->leadBytePermutationTable);
|
|
}
|
|
coll->leadBytePermutationTable = NULL;
|
|
coll->freeLeadBytePermutationTableOnClose = FALSE;
|
|
coll->reorderCodesLength = 0;
|
|
return;
|
|
}
|
|
|
|
coll->leadBytePermutationTable[leadBytes[leadByteIndex]] = toTop;
|
|
newLeadByteUsed[toTop] = true;
|
|
permutationSlotFilled[leadBytes[leadByteIndex]] = true;
|
|
toTop--;
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifdef REORDER_DEBUG
|
|
fprintf(stdout, "\n@@@@ Partial Script Reordering Table\n");
|
|
for (int i = 0; i < 256; i++) {
|
|
fprintf(stdout, "\t%02x = %02x\n", i, coll->leadBytePermutationTable[i]);
|
|
}
|
|
fprintf(stdout, "\n@@@@ Lead Byte Used Table\n");
|
|
for (int i = 0; i < 256; i++) {
|
|
fprintf(stdout, "\t%02x = %02x\n", i, newLeadByteUsed[i]);
|
|
}
|
|
fprintf(stdout, "\n@@@@ Permutation Slot Filled Table\n");
|
|
for (int i = 0; i < 256; i++) {
|
|
fprintf(stdout, "\t%02x = %02x\n", i, permutationSlotFilled[i]);
|
|
}
|
|
#endif
|
|
|
|
/* Copy everything that's left over */
|
|
int reorderCode = 0;
|
|
for (int i = 0; i < 256; i++) {
|
|
if (!permutationSlotFilled[i]) {
|
|
while (reorderCode < 256 && newLeadByteUsed[reorderCode]) {
|
|
reorderCode++;
|
|
}
|
|
coll->leadBytePermutationTable[i] = reorderCode;
|
|
permutationSlotFilled[i] = true;
|
|
newLeadByteUsed[reorderCode] = true;
|
|
}
|
|
}
|
|
|
|
#ifdef REORDER_DEBUG
|
|
fprintf(stdout, "\n@@@@ Script Reordering Table\n");
|
|
for (int i = 0; i < 256; i++) {
|
|
fprintf(stdout, "\t%02x = %02x\n", i, coll->leadBytePermutationTable[i]);
|
|
}
|
|
#endif
|
|
|
|
// force a regen of the latin one table since it is affected by the script reordering
|
|
coll->latinOneRegenTable = TRUE;
|
|
ucol_updateInternalState(coll, status);
|
|
}
|
|
|
|
#endif /* #if !UCONFIG_NO_COLLATION */
|