gecko/intl/icu/source/i18n/msgfmt.cpp
Jeff Walden 805dd78c93 Bug 924839 - Update our embedded ICU to 52.1, plus a very few local patches. r=lots of people, see subsequent lines in this commit message for the original subcomponents (merged together for landing), and the original bug for the original patch divisions
Bug 924839 - Remove a patch already part of ICU 52.1.  See http://bugs.icu-project.org/trac/ticket/10283 but also note the relevant code was removed completely upstream.  r=glandium
* * *
Bug 924839 - Remove another patch already part of ICU 52.1.  See http://bugs.icu-project.org/trac/ticket/10290 for that.  r=gaston
* * *
Bug 924839 - Remove another patch already in ICU 52.1.  See http://bugs.icu-project.org/trac/ticket/10045 for more.  r=Norbert
* * *
Bug 924839 - Remove another patch already applied upstream.  See http://bugs.icu-project.org/trac/changeset/32937 for more.  r=gaston
* * *
Bug 924839 - Update the ICU update script to update to 52.1, *without* applying any of our local patches.  r=glandium
* * *
Bug 924839 - Make the ICU update script only do updating within intl/icu/source and nowhere else.  r=glandium
* * *
Bug 924839 - Implement the changes that would be made by |cd intl/; ./update-icu.sh http://source.icu-project.org/repos/icu/icu/tags/release-52-1/;|, run with the prior changesets' changes made (thus not applying any of our local patches).  These changes don't actually work without subsequent adjustments, but this provides a codebase upon which those adjustments can be made, for the purpose of generating local patches to be kept in intl/icu-patches/.  rs=the-usual-suspects
* * *
Bug 924839 - Update the bug 899722 local patch to make runConfigureICU not override CC/CXX on BSD systems.  r=gaston
* * *
Bug 924839 - Update the bug 724533 patch that makes ICU builds with MozillaBuild on Windows.  r=glandium
* * *
Bug 924839 - Import an upstream patch fixing the genrb tool to properly handle the -R (--omitCollationRules) option.  See http://bugs.icu-project.org/trac/ticket/10043 for the original bug report and a link to the ultimate upstream landing.  r=Norbert
* * *
Bug 924839 - Import the upstream fix for http://bugs.icu-project.org/trac/ticket/10486 so that ICU with -DU_USING_ICU_NAMESPACE=0 will compile on Windows.  r=Norbert
* * *
Bug 924839 - Adjust the update script to update ICU, then to apply all local patches (rather than skipping the second step).  Thus if the update script is properly run, now, the final result should be no changes at all to the tree.  NOT REVIEWED YET
* * *
Bug 924839 - Update jstests that depend on CLDR locale data to match CLDR 24.  r=Norbert
2013-11-12 16:23:48 -08:00

1980 lines
68 KiB
C++

/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2013, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************
*
* File MSGFMT.CPP
*
* Modification History:
*
* Date Name Description
* 02/19/97 aliu Converted from java.
* 03/20/97 helena Finished first cut of implementation.
* 04/10/97 aliu Made to work on AIX. Added stoi to replace wtoi.
* 06/11/97 helena Fixed addPattern to take the pattern correctly.
* 06/17/97 helena Fixed the getPattern to return the correct pattern.
* 07/09/97 helena Made ParsePosition into a class.
* 02/22/99 stephen Removed character literals for EBCDIC safety
* 11/01/09 kirtig Added SelectFormat
********************************************************************/
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/appendable.h"
#include "unicode/choicfmt.h"
#include "unicode/datefmt.h"
#include "unicode/decimfmt.h"
#include "unicode/localpointer.h"
#include "unicode/msgfmt.h"
#include "unicode/plurfmt.h"
#include "unicode/rbnf.h"
#include "unicode/selfmt.h"
#include "unicode/smpdtfmt.h"
#include "unicode/umsg.h"
#include "unicode/ustring.h"
#include "cmemory.h"
#include "patternprops.h"
#include "messageimpl.h"
#include "msgfmt_impl.h"
#include "plurrule_impl.h"
#include "uassert.h"
#include "uelement.h"
#include "uhash.h"
#include "ustrfmt.h"
#include "util.h"
#include "uvector.h"
// *****************************************************************************
// class MessageFormat
// *****************************************************************************
// Pattern syntax characters, written as UTF-16 code unit values rather than
// character literals (the modification history above notes that character
// literals were removed for EBCDIC safety).
// U+0027 APOSTROPHE
#define SINGLE_QUOTE ((UChar)0x0027)
// U+002C COMMA
#define COMMA ((UChar)0x002C)
// U+007B LEFT CURLY BRACKET
#define LEFT_CURLY_BRACE ((UChar)0x007B)
// U+007D RIGHT CURLY BRACKET
#define RIGHT_CURLY_BRACE ((UChar)0x007D)
//---------------------------------------
// static data
// Argument type keywords. Each one is a zero-terminated array of UTF-16
// code units; the ASCII spelling is given in the trailing comment.
static const UChar ID_NUMBER[] = {
0x6E, 0x75, 0x6D, 0x62, 0x65, 0x72, 0 /* "number" */
};
static const UChar ID_DATE[] = {
0x64, 0x61, 0x74, 0x65, 0 /* "date" */
};
static const UChar ID_TIME[] = {
0x74, 0x69, 0x6D, 0x65, 0 /* "time" */
};
static const UChar ID_SPELLOUT[] = {
0x73, 0x70, 0x65, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0 /* "spellout" */
};
static const UChar ID_ORDINAL[] = {
0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0 /* "ordinal" */
};
static const UChar ID_DURATION[] = {
0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0 /* "duration" */
};
// MessageFormat type list: number, date, time, spellout, ordinal or duration.
// The final NULL entry marks the end of the list.
static const UChar * const TYPE_IDS[] = {
ID_NUMBER,
ID_DATE,
ID_TIME,
ID_SPELLOUT,
ID_ORDINAL,
ID_DURATION,
NULL,
};
// Style keywords for number arguments, as zero-terminated UTF-16 arrays.
// ID_EMPTY stands for "no style given" (the default); it is also the first
// entry of DATE_STYLE_IDS further down.
static const UChar ID_EMPTY[] = {
0 /* empty string, used for default so that null can mark end of list */
};
static const UChar ID_CURRENCY[] = {
0x63, 0x75, 0x72, 0x72, 0x65, 0x6E, 0x63, 0x79, 0 /* "currency" */
};
static const UChar ID_PERCENT[] = {
0x70, 0x65, 0x72, 0x63, 0x65, 0x6E, 0x74, 0 /* "percent" */
};
static const UChar ID_INTEGER[] = {
0x69, 0x6E, 0x74, 0x65, 0x67, 0x65, 0x72, 0 /* "integer" */
};
// NumberFormat modifier list, default, currency, percent or integer
// The final NULL entry marks the end of the list.
static const UChar * const NUMBER_STYLE_IDS[] = {
ID_EMPTY,
ID_CURRENCY,
ID_PERCENT,
ID_INTEGER,
NULL,
};
// Style keywords for date/time arguments, as zero-terminated UTF-16 arrays.
static const UChar ID_SHORT[] = {
0x73, 0x68, 0x6F, 0x72, 0x74, 0 /* "short" */
};
static const UChar ID_MEDIUM[] = {
0x6D, 0x65, 0x64, 0x69, 0x75, 0x6D, 0 /* "medium" */
};
static const UChar ID_LONG[] = {
0x6C, 0x6F, 0x6E, 0x67, 0 /* "long" */
};
static const UChar ID_FULL[] = {
0x66, 0x75, 0x6C, 0x6C, 0 /* "full" */
};
// DateFormat modifier list, default, short, medium, long or full
// The final NULL entry marks the end of the list.
static const UChar * const DATE_STYLE_IDS[] = {
ID_EMPTY,
ID_SHORT,
ID_MEDIUM,
ID_LONG,
ID_FULL,
NULL,
};
// DateFormat style constants, listed in the same order as the keyword
// entries of DATE_STYLE_IDS (default, short, medium, long, full). There is
// no terminating entry here.
static const icu::DateFormat::EStyle DATE_STYLES[] = {
icu::DateFormat::kDefault,
icu::DateFormat::kShort,
icu::DateFormat::kMedium,
icu::DateFormat::kLong,
icu::DateFormat::kFull,
};
// Smallest capacity that allocateArgTypes() will allocate for argTypes[].
static const int32_t DEFAULT_INITIAL_CAPACITY = 10;
// Text appended by the formatting code when an argument pointer is NULL.
static const UChar NULL_STRING[] = {
0x6E, 0x75, 0x6C, 0x6C, 0 // "null"
};
// The keyword "other" as a zero-terminated UTF-16 array.
static const UChar OTHER_STRING[] = {
0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other"
};
U_CDECL_BEGIN
/**
 * C-linkage comparison callback handed to uhash_open() when the
 * cachedFormatters table is created. It unwraps the two hash tokens and
 * defers to MessageFormat::equalFormats().
 */
static UBool U_CALLCONV equalFormatsForHash(const UHashTok key1,
                                            const UHashTok key2) {
    const void* const left = key1.pointer;
    const void* const right = key2.pointer;
    return icu::MessageFormat::equalFormats(left, right);
}
U_CDECL_END
U_NAMESPACE_BEGIN
// -------------------------------------
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MessageFormat)
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(FormatNameEnumeration)
//--------------------------------------------------------------------
/**
 * Appends the decimal (radix 10) text of an integer to a UnicodeString.
 *
 * @param i        the value to convert
 * @param appendTo the string that receives the digits
 * @return appendTo, to allow chaining
 */
static UnicodeString& itos(int32_t i, UnicodeString& appendTo) {
    const int32_t kBufferCapacity = 16;
    UChar digits[kBufferCapacity];
    uprv_itou(digits, kBufferCapacity, i, 10, 0);  // 10 == radix
    // Same call as before: the length argument stays -1.
    appendTo.append(digits, -1);
    return appendTo;
}
// AppendableWrapper: encapsulates the result of formatting, keeping track
// of the string and its length.
class AppendableWrapper : public UMemory {
public:
AppendableWrapper(Appendable& appendable) : app(appendable), len(0) {
}
void append(const UnicodeString& s) {
app.appendString(s.getBuffer(), s.length());
len += s.length();
}
void append(const UChar* s, const int32_t sLength) {
app.appendString(s, sLength);
len += sLength;
}
void append(const UnicodeString& s, int32_t start, int32_t length) {
append(s.tempSubString(start, length));
}
void formatAndAppend(const Format* formatter, const Formattable& arg, UErrorCode& ec) {
UnicodeString s;
formatter->format(arg, s, ec);
if (U_SUCCESS(ec)) {
append(s);
}
}
void formatAndAppend(const Format* formatter, const Formattable& arg,
const UnicodeString &argString, UErrorCode& ec) {
if (!argString.isEmpty()) {
if (U_SUCCESS(ec)) {
append(argString);
}
} else {
formatAndAppend(formatter, arg, ec);
}
}
int32_t length() {
return len;
}
private:
Appendable& app;
int32_t len;
};
// -------------------------------------
// Creates a MessageFormat instance based on the pattern.
// The locale is Locale::getDefault(). All cache and array pointers start
// out NULL with zero counts/capacities; the pattern is then handed to
// applyPattern(), which reports problems through `success`.
MessageFormat::MessageFormat(const UnicodeString& pattern,
UErrorCode& success)
: fLocale(Locale::getDefault()), // Uses the default locale
msgPattern(success),
formatAliases(NULL),
formatAliasesCapacity(0),
argTypes(NULL),
argTypeCount(0),
argTypeCapacity(0),
hasArgTypeConflicts(FALSE),
defaultNumberFormat(NULL),
defaultDateFormat(NULL),
cachedFormatters(NULL),
customFormatArgStarts(NULL),
pluralProvider(*this, UPLURAL_TYPE_CARDINAL),
ordinalProvider(*this, UPLURAL_TYPE_ORDINAL)
{
// The name of fLocale is passed for both setLocaleIDs() arguments.
setLocaleIDs(fLocale.getName(), fLocale.getName());
applyPattern(pattern, success);
}
// Creates a MessageFormat instance for the given locale and pattern.
// Same member setup as the default-locale constructor, except that fLocale
// is a copy of newLocale. Problems are reported through `success`.
MessageFormat::MessageFormat(const UnicodeString& pattern,
const Locale& newLocale,
UErrorCode& success)
: fLocale(newLocale),
msgPattern(success),
formatAliases(NULL),
formatAliasesCapacity(0),
argTypes(NULL),
argTypeCount(0),
argTypeCapacity(0),
hasArgTypeConflicts(FALSE),
defaultNumberFormat(NULL),
defaultDateFormat(NULL),
cachedFormatters(NULL),
customFormatArgStarts(NULL),
pluralProvider(*this, UPLURAL_TYPE_CARDINAL),
ordinalProvider(*this, UPLURAL_TYPE_ORDINAL)
{
// The name of fLocale is passed for both setLocaleIDs() arguments.
setLocaleIDs(fLocale.getName(), fLocale.getName());
applyPattern(pattern, success);
}
// Creates a MessageFormat instance for the given locale and pattern, and
// additionally passes parseError to applyPattern() so that the caller can
// receive parse error details. Problems are reported through `success`.
MessageFormat::MessageFormat(const UnicodeString& pattern,
const Locale& newLocale,
UParseError& parseError,
UErrorCode& success)
: fLocale(newLocale),
msgPattern(success),
formatAliases(NULL),
formatAliasesCapacity(0),
argTypes(NULL),
argTypeCount(0),
argTypeCapacity(0),
hasArgTypeConflicts(FALSE),
defaultNumberFormat(NULL),
defaultDateFormat(NULL),
cachedFormatters(NULL),
customFormatArgStarts(NULL),
pluralProvider(*this, UPLURAL_TYPE_CARDINAL),
ordinalProvider(*this, UPLURAL_TYPE_ORDINAL)
{
// The name of fLocale is passed for both setLocaleIDs() arguments.
setLocaleIDs(fLocale.getName(), fLocale.getName());
applyPattern(pattern, parseError, success);
}
// Copy constructor.
// The locale, the parsed pattern and hasArgTypeConflicts are copied in the
// initializer list. Every owned pointer starts out NULL and is filled in by
// copyObjects(). The two plural providers are constructed for *this rather
// than copied from `that`.
MessageFormat::MessageFormat(const MessageFormat& that)
:
Format(that),
fLocale(that.fLocale),
msgPattern(that.msgPattern),
formatAliases(NULL),
formatAliasesCapacity(0),
argTypes(NULL),
argTypeCount(0),
argTypeCapacity(0),
hasArgTypeConflicts(that.hasArgTypeConflicts),
defaultNumberFormat(NULL),
defaultDateFormat(NULL),
cachedFormatters(NULL),
customFormatArgStarts(NULL),
pluralProvider(*this, UPLURAL_TYPE_CARDINAL),
ordinalProvider(*this, UPLURAL_TYPE_ORDINAL)
{
// This will take care of creating the hash tables (since they are NULL).
UErrorCode ec = U_ZERO_ERROR;
copyObjects(that, ec);
// A constructor has no way to report the error, so on failure the new
// object is left with a cleared pattern instead.
if (U_FAILURE(ec)) {
resetPattern();
}
}
// Destructor: releases every resource this object holds. The individual
// releases do not depend on one another.
MessageFormat::~MessageFormat()
{
    // Default formatter objects.
    delete defaultDateFormat;
    delete defaultNumberFormat;

    // Plain arrays released with uprv_free().
    uprv_free(formatAliases);
    uprv_free(argTypes);

    // Hash tables.
    uhash_close(customFormatArgStarts);
    uhash_close(cachedFormatters);
}
//--------------------------------------------------------------------
// Variable-size array management
/**
 * Makes sure argTypes[] can hold at least `capacity` entries.
 *
 * Nothing happens when status already holds a failure (returns FALSE) or
 * when the array is already large enough (returns TRUE). Otherwise the
 * array is (re)allocated with a size of at least DEFAULT_INITIAL_CAPACITY
 * and, for larger requests, at least twice the current capacity.
 *
 * @param capacity the minimum number of entries required
 * @param status   set to U_MEMORY_ALLOCATION_ERROR if the allocation fails;
 *                 argTypes[] and argTypeCapacity are left unchanged then
 * @return TRUE if argTypes[] has the requested capacity on return
 */
UBool MessageFormat::allocateArgTypes(int32_t capacity, UErrorCode& status) {
    if (U_FAILURE(status)) {
        return FALSE;
    }
    if (capacity <= argTypeCapacity) {
        return TRUE;
    }

    int32_t newCapacity = capacity;
    if (newCapacity < DEFAULT_INITIAL_CAPACITY) {
        newCapacity = DEFAULT_INITIAL_CAPACITY;
    } else if (newCapacity < 2*argTypeCapacity) {
        newCapacity = 2*argTypeCapacity;
    }

    void* grown = uprv_realloc(argTypes, sizeof(*argTypes) * newCapacity);
    if (grown == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return FALSE;
    }
    argTypes = static_cast<Formattable::Type*>(grown);
    argTypeCapacity = newCapacity;
    return TRUE;
}
// -------------------------------------
// Assignment operator.
// Copies the base class state, the locale, the parsed pattern and the
// conflict flag, then the owned objects via copyObjects(). If that last
// step fails the pattern is reset, because there is no way to report the
// error to the caller.
const MessageFormat&
MessageFormat::operator=(const MessageFormat& that)
{
    if (this == &that) {
        return *this;
    }

    // Calls the super class for assignment first.
    Format::operator=(that);
    setLocale(that.fLocale);
    msgPattern = that.msgPattern;
    hasArgTypeConflicts = that.hasArgTypeConflicts;

    UErrorCode copyStatus = U_ZERO_ERROR;
    copyObjects(that, copyStatus);
    if (U_FAILURE(copyStatus)) {
        resetPattern();
    }
    return *this;
}
// Equality: same class (checked by Format::operator==), same pattern, same
// locale, and the same custom formatters at the same ARG_START indexes.
// Both custom-format tables are walked in parallel in their own iteration
// order, exactly as before.
UBool
MessageFormat::operator==(const Format& rhs) const
{
    if (this == &rhs) {
        return TRUE;
    }

    MessageFormat& that = (MessageFormat&)rhs;

    // Check class ID before checking MessageFormat members
    if (!Format::operator==(rhs)) {
        return FALSE;
    }
    if (msgPattern != that.msgPattern) {
        return FALSE;
    }
    if (fLocale != that.fLocale) {
        return FALSE;
    }

    // Compare hashtables.
    const UBool thisHasCustom = customFormatArgStarts != NULL;
    const UBool thatHasCustom = that.customFormatArgStarts != NULL;
    if (thisHasCustom != thatHasCustom) {
        return FALSE;
    }
    if (!thisHasCustom) {
        return TRUE;
    }

    const int32_t count = uhash_count(customFormatArgStarts);
    if (count != uhash_count(that.customFormatArgStarts)) {
        return FALSE;
    }

    int32_t pos = -1;
    int32_t rhs_pos = -1;
    for (int32_t visited = 0; visited < count; ++visited) {
        const UHashElement* cur = uhash_nextElement(customFormatArgStarts, &pos);
        const UHashElement* rhs_cur = uhash_nextElement(that.customFormatArgStarts, &rhs_pos);
        const int32_t argStart = cur->key.integer;
        const int32_t rhs_argStart = rhs_cur->key.integer;
        if (argStart != rhs_argStart) {
            return FALSE;
        }
        const Format* format = (const Format*)uhash_iget(cachedFormatters, argStart);
        const Format* rhs_format = (const Format*)uhash_iget(that.cachedFormatters, rhs_argStart);
        if (*format != *rhs_format) {
            return FALSE;
        }
    }
    return TRUE;
}
// -------------------------------------
// Returns a newly allocated copy of this MessageFormat, made with the copy
// constructor. The caller owns the copy.
Format*
MessageFormat::clone() const
{
    MessageFormat* copy = new MessageFormat(*this);
    return copy;
}
// -------------------------------------
// Sets the locale of this MessageFormat object to theLocale.
void
MessageFormat::setLocale(const Locale& theLocale)
{
if (fLocale != theLocale) {
delete defaultNumberFormat;
defaultNumberFormat = NULL;
delete defaultDateFormat;
defaultDateFormat = NULL;
fLocale = theLocale;
setLocaleIDs(fLocale.getName(), fLocale.getName());
pluralProvider.reset();
ordinalProvider.reset();
}
}
// -------------------------------------
// Gets the locale of this MessageFormat object.
// The returned reference refers to the fLocale member of this object, so
// setLocale() changes what it denotes.
const Locale&
MessageFormat::getLocale() const
{
return fLocale;
}
void
MessageFormat::applyPattern(const UnicodeString& newPattern,
UErrorCode& status)
{
UParseError parseError;
applyPattern(newPattern,parseError,status);
}
// -------------------------------------
// Applies the new pattern and returns an error if the pattern
// is not correct.
void
MessageFormat::applyPattern(const UnicodeString& pattern,
UParseError& parseError,
UErrorCode& ec)
{
if(U_FAILURE(ec)) {
return;
}
msgPattern.parse(pattern, &parseError, ec);
cacheExplicitFormats(ec);
if (U_FAILURE(ec)) {
resetPattern();
}
}
// Returns this object to the "no pattern" state: the parsed pattern is
// cleared, both hash tables are closed and forgotten, and the argument type
// bookkeeping is zeroed.
void MessageFormat::resetPattern() {
    msgPattern.clear();
    argTypeCount = 0;
    hasArgTypeConflicts = FALSE;

    uhash_close(cachedFormatters);
    uhash_close(customFormatArgStarts);
    cachedFormatters = NULL;
    customFormatArgStarts = NULL;
}
void
MessageFormat::applyPattern(const UnicodeString& pattern,
UMessagePatternApostropheMode aposMode,
UParseError* parseError,
UErrorCode& status) {
if (aposMode != msgPattern.getApostropheMode()) {
msgPattern.clearPatternAndSetApostropheMode(aposMode);
}
applyPattern(pattern, *parseError, status);
}
// -------------------------------------
// Appends the pattern string of this MessageFormat to appendTo.
// When custom formatters have been installed, or when there is no parsed
// pattern at all, appendTo is set to bogus instead.
UnicodeString&
MessageFormat::toPattern(UnicodeString& appendTo) const {
    const UBool hasCustomFormats =
        customFormatArgStarts != NULL && uhash_count(customFormatArgStarts) != 0;
    if (hasCustomFormats || msgPattern.countParts() == 0) {
        appendTo.setToBogus();
        return appendTo;
    }
    return appendTo.append(msgPattern.getPatternString());
}
// Finds the next top-level ARG_START part after partIndex.
// Pass 0 to begin the search; afterwards pass the previously returned
// ARG_START index, which is first advanced to the matching limit part so
// that nested arguments are skipped.
// Returns the part index of the ARG_START, or -1 once MSG_LIMIT is reached.
int32_t MessageFormat::nextTopLevelArgStart(int32_t partIndex) const {
    int32_t i = partIndex;
    if (i != 0) {
        i = msgPattern.getLimitPartIndex(i);
    }
    for (;;) {
        ++i;
        switch (msgPattern.getPartType(i)) {
        case UMSGPAT_PART_TYPE_ARG_START:
            return i;
        case UMSGPAT_PART_TYPE_MSG_LIMIT:
            return -1;
        default:
            break;  // keep scanning
        }
    }
}
// Stores a formatter for the ARG_START part index argStart in the
// cachedFormatters table, creating the table on first use.
//
// Ownership of `formatter` always passes to this function: it is deleted
// here when status already holds a failure or when the table cannot be
// created, and otherwise handed to the table, whose value deleter is
// uprv_deleteUObject.
//
// A NULL formatter is recorded as a DummyFormat placeholder (see
// getCachedFormatter(), which hides such placeholders again). If that
// placeholder cannot be allocated, status is set to
// U_MEMORY_ALLOCATION_ERROR and the table is left untouched.
void MessageFormat::setArgStartFormat(int32_t argStart,
                                      Format* formatter,
                                      UErrorCode& status) {
    if (U_FAILURE(status)) {
        delete formatter;
        return;
    }
    if (cachedFormatters == NULL) {
        cachedFormatters=uhash_open(uhash_hashLong, uhash_compareLong,
                                    equalFormatsForHash, &status);
        if (U_FAILURE(status)) {
            delete formatter;
            return;
        }
        uhash_setValueDeleter(cachedFormatters, uprv_deleteUObject);
    }
    if (formatter == NULL) {
        formatter = new DummyFormat();
        if (formatter == NULL) {
            // Report the failure instead of passing a NULL value on to
            // uhash_iput(), which would not record the placeholder.
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
    }
    uhash_iput(cachedFormatters, argStart, formatter, &status);
}
// Tests whether the argument name/number part at partIndex denotes the
// given argument. An ARG_NAME part is compared by text against argName;
// any other part (ARG_NUMBER) is compared by value against argNumber.
UBool MessageFormat::argNameMatches(int32_t partIndex, const UnicodeString& argName, int32_t argNumber) {
    const MessagePattern::Part& part = msgPattern.getPart(partIndex);
    if (part.getType() == UMSGPAT_PART_TYPE_ARG_NAME) {
        return msgPattern.partSubstringMatches(part, argName);
    }
    return part.getValue() == argNumber;  // ARG_NUMBER
}
// Sets a custom formatter for a MessagePattern ARG_START part index.
// "Custom" formatters are provided by the user via setFormat() or similar APIs.
//
// The formatter is stored through setArgStartFormat(), which takes
// ownership of it in every case, and argStart is then recorded in the
// customFormatArgStarts table (created on first use).
//
// On any failure the function returns with the failure in status and does
// not touch customFormatArgStarts. In particular it never passes a NULL
// table to uhash_iputi(), which could previously happen when status was
// already a failure on entry or when uhash_open() failed.
void MessageFormat::setCustomArgStartFormat(int32_t argStart,
                                            Format* formatter,
                                            UErrorCode& status) {
    setArgStartFormat(argStart, formatter, status);
    if (U_FAILURE(status)) {
        return;
    }
    if (customFormatArgStarts == NULL) {
        customFormatArgStarts=uhash_open(uhash_hashLong, uhash_compareLong,
                                         NULL, &status);
        if (U_FAILURE(status)) {
            return;
        }
    }
    uhash_iputi(customFormatArgStarts, argStart, 1, &status);
}
// Looks up the cached formatter stored under the given key (callers in this
// file pass an ARG_START part index).
// Returns NULL when nothing is cached there, or when the cached object is a
// DummyFormat, i.e. the placeholder that represents setFormat(NULL).
Format* MessageFormat::getCachedFormatter(int32_t argumentNumber) const {
    if (cachedFormatters == NULL) {
        return NULL;
    }
    Format* cached = (Format*)uhash_iget(cachedFormatters, argumentNumber);
    if (cached == NULL || dynamic_cast<DummyFormat*>(cached) != NULL) {
        // Not cached, or a DummyFormat representing setFormat(NULL).
        return NULL;
    }
    return cached;
}
// -------------------------------------
// Adopts the given formatters and installs them, in order, as the custom
// formatters of the top-level arguments of the pattern.
// All previously cached and custom formatters are dropped first. Formatters
// that cannot be installed (more formatters than arguments, or an error
// part-way through) are deleted. The newFormats array itself is not freed
// here. A NULL array or a negative count is ignored.
void
MessageFormat::adoptFormats(Format** newFormats,
                            int32_t count) {
    if (newFormats == NULL || count < 0) {
        return;
    }

    // Throw away any cached formatters.
    if (cachedFormatters != NULL) {
        uhash_removeAll(cachedFormatters);
    }
    if (customFormatArgStarts != NULL) {
        uhash_removeAll(customFormatArgStarts);
    }

    UErrorCode status = U_ZERO_ERROR;
    int32_t formatNumber = 0;
    int32_t partIndex = 0;
    while (formatNumber < count && U_SUCCESS(status)) {
        partIndex = nextTopLevelArgStart(partIndex);
        if (partIndex < 0) {
            break;
        }
        setCustomArgStartFormat(partIndex, newFormats[formatNumber], status);
        ++formatNumber;
    }

    // Delete those that didn't get used (if any).
    while (formatNumber < count) {
        delete newFormats[formatNumber];
        ++formatNumber;
    }
}
// -------------------------------------
// Sets the new formats array and updates the array count.
// This MessageFormat instance maks a copy of the new formats.
void
MessageFormat::setFormats(const Format** newFormats,
int32_t count) {
if (newFormats == NULL || count < 0) {
return;
}
// Throw away any cached formatters.
if (cachedFormatters != NULL) {
uhash_removeAll(cachedFormatters);
}
if (customFormatArgStarts != NULL) {
uhash_removeAll(customFormatArgStarts);
}
UErrorCode status = U_ZERO_ERROR;
int32_t formatNumber = 0;
for (int32_t partIndex = 0;
formatNumber < count && U_SUCCESS(status) && (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
Format* newFormat = NULL;
if (newFormats[formatNumber] != NULL) {
newFormat = newFormats[formatNumber]->clone();
if (newFormat == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
}
}
setCustomArgStartFormat(partIndex, newFormat, status);
++formatNumber;
}
if (U_FAILURE(status)) {
resetPattern();
}
}
// -------------------------------------
// Adopts newFormat as the custom formatter of the n-th top-level argument
// (counting from 0 in pattern order).
// If n is negative or the pattern has no such argument, nothing is
// installed and newFormat is deleted by the LocalPointer guard.
void
MessageFormat::adoptFormat(int32_t n, Format *newFormat) {
    LocalPointer<Format> p(newFormat);
    if (n < 0) {
        return;
    }
    int32_t partIndex = 0;
    for (int32_t formatNumber = 0;
         (partIndex = nextTopLevelArgStart(partIndex)) >= 0;
         ++formatNumber) {
        if (formatNumber == n) {
            UErrorCode status = U_ZERO_ERROR;
            setCustomArgStartFormat(partIndex, p.orphan(), status);
            return;
        }
    }
}
// -------------------------------------
// Adopt a single format by format name.
// Do nothing if there is no match of formatName.
void
MessageFormat::adoptFormat(const UnicodeString& formatName,
Format* formatToAdopt,
UErrorCode& status) {
LocalPointer<Format> p(formatToAdopt);
if (U_FAILURE(status)) {
return;
}
int32_t argNumber = MessagePattern::validateArgumentName(formatName);
if (argNumber < UMSGPAT_ARG_NAME_NOT_NUMBER) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
for (int32_t partIndex = 0;
(partIndex = nextTopLevelArgStart(partIndex)) >= 0 && U_SUCCESS(status);
) {
if (argNameMatches(partIndex + 1, formatName, argNumber)) {
Format* f;
if (p.isValid()) {
f = p.orphan();
} else if (formatToAdopt == NULL) {
f = NULL;
} else {
f = formatToAdopt->clone();
if (f == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
}
setCustomArgStartFormat(partIndex, f, status);
}
}
}
// -------------------------------------
// Installs a clone of newFormat as the custom formatter of the n-th
// top-level argument (counting from 0 in pattern order).
// Does nothing if n is negative, if the pattern has no such argument, or
// if cloning fails.
void
MessageFormat::setFormat(int32_t n, const Format& newFormat) {
    if (n < 0) {
        return;
    }
    int32_t partIndex = 0;
    for (int32_t formatNumber = 0;
         (partIndex = nextTopLevelArgStart(partIndex)) >= 0;
         ++formatNumber) {
        if (formatNumber != n) {
            continue;
        }
        Format* copy = newFormat.clone();
        if (copy != NULL) {
            UErrorCode status = U_ZERO_ERROR;
            setCustomArgStartFormat(partIndex, copy, status);
        }
        return;
    }
}
// -------------------------------------
// Gets the cached formatter of the first top-level argument whose name or
// number matches formatName.
// Returns NULL if status is already a failure, if nothing is cached at all,
// if no argument matches, or if the match has no real formatter cached.
// An invalid formatName sets U_ILLEGAL_ARGUMENT_ERROR.
Format *
MessageFormat::getFormat(const UnicodeString& formatName, UErrorCode& status) {
    if (U_FAILURE(status) || cachedFormatters == NULL) {
        return NULL;
    }

    const int32_t argNumber = MessagePattern::validateArgumentName(formatName);
    if (argNumber < UMSGPAT_ARG_NAME_NOT_NUMBER) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    int32_t partIndex = nextTopLevelArgStart(0);
    while (partIndex >= 0) {
        if (argNameMatches(partIndex + 1, formatName, argNumber)) {
            return getCachedFormatter(partIndex);
        }
        partIndex = nextTopLevelArgStart(partIndex);
    }
    return NULL;
}
// -------------------------------------
// Set a single format by format name.
// Installs a clone of newFormat as the custom formatter of every top-level
// argument whose name or number matches formatName. Does nothing if no
// argument matches.
//
// @param formatName the argument name or number to look for
// @param newFormat  the formatter to clone for each matching argument
// @param status     in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR for
//                   an invalid formatName and to U_MEMORY_ALLOCATION_ERROR
//                   if cloning fails
void
MessageFormat::setFormat(const UnicodeString& formatName,
                         const Format& newFormat,
                         UErrorCode& status) {
    if (U_FAILURE(status)) return;

    int32_t argNumber = MessagePattern::validateArgumentName(formatName);
    if (argNumber < UMSGPAT_ARG_NAME_NOT_NUMBER) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    for (int32_t partIndex = 0;
        (partIndex = nextTopLevelArgStart(partIndex)) >= 0 && U_SUCCESS(status);
        ) {
        if (argNameMatches(partIndex + 1, formatName, argNumber)) {
            // newFormat is a reference and therefore always designates an
            // object. The former "&newFormat == NULL" test could never be
            // true in well-defined C++ and has been removed.
            Format* new_format = newFormat.clone();
            if (new_format == NULL) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }
            setCustomArgStartFormat(partIndex, new_format, status);
        }
    }
}
// -------------------------------------
// Gets the format array.
//
// This old API returns an array (which we hold) of Format* pointers, one
// per top-level argument in pattern order. The entries are aliases to the
// cached formatters (NULL where getCachedFormatter() returns NULL) and must
// not be deleted by the caller. The array is valid up to the next call to
// any method on this object.
//
// @param cnt receives the number of entries written; 0 on failure
// @return the alias array, or NULL if it could not be allocated
const Format**
MessageFormat::getFormats(int32_t& cnt) const
{
    MessageFormat* t = const_cast<MessageFormat*> (this);
    cnt = 0;

    // Size the array by the number of top-level arguments that the fill
    // loop below will actually visit. argTypeCount is not a safe bound:
    // the same argument can occur several times in the pattern.
    const int32_t kMinCapacity = 10;
    int32_t needed = 0;
    for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
        ++needed;
    }
    if (needed < kMinCapacity) {
        needed = kMinCapacity;
    }

    if (formatAliases == NULL) {
        Format** a = (Format**)
            uprv_malloc(sizeof(Format*) * needed);
        if (a == NULL) {
            t->formatAliasesCapacity = 0;
            return NULL;
        }
        t->formatAliases = a;
        t->formatAliasesCapacity = needed;
    } else if (needed > formatAliasesCapacity) {
        Format** a = (Format**)
            uprv_realloc(formatAliases, sizeof(Format*) * needed);
        if (a == NULL) {
            // The existing buffer is still held in formatAliases, so its
            // recorded capacity is deliberately left unchanged.
            return NULL;
        }
        t->formatAliases = a;
        t->formatAliasesCapacity = needed;
    }

    for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
        t->formatAliases[cnt++] = getCachedFormatter(partIndex);
    }

    return (const Format**)formatAliases;
}
// Returns the pattern substring covered by the part at partIndex (callers
// in this file pass the ARG_NAME/ARG_NUMBER part that follows an ARG_START).
UnicodeString MessageFormat::getArgName(int32_t partIndex) {
    return msgPattern.getSubstring(msgPattern.getPart(partIndex));
}
// Returns an enumeration of the names (or numbers, as text) of all
// top-level arguments, in pattern order. The caller owns the result.
//
// @param status in/out error code; set to U_MEMORY_ALLOCATION_ERROR when
//               an allocation fails
// @return the new enumeration, or NULL on failure. Nothing allocated here
//         is leaked on a failure path.
StringEnumeration*
MessageFormat::getFormatNames(UErrorCode& status) {
    if (U_FAILURE(status))  return NULL;

    UVector *fFormatNames = new UVector(status);
    if (fFormatNames == NULL || U_FAILURE(status)) {
        delete fFormatNames;
        status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    // The vector owns the UnicodeString objects added below.
    fFormatNames->setDeleter(uprv_deleteUObject);

    for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
        // Reserve the slot before allocating the name, so that a growth
        // failure cannot leave a freshly allocated string without an owner.
        if (!fFormatNames->ensureCapacity(fFormatNames->size() + 1, status)) {
            if (U_SUCCESS(status)) {
                status = U_MEMORY_ALLOCATION_ERROR;
            }
            delete fFormatNames;
            return NULL;
        }
        UnicodeString *name = new UnicodeString(getArgName(partIndex + 1));
        if (name == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            delete fFormatNames;
            return NULL;
        }
        fFormatNames->addElement(name, status);
    }

    // The enumeration takes over the vector.
    StringEnumeration* nameEnumerator = new FormatNameEnumeration(fFormatNames, status);
    if (nameEnumerator == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        delete fFormatNames;
    }
    return nameEnumerator;
}
// -------------------------------------
// Formats an array of numbered arguments and appends the result to
// appendTo. Thin wrapper around the general overload, called without
// argument names; the FieldPosition is only passed through.
UnicodeString&
MessageFormat::format(const Formattable* source,
                      int32_t cnt,
                      UnicodeString& appendTo,
                      FieldPosition& ignore,
                      UErrorCode& success) const
{
    const UnicodeString* const noArgumentNames = NULL;
    FieldPosition* const pos = &ignore;
    return format(source, noArgumentNames, cnt, appendTo, pos, success);
}
// -------------------------------------
// Static convenience function: builds a temporary MessageFormat from the
// pattern (default locale), formats the numbered arguments with it and
// appends the result to appendTo.
UnicodeString&
MessageFormat::format(  const UnicodeString& pattern,
                        const Formattable* arguments,
                        int32_t cnt,
                        UnicodeString& appendTo,
                        UErrorCode& success)
{
    MessageFormat temp(pattern, success);
    const UnicodeString* const noArgumentNames = NULL;
    FieldPosition* const noPosition = NULL;
    return temp.format(arguments, noArgumentNames, cnt, appendTo, noPosition, success);
}
// -------------------------------------
// Formats a Formattable that must hold an array of Formattable arguments
// and appends the result to appendTo.
// Any other Formattable type sets U_ILLEGAL_ARGUMENT_ERROR. appendTo is
// returned untouched in that case and when success is already a failure.
UnicodeString&
MessageFormat::format(const Formattable& source,
                      UnicodeString& appendTo,
                      FieldPosition& ignore,
                      UErrorCode& success) const
{
    if (U_FAILURE(success)) {
        return appendTo;
    }
    if (source.getType() == Formattable::kArray) {
        int32_t cnt;
        const Formattable* elements = source.getArray(cnt);
        return format(elements, NULL, cnt, appendTo, &ignore, success);
    }
    success = U_ILLEGAL_ARGUMENT_ERROR;
    return appendTo;
}
// Formats named arguments: argumentNames[i] is the name of arguments[i].
// Thin wrapper around the general overload, called without a FieldPosition.
UnicodeString&
MessageFormat::format(const UnicodeString* argumentNames,
                      const Formattable* arguments,
                      int32_t count,
                      UnicodeString& appendTo,
                      UErrorCode& success) const {
    FieldPosition* const noPosition = NULL;
    return format(arguments, argumentNames, count, appendTo, noPosition, success);
}
// Linear search for the argument called `name`.
// Returns a pointer to the element of arguments[] whose entry in
// argumentNames[] is the first one to compare equal to name, or NULL if
// none does.
const Formattable* MessageFormat::getArgFromListByName(const Formattable* arguments,
                                                       const UnicodeString *argumentNames,
                                                       int32_t cnt, UnicodeString& name) const {
    int32_t i = 0;
    while (i < cnt) {
        if (argumentNames[i].compare(name) == 0) {
            return &arguments[i];
        }
        ++i;
    }
    return NULL;
}
// General formatting entry point behind the public format() overloads.
// Wraps appendTo in an AppendableWrapper and formats the whole message,
// starting at part 0 and with no plural context. Nothing is done when
// status is already a failure. Always returns appendTo.
UnicodeString&
MessageFormat::format(const Formattable* arguments,
                      const UnicodeString *argumentNames,
                      int32_t cnt,
                      UnicodeString& appendTo,
                      FieldPosition* pos,
                      UErrorCode& status) const {
    if (U_SUCCESS(status)) {
        UnicodeStringAppendable usapp(appendTo);
        AppendableWrapper app(usapp);
        format(0, NULL, arguments, argumentNames, cnt, app, pos, status);
    }
    return appendTo;
}
namespace {
/**
* Mutable input/output values for the PluralSelectorProvider.
* Separate so that it is possible to make MessageFormat Freezable.
*
* The formatting code in this file passes a pointer to such an object as a
* const void* (plNumber) and casts it back before use.
*/
class PluralSelectorContext {
public:
/**
* @param start     stored as startIndex
* @param name      stored by reference as argName, so it must stay alive
*                  for as long as this context is used
* @param num       the argument value
* @param off       the plural offset; when non-zero, `number` becomes
*                  num.getDouble(errorCode) - off
* @param errorCode passed to num.getDouble(); only consulted when off != 0
*/
PluralSelectorContext(int32_t start, const UnicodeString &name,
const Formattable &num, double off, UErrorCode &errorCode)
: startIndex(start), argName(name), offset(off),
numberArgIndex(-1), formatter(NULL), forReplaceNumber(FALSE) {
// number needs to be set even when select() is not called.
// Keep it as a Number/Formattable:
// For format() methods, and to preserve information (e.g., BigDecimal).
if(off == 0) {
number = num;
} else {
number = num.getDouble(errorCode) - off;
}
}
// Input values for plural selection with decimals.
int32_t startIndex;
const UnicodeString &argName;
/** argument number - plural offset */
Formattable number;
double offset;
// Output values for plural selection with decimals.
/** -1 if REPLACE_NUMBER, 0 arg not found, >0 ARG_START index */
int32_t numberArgIndex;
const Format *formatter;
/** formatted argument number - plural offset */
UnicodeString numberString;
/** TRUE if number-offset was formatted with the stock number formatter */
UBool forReplaceNumber;
};
} // namespace
// if argumentNames is NULL, this means arguments is a numeric array.
// arguments can not be NULL.
// We use const void *plNumber rather than const PluralSelectorContext *pluralNumber
// so that we need not declare the PluralSelectorContext in the public header file.
void MessageFormat::format(int32_t msgStart, const void *plNumber,
const Formattable* arguments,
const UnicodeString *argumentNames,
int32_t cnt,
AppendableWrapper& appendTo,
FieldPosition* ignore,
UErrorCode& success) const {
if (U_FAILURE(success)) {
return;
}
const UnicodeString& msgString = msgPattern.getPatternString();
int32_t prevIndex = msgPattern.getPart(msgStart).getLimit();
for (int32_t i = msgStart + 1; U_SUCCESS(success) ; ++i) {
const MessagePattern::Part* part = &msgPattern.getPart(i);
const UMessagePatternPartType type = part->getType();
int32_t index = part->getIndex();
appendTo.append(msgString, prevIndex, index - prevIndex);
if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
return;
}
prevIndex = part->getLimit();
if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
const PluralSelectorContext &pluralNumber =
*static_cast<const PluralSelectorContext *>(plNumber);
if(pluralNumber.forReplaceNumber) {
// number-offset was already formatted.
appendTo.formatAndAppend(pluralNumber.formatter,
pluralNumber.number, pluralNumber.numberString, success);
} else {
const NumberFormat* nf = getDefaultNumberFormat(success);
appendTo.formatAndAppend(nf, pluralNumber.number, success);
}
continue;
}
if (type != UMSGPAT_PART_TYPE_ARG_START) {
continue;
}
int32_t argLimit = msgPattern.getLimitPartIndex(i);
UMessagePatternArgType argType = part->getArgType();
part = &msgPattern.getPart(++i);
const Formattable* arg;
UBool noArg = FALSE;
UnicodeString argName = msgPattern.getSubstring(*part);
if (argumentNames == NULL) {
int32_t argNumber = part->getValue(); // ARG_NUMBER
if (0 <= argNumber && argNumber < cnt) {
arg = arguments + argNumber;
} else {
arg = NULL;
noArg = TRUE;
}
} else {
arg = getArgFromListByName(arguments, argumentNames, cnt, argName);
if (arg == NULL) {
noArg = TRUE;
}
}
++i;
int32_t prevDestLength = appendTo.length();
const Format* formatter = NULL;
if (noArg) {
appendTo.append(
UnicodeString(LEFT_CURLY_BRACE).append(argName).append(RIGHT_CURLY_BRACE));
} else if (arg == NULL) {
appendTo.append(NULL_STRING, 4);
} else if(plNumber!=NULL &&
static_cast<const PluralSelectorContext *>(plNumber)->numberArgIndex==(i-2)) {
const PluralSelectorContext &pluralNumber =
*static_cast<const PluralSelectorContext *>(plNumber);
if(pluralNumber.offset == 0) {
// The number was already formatted with this formatter.
appendTo.formatAndAppend(pluralNumber.formatter, pluralNumber.number,
pluralNumber.numberString, success);
} else {
// Do not use the formatted (number-offset) string for a named argument
// that formats the number without subtracting the offset.
appendTo.formatAndAppend(pluralNumber.formatter, *arg, success);
}
} else if ((formatter = getCachedFormatter(i -2))) {
// Handles all ArgType.SIMPLE, and formatters from setFormat() and its siblings.
if (dynamic_cast<const ChoiceFormat*>(formatter) ||
dynamic_cast<const PluralFormat*>(formatter) ||
dynamic_cast<const SelectFormat*>(formatter)) {
// We only handle nested formats here if they were provided via
// setFormat() or its siblings. Otherwise they are not cached and instead
// handled below according to argType.
UnicodeString subMsgString;
formatter->format(*arg, subMsgString, success);
if (subMsgString.indexOf(LEFT_CURLY_BRACE) >= 0 ||
(subMsgString.indexOf(SINGLE_QUOTE) >= 0 && !MessageImpl::jdkAposMode(msgPattern))
) {
MessageFormat subMsgFormat(subMsgString, fLocale, success);
subMsgFormat.format(0, NULL, arguments, argumentNames, cnt, appendTo, ignore, success);
} else {
appendTo.append(subMsgString);
}
} else {
appendTo.formatAndAppend(formatter, *arg, success);
}
} else if (argType == UMSGPAT_ARG_TYPE_NONE || (cachedFormatters && uhash_iget(cachedFormatters, i - 2))) {
// We arrive here if getCachedFormatter returned NULL, but there was actually an element in the hash table.
// This can only happen if the hash table contained a DummyFormat, so the if statement above is a check
// for the hash table containing DummyFormat.
if (arg->isNumeric()) {
const NumberFormat* nf = getDefaultNumberFormat(success);
appendTo.formatAndAppend(nf, *arg, success);
} else if (arg->getType() == Formattable::kDate) {
const DateFormat* df = getDefaultDateFormat(success);
appendTo.formatAndAppend(df, *arg, success);
} else {
appendTo.append(arg->getString(success));
}
} else if (argType == UMSGPAT_ARG_TYPE_CHOICE) {
if (!arg->isNumeric()) {
success = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
// We must use the Formattable::getDouble() variant with the UErrorCode parameter
// because only this one converts non-double numeric types to double.
const double number = arg->getDouble(success);
int32_t subMsgStart = ChoiceFormat::findSubMessage(msgPattern, i, number);
formatComplexSubMessage(subMsgStart, NULL, arguments, argumentNames,
cnt, appendTo, success);
} else if (UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType)) {
if (!arg->isNumeric()) {
success = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
const PluralSelectorProvider &selector =
argType == UMSGPAT_ARG_TYPE_PLURAL ? pluralProvider : ordinalProvider;
// We must use the Formattable::getDouble() variant with the UErrorCode parameter
// because only this one converts non-double numeric types to double.
double offset = msgPattern.getPluralOffset(i);
PluralSelectorContext context(i, argName, *arg, offset, success);
int32_t subMsgStart = PluralFormat::findSubMessage(
msgPattern, i, selector, &context, arg->getDouble(success), success);
formatComplexSubMessage(subMsgStart, &context, arguments, argumentNames,
cnt, appendTo, success);
} else if (argType == UMSGPAT_ARG_TYPE_SELECT) {
int32_t subMsgStart = SelectFormat::findSubMessage(msgPattern, i, arg->getString(success), success);
formatComplexSubMessage(subMsgStart, NULL, arguments, argumentNames,
cnt, appendTo, success);
} else {
// This should never happen.
success = U_INTERNAL_PROGRAM_ERROR;
return;
}
ignore = updateMetaData(appendTo, prevDestLength, ignore, arg);
prevIndex = msgPattern.getPart(argLimit).getLimit();
i = argLimit;
}
}
/**
 * Formats the sub-message whose MSG_START part is at index msgStart and
 * appends the result to appendTo.
 *
 * Outside of JDK apostrophe mode this simply delegates to format().
 * In JDK apostrophe mode the sub-message text is first rebuilt into a
 * temporary string (dropping SKIP_SYNTAX parts and substituting '#'), and that
 * string is re-parsed and formatted only if it still contains a '{'.
 *
 * @param msgStart      Part index of the sub-message's MSG_START.
 * @param plNumber      Pointer to a PluralSelectorContext, or NULL. It is
 *                      dereferenced whenever a REPLACE_NUMBER part is seen.
 * @param arguments     Argument values, passed through to format().
 * @param argumentNames Argument names (may be NULL), passed through to format().
 * @param cnt           Number of arguments.
 * @param appendTo      Destination for the formatted text.
 * @param success       ICU error code; nothing is done if it already indicates failure.
 */
void MessageFormat::formatComplexSubMessage(int32_t msgStart,
                                            const void *plNumber,
                                            const Formattable* arguments,
                                            const UnicodeString *argumentNames,
                                            int32_t cnt,
                                            AppendableWrapper& appendTo,
                                            UErrorCode& success) const {
    if (U_FAILURE(success)) {
        return;
    }
    if (!MessageImpl::jdkAposMode(msgPattern)) {
        format(msgStart, plNumber, arguments, argumentNames, cnt, appendTo, NULL, success);
        return;
    }
    // JDK compatibility mode: (see JDK MessageFormat.format() API docs)
    // - remove SKIP_SYNTAX; that is, remove half of the apostrophes
    // - if the result string contains an open curly brace '{' then
    //   instantiate a temporary MessageFormat object and format again;
    //   otherwise just append the result string
    const UnicodeString& msgString = msgPattern.getPatternString();
    UnicodeString sb;
    int32_t prevIndex = msgPattern.getPart(msgStart).getLimit();
    for (int32_t i = msgStart;;) {
        const MessagePattern::Part& part = msgPattern.getPart(++i);
        const UMessagePatternPartType type = part.getType();
        int32_t index = part.getIndex();
        if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
            sb.append(msgString, prevIndex, index - prevIndex);
            break;
        } else if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER || type == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
            sb.append(msgString, prevIndex, index - prevIndex);
            if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
                // NOTE(review): assumes plNumber is non-NULL whenever the
                // sub-message contains a '#' part -- confirm against callers.
                const PluralSelectorContext &pluralNumber =
                    *static_cast<const PluralSelectorContext *>(plNumber);
                if(pluralNumber.forReplaceNumber) {
                    // number-offset was already formatted.
                    sb.append(pluralNumber.numberString);
                } else {
                    const NumberFormat* nf = getDefaultNumberFormat(success);
                    if (nf == NULL) {
                        // getDefaultNumberFormat() returns NULL only on failure
                        // and reports it through success.
                        return;
                    }
                    // format() appends directly to sb and returns sb itself;
                    // appending that return value again would duplicate the
                    // whole buffer, so the result is intentionally not re-appended.
                    nf->format(pluralNumber.number, sb, success);
                }
            }
            prevIndex = part.getLimit();
        } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
            sb.append(msgString, prevIndex, index - prevIndex);
            prevIndex = index;
            // Copy the whole nested argument verbatim, only reducing doubled apostrophes.
            i = msgPattern.getLimitPartIndex(i);
            index = msgPattern.getPart(i).getLimit();
            MessageImpl::appendReducedApostrophes(msgString, prevIndex, index, sb);
            prevIndex = index;
        }
    }
    if (sb.indexOf(LEFT_CURLY_BRACE) >= 0) {
        UnicodeString emptyPattern;  // gcc 3.3.3 fails with "UnicodeString()" as the first parameter.
        MessageFormat subMsgFormat(emptyPattern, fLocale, success);
        subMsgFormat.applyPattern(sb, UMSGPAT_APOS_DOUBLE_REQUIRED, NULL, success);
        subMsgFormat.format(0, NULL, arguments, argumentNames, cnt, appendTo, NULL, success);
    } else {
        appendTo.append(sb);
    }
}
/**
 * Collects the literal pattern text that follows the part at index "from",
 * up to (not including) the next ARG_START or the MSG_LIMIT part.
 * The text covered by the intermediate parts themselves is left out.
 */
UnicodeString MessageFormat::getLiteralStringUntilNextArgument(int32_t from) const {
    const UnicodeString& pattern = msgPattern.getPatternString();
    UnicodeString literal;
    int32_t copyFrom = msgPattern.getPart(from).getLimit();
    int32_t partIndex = from + 1;
    for (;;) {
        const MessagePattern::Part& current = msgPattern.getPart(partIndex);
        const UMessagePatternPartType partType = current.getType();
        const int32_t partStart = current.getIndex();
        // Copy the literal text between the previous part and this one.
        literal.append(pattern, copyFrom, partStart - copyFrom);
        if (partType == UMSGPAT_PART_TYPE_MSG_LIMIT || partType == UMSGPAT_PART_TYPE_ARG_START) {
            break;
        }
        // Any other part type is unexpected in a parsed message at this point.
        U_ASSERT(partType == UMSGPAT_PART_TYPE_SKIP_SYNTAX || partType == UMSGPAT_PART_TYPE_INSERT_CHAR);
        copyFrom = current.getLimit();
        ++partIndex;
    }
    return literal;
}
/**
 * Placeholder for field-position bookkeeping after an argument was formatted.
 * All parameters are unused and the function always returns NULL.
 */
FieldPosition* MessageFormat::updateMetaData(AppendableWrapper& /*dest*/, int32_t /*prevLength*/,
                                             FieldPosition* /*fp*/, const Formattable* /*argId*/) const {
    // Unlike in Java, there are no field attributes defined for MessageFormat. Do nothing.
    return NULL;
    // The Java-style logic below is kept for reference only; it is not compiled.
    /*
    if (fp != NULL && Field.ARGUMENT.equals(fp.getFieldAttribute())) {
        fp->setBeginIndex(prevLength);
        fp->setEndIndex(dest.get_length());
        return NULL;
    }
    return fp;
    */
}
/**
 * Scans the selector/message tuples of a complex argument for the "other"
 * selector.
 *
 * @param partIndex Index of the first part to examine; if that part carries a
 *                  numeric value it is skipped first.
 * @return The part index just after the matching "other" selector,
 *         or 0 if no such selector was found.
 */
int32_t
MessageFormat::findOtherSubMessage(int32_t partIndex) const {
    const int32_t partCount = msgPattern.countParts();
    if (MessagePattern::Part::hasNumericValue(msgPattern.getPart(partIndex).getType())) {
        ++partIndex;
    }
    // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
    // until ARG_LIMIT or end of plural-only pattern.
    const UnicodeString otherKeyword(FALSE, OTHER_STRING, 5);
    for (;;) {
        const MessagePattern::Part &selector = msgPattern.getPart(partIndex++);
        const UMessagePatternPartType selectorType = selector.getType();
        if (selectorType == UMSGPAT_PART_TYPE_ARG_LIMIT) {
            break;
        }
        U_ASSERT(selectorType == UMSGPAT_PART_TYPE_ARG_SELECTOR);
        // The selector is followed by an optional explicit value, and then a message.
        if (msgPattern.partSubstringMatches(selector, otherKeyword)) {
            return partIndex;
        }
        if (MessagePattern::Part::hasNumericValue(msgPattern.getPartType(partIndex))) {
            ++partIndex;  // skip the numeric-value part of "=1" etc.
        }
        // Jump to the end of this tuple's message, then step to the next selector.
        partIndex = msgPattern.getLimitPartIndex(partIndex);
        if (++partIndex >= partCount) {
            break;
        }
    }
    return 0;
}
/**
 * Looks through the sub-message starting at msgStart for the first place
 * where the plural number would be formatted.
 *
 * @return -1 if a REPLACE_NUMBER part comes first;
 *         the ARG_START part index of the first NONE/SIMPLE argument whose
 *         name or number matches argName (only if argName is not empty);
 *         0 if the end of the sub-message is reached first.
 */
int32_t
MessageFormat::findFirstPluralNumberArg(int32_t msgStart, const UnicodeString &argName) const {
    for (int32_t idx = msgStart + 1;; ++idx) {
        const MessagePattern::Part &current = msgPattern.getPart(idx);
        switch (current.getType()) {
        case UMSGPAT_PART_TYPE_MSG_LIMIT:
            return 0;
        case UMSGPAT_PART_TYPE_REPLACE_NUMBER:
            return -1;
        case UMSGPAT_PART_TYPE_ARG_START: {
            const UMessagePatternArgType kind = current.getArgType();
            const UBool plainOrSimple =
                kind == UMSGPAT_ARG_TYPE_NONE || kind == UMSGPAT_ARG_TYPE_SIMPLE;
            // The part after ARG_START is the ARG_NUMBER or ARG_NAME.
            if (!argName.isEmpty() && plainOrSimple &&
                    msgPattern.partSubstringMatches(msgPattern.getPart(idx + 1), argName)) {
                return idx;
            }
            // Skip over the whole argument, including any nested messages.
            idx = msgPattern.getLimitPartIndex(idx);
            break;
        }
        default:
            break;
        }
    }
}
void MessageFormat::copyObjects(const MessageFormat& that, UErrorCode& ec) {
// Deep copy pointer fields.
// We need not copy the formatAliases because they are re-filled
// in each getFormats() call.
// The defaultNumberFormat, defaultDateFormat and pluralProvider.rules
// also get created on demand.
argTypeCount = that.argTypeCount;
if (argTypeCount > 0) {
if (!allocateArgTypes(argTypeCount, ec)) {
return;
}
uprv_memcpy(argTypes, that.argTypes, argTypeCount * sizeof(argTypes[0]));
}
if (cachedFormatters != NULL) {
uhash_removeAll(cachedFormatters);
}
if (customFormatArgStarts != NULL) {
uhash_removeAll(customFormatArgStarts);
}
if (that.cachedFormatters) {
if (cachedFormatters == NULL) {
cachedFormatters=uhash_open(uhash_hashLong, uhash_compareLong,
equalFormatsForHash, &ec);
if (U_FAILURE(ec)) {
return;
}
uhash_setValueDeleter(cachedFormatters, uprv_deleteUObject);
}
const int32_t count = uhash_count(that.cachedFormatters);
int32_t pos, idx;
for (idx = 0, pos = -1; idx < count && U_SUCCESS(ec); ++idx) {
const UHashElement* cur = uhash_nextElement(that.cachedFormatters, &pos);
Format* newFormat = ((Format*)(cur->value.pointer))->clone();
if (newFormat) {
uhash_iput(cachedFormatters, cur->key.integer, newFormat, &ec);
} else {
ec = U_MEMORY_ALLOCATION_ERROR;
return;
}
}
}
if (that.customFormatArgStarts) {
if (customFormatArgStarts == NULL) {
customFormatArgStarts=uhash_open(uhash_hashLong, uhash_compareLong,
NULL, &ec);
}
const int32_t count = uhash_count(that.customFormatArgStarts);
int32_t pos, idx;
for (idx = 0, pos = -1; idx < count && U_SUCCESS(ec); ++idx) {
const UHashElement* cur = uhash_nextElement(that.customFormatArgStarts, &pos);
uhash_iputi(customFormatArgStarts, cur->key.integer, cur->value.integer, &ec);
}
}
}
/**
 * Parses "source" against the (sub-)message that starts at part index
 * msgStart and returns a newly allocated array of parsed argument values.
 *
 * @param msgStart Part index of the MSG_START to parse against.
 * @param source   Text to parse.
 * @param pos      On input the start offset in source. On success its index is
 *                 advanced past the parsed text; on a mismatch the index is left
 *                 unchanged and the error index is set.
 * @param count    Receives (highest parsed argument number + 1); 0 if none.
 * @param ec       ICU error code. Set to U_ARGUMENT_TYPE_MISMATCH for patterns
 *                 with named arguments, U_UNSUPPORTED_ERROR for plural/select
 *                 arguments, and U_MEMORY_ALLOCATION_ERROR if the result array
 *                 cannot be allocated.
 * @return The result array (to be released with delete[]), or NULL on failure.
 */
Formattable*
MessageFormat::parse(int32_t msgStart,
                     const UnicodeString& source,
                     ParsePosition& pos,
                     int32_t& count,
                     UErrorCode& ec) const {
    count = 0;
    if (U_FAILURE(ec)) {
        pos.setErrorIndex(pos.getIndex());
        return NULL;
    }
    // parse() does not work with named arguments.
    if (msgPattern.hasNamedArguments()) {
        ec = U_ARGUMENT_TYPE_MISMATCH;
        pos.setErrorIndex(pos.getIndex());
        return NULL;
    }
    // Always allocate at least one element so that a non-NULL array can be returned.
    LocalArray<Formattable> resultArray(new Formattable[argTypeCount ? argTypeCount : 1]);
    if (resultArray.isNull()) {
        // Without this check resultArray[argNumber] below would dereference NULL.
        ec = U_MEMORY_ALLOCATION_ERROR;
        pos.setErrorIndex(pos.getIndex());
        return NULL;
    }
    const UnicodeString& msgString=msgPattern.getPatternString();
    int32_t prevIndex=msgPattern.getPart(msgStart).getLimit();
    int32_t sourceOffset = pos.getIndex();
    ParsePosition tempStatus(0);

    for(int32_t i=msgStart+1; ; ++i) {
        UBool haveArgResult = FALSE;
        const MessagePattern::Part* part=&msgPattern.getPart(i);
        const UMessagePatternPartType type=part->getType();
        int32_t index=part->getIndex();
        // Make sure the literal string matches.
        int32_t len = index - prevIndex;
        if (len == 0 || (0 == msgString.compare(prevIndex, len, source, sourceOffset, len))) {
            sourceOffset += len;
            prevIndex += len;
        } else {
            pos.setErrorIndex(sourceOffset);
            return NULL; // leave index as is to signal error
        }
        if(type==UMSGPAT_PART_TYPE_MSG_LIMIT) {
            // Things went well! Done.
            pos.setIndex(sourceOffset);
            return resultArray.orphan();
        }
        if(type==UMSGPAT_PART_TYPE_SKIP_SYNTAX || type==UMSGPAT_PART_TYPE_INSERT_CHAR) {
            prevIndex=part->getLimit();
            continue;
        }
        // We do not support parsing Plural formats. (No REPLACE_NUMBER here.)
        // Unexpected Part "part" in parsed message.
        U_ASSERT(type==UMSGPAT_PART_TYPE_ARG_START);
        int32_t argLimit=msgPattern.getLimitPartIndex(i);

        UMessagePatternArgType argType=part->getArgType();
        part=&msgPattern.getPart(++i);
        int32_t argNumber = part->getValue();  // ARG_NUMBER
        UnicodeString key;
        ++i;
        const Format* formatter = NULL;
        Formattable& argResult = resultArray[argNumber];

        // From here on, (i - 2) is the part index of this argument's ARG_START.
        if(cachedFormatters!=NULL && (formatter = getCachedFormatter(i - 2))!=NULL) {
            // Just parse using the formatter.
            tempStatus.setIndex(sourceOffset);
            formatter->parseObject(source, argResult, tempStatus);
            if (tempStatus.getIndex() == sourceOffset) {
                pos.setErrorIndex(sourceOffset);
                return NULL; // leave index as is to signal error
            }
            sourceOffset = tempStatus.getIndex();
            haveArgResult = TRUE;
        } else if(
            argType==UMSGPAT_ARG_TYPE_NONE || (cachedFormatters && uhash_iget(cachedFormatters, i -2))) {
            // We arrive here if getCachedFormatter returned NULL, but there was actually an element in the hash table.
            // This can only happen if the hash table contained a DummyFormat, so the if statement above is a check
            // for the hash table containing DummyFormat.

            // Match as a string.
            // if at end, use longest possible match
            // otherwise uses first match to intervening string
            // does NOT recursively try all possibilities
            UnicodeString stringAfterArgument = getLiteralStringUntilNextArgument(argLimit);
            int32_t next;
            if (!stringAfterArgument.isEmpty()) {
                next = source.indexOf(stringAfterArgument, sourceOffset);
            } else {
                next = source.length();
            }
            if (next < 0) {
                pos.setErrorIndex(sourceOffset);
                return NULL; // leave index as is to signal error
            } else {
                UnicodeString strValue(source.tempSubString(sourceOffset, next - sourceOffset));
                // Text equal to the literal placeholder "{argNumber}" is not stored as a value.
                UnicodeString compValue;
                compValue.append(LEFT_CURLY_BRACE);
                itos(argNumber, compValue);
                compValue.append(RIGHT_CURLY_BRACE);
                if (0 != strValue.compare(compValue)) {
                    argResult.setString(strValue);
                    haveArgResult = TRUE;
                }
                sourceOffset = next;
            }
        } else if(argType==UMSGPAT_ARG_TYPE_CHOICE) {
            tempStatus.setIndex(sourceOffset);
            double choiceResult = ChoiceFormat::parseArgument(msgPattern, i, source, tempStatus);
            if (tempStatus.getIndex() == sourceOffset) {
                pos.setErrorIndex(sourceOffset);
                return NULL; // leave index as is to signal error
            }
            argResult.setDouble(choiceResult);
            haveArgResult = TRUE;
            sourceOffset = tempStatus.getIndex();
        } else if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) || argType==UMSGPAT_ARG_TYPE_SELECT) {
            // Parsing not supported.
            ec = U_UNSUPPORTED_ERROR;
            return NULL;
        } else {
            // This should never happen.
            ec = U_INTERNAL_PROGRAM_ERROR;
            return NULL;
        }
        if (haveArgResult && count <= argNumber) {
            count = argNumber + 1;
        }
        // Continue after this argument's ARG_LIMIT.
        prevIndex=msgPattern.getPart(argLimit).getLimit();
        i=argLimit;
    }
}
// -------------------------------------
// Parses the source pattern and returns the Formattable objects array,
// the array count and the ending parse position. The caller of this method
// owns the array.
Formattable*
MessageFormat::parse(const UnicodeString& source,
                     ParsePosition& pos,
                     int32_t& count) const {
    // This overload has no error-code parameter: the status of the internal
    // parse is dropped and failure is visible only through the NULL return
    // value and through pos.
    UErrorCode ignoredStatus = U_ZERO_ERROR;
    Formattable* parsed = parse(0, source, pos, count, ignoredStatus);
    return parsed;
}
// -------------------------------------
// Parses the source string and returns the array of
// Formattable objects and the array count. The caller
// owns the returned array.
Formattable*
MessageFormat::parse(const UnicodeString& source,
                     int32_t& cnt,
                     UErrorCode& success) const
{
    // Patterns with named arguments cannot be parsed.
    if (msgPattern.hasNamedArguments()) {
        success = U_ARGUMENT_TYPE_MISMATCH;
        return NULL;
    }
    // Call the actual implementation, starting at offset zero of the source text.
    ParsePosition parsePos(0);
    Formattable* parsed = parse(source, parsePos, cnt);
    if (parsePos.getIndex() != 0) {
        return parsed;
    }
    // The parse position did not advance: report a parse error and
    // release whatever was returned.
    success = U_MESSAGE_PARSE_ERROR;
    delete[] parsed;
    return NULL;
}
// -------------------------------------
// Parses the source text and stores the parsed argument array in the result object.
void
MessageFormat::parseObject( const UnicodeString& source,
Formattable& result,
ParsePosition& status) const
{
int32_t cnt = 0;
Formattable* tmpResult = parse(source, status, cnt);
if (tmpResult != NULL)
result.adoptArray(tmpResult, cnt);
}
/**
 * Runs umsg_autoQuoteApostrophe() over "pattern" and returns the converted
 * string. The returned string is set to bogus if status indicates a failure,
 * either on entry or after the conversion.
 */
UnicodeString
MessageFormat::autoQuoteApostrophe(const UnicodeString& pattern, UErrorCode& status) {
    UnicodeString result;
    if (U_SUCCESS(status)) {
        const int32_t patternLength = pattern.length();
        const UChar* src = pattern.getBuffer();
        // Twice the input length plus space for null termination, for convenience.
        const int32_t capacity = patternLength * 2 + 1;
        UChar* dest = result.getBuffer(capacity);
        if (dest != NULL) {
            const int32_t written =
                umsg_autoQuoteApostrophe(src, patternLength, dest, capacity, &status);
            result.releaseBuffer(U_SUCCESS(status) ? written : 0);
        } else {
            status = U_MEMORY_ALLOCATION_ERROR;
        }
    }
    if (U_FAILURE(status)) {
        result.setToBogus();
    }
    return result;
}
// -------------------------------------
/**
 * Creates a RuleBasedNumberFormat for the given tag and locale and, if
 * defaultRuleSet is not empty, tries to select it as the default rule set.
 * Sets ec to U_MEMORY_ALLOCATION_ERROR if the object could not be allocated.
 */
static Format* makeRBNF(URBNFRuleSetTag tag, const Locale& locale, const UnicodeString& defaultRuleSet, UErrorCode& ec) {
    RuleBasedNumberFormat* rbnf = new RuleBasedNumberFormat(tag, locale, ec);
    if (rbnf == NULL) {
        ec = U_MEMORY_ALLOCATION_ERROR;
        return rbnf;
    }
    if (U_SUCCESS(ec) && !defaultRuleSet.isEmpty()) {
        // A separate status is used so that an unrecognized default rule set is ignored.
        UErrorCode ignoredStatus = U_ZERO_ERROR;
        rbnf->setDefaultRuleSet(defaultRuleSet, ignoredStatus);
    }
    return rbnf;
}
/**
 * Rebuilds the per-pattern caches from the parsed msgPattern:
 * clears cachedFormatters and customFormatArgStarts, recomputes argTypeCount,
 * fills argTypes[] with the Formattable type implied by each numbered
 * argument, creates formatters for SIMPLE arguments, and records in
 * hasArgTypeConflicts whether one argument number is used with different types.
 *
 * @param status ICU error code; nothing is done if it already indicates failure.
 */
void MessageFormat::cacheExplicitFormats(UErrorCode& status) {
    if (U_FAILURE(status)) {
        return;
    }
    if (cachedFormatters != NULL) {
        uhash_removeAll(cachedFormatters);
    }
    if (customFormatArgStarts != NULL) {
        uhash_removeAll(customFormatArgStarts);
    }
    // The last two "parts" can at most be ARG_LIMIT and MSG_LIMIT
    // which we need not examine.
    int32_t limit = msgPattern.countParts() - 2;
    argTypeCount = 0;
    // We also need not look at the first two "parts"
    // (at most MSG_START and ARG_START) in this loop.
    // We determine the argTypeCount first so that we can allocateArgTypes
    // so that the next loop can set argTypes[argNumber].
    // (This is for the C API which needs the argTypes to read its va_arg list.)
    for (int32_t i = 2; i < limit && U_SUCCESS(status); ++i) {
        const MessagePattern::Part& part = msgPattern.getPart(i);
        if (part.getType() == UMSGPAT_PART_TYPE_ARG_NUMBER) {
            const int argNumber = part.getValue();
            if (argNumber >= argTypeCount) {
                argTypeCount = argNumber + 1;
            }
        }
    }
    if (!allocateArgTypes(argTypeCount, status)) {
        return;
    }
    // Set all argTypes to kObject, as a "none" value, for lack of any better value.
    // We never use kObject for real arguments.
    // We use it as "no argument yet" for the check for hasArgTypeConflicts.
    for (int32_t i = 0; i < argTypeCount; ++i) {
        argTypes[i] = Formattable::kObject;
    }
    hasArgTypeConflicts = FALSE;

    // This loop starts at part index 1 because we do need to examine
    // ARG_START parts. (But we can ignore the MSG_START.)
    for (int32_t i = 1; i < limit && U_SUCCESS(status); ++i) {
        const MessagePattern::Part* part = &msgPattern.getPart(i);
        if (part->getType() != UMSGPAT_PART_TYPE_ARG_START) {
            continue;
        }
        UMessagePatternArgType argType = part->getArgType();

        // argNumber stays -1 for named arguments (the next part is then not an ARG_NUMBER).
        int32_t argNumber = -1;
        part = &msgPattern.getPart(i + 1);
        if (part->getType() == UMSGPAT_PART_TYPE_ARG_NUMBER) {
            argNumber = part->getValue();
        }
        Formattable::Type formattableType;

        switch (argType) {
        case UMSGPAT_ARG_TYPE_NONE:
            formattableType = Formattable::kString;
            break;
        case UMSGPAT_ARG_TYPE_SIMPLE: {
            // Remember the ARG_START index; it is the key under which the formatter is stored.
            int32_t index = i;
            // Skip ARG_START and the argument number/name; the next part holds the type keyword.
            i += 2;
            UnicodeString explicitType = msgPattern.getSubstring(msgPattern.getPart(i++));
            UnicodeString style;
            // The style part is optional.
            if ((part = &msgPattern.getPart(i))->getType() == UMSGPAT_PART_TYPE_ARG_STYLE) {
                style = msgPattern.getSubstring(*part);
                ++i;
            }
            UParseError parseError;
            // createAppropriateFormat() also sets formattableType.
            Format* formatter = createAppropriateFormat(explicitType, style, formattableType, parseError, status);
            setArgStartFormat(index, formatter, status);
            break;
        }
        case UMSGPAT_ARG_TYPE_CHOICE:
        case UMSGPAT_ARG_TYPE_PLURAL:
        case UMSGPAT_ARG_TYPE_SELECTORDINAL:
            formattableType = Formattable::kDouble;
            break;
        case UMSGPAT_ARG_TYPE_SELECT:
            formattableType = Formattable::kString;
            break;
        default:
            status = U_INTERNAL_PROGRAM_ERROR;  // Should be unreachable.
            formattableType = Formattable::kString;
            break;
        }
        if (argNumber != -1) {
            // A conflict exists when the same argument number was already seen with another type.
            if (argTypes[argNumber] != Formattable::kObject && argTypes[argNumber] != formattableType) {
                hasArgTypeConflicts = TRUE;
            }
            argTypes[argNumber] = formattableType;
        }
    }
}
/**
 * Creates the Format object for a SIMPLE argument from its type keyword and
 * optional style.
 *
 * @param type            Argument type keyword, looked up in TYPE_IDS.
 * @param style           Style keyword or pattern; may be empty.
 * @param formattableType Receives the Formattable type matching the format.
 * @param parseError      Receives details if a number pattern fails to parse.
 * @param ec              ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR for
 *                        an unknown type keyword.
 * @return The new format (may be NULL), or NULL if ec indicated failure on entry.
 */
Format* MessageFormat::createAppropriateFormat(UnicodeString& type, UnicodeString& style,
                                               Formattable::Type& formattableType, UParseError& parseError,
                                               UErrorCode& ec) {
    if (U_FAILURE(ec)) {
        return NULL;
    }
    Format* created = NULL;
    const int32_t typeIndex = findKeyword(type, TYPE_IDS);
    switch (typeIndex) {
    case 0: { // number
        formattableType = Formattable::kDouble;
        const int32_t numberStyle = findKeyword(style, NUMBER_STYLE_IDS);
        if (numberStyle == 0) {          // default
            created = NumberFormat::createInstance(fLocale, ec);
        } else if (numberStyle == 1) {   // currency
            created = NumberFormat::createCurrencyInstance(fLocale, ec);
        } else if (numberStyle == 2) {   // percent
            created = NumberFormat::createPercentInstance(fLocale, ec);
        } else if (numberStyle == 3) {   // integer
            formattableType = Formattable::kLong;
            created = createIntegerFormat(fLocale, ec);
        } else {                         // pattern
            created = NumberFormat::createInstance(fLocale, ec);
            // A NULL result simply fails the cast, so no separate NULL check is needed.
            DecimalFormat* decimal = dynamic_cast<DecimalFormat*>(created);
            if (decimal != NULL) {
                decimal->applyPattern(style, parseError, ec);
            }
        }
        break;
    }
    case 1:   // date
    case 2: { // time
        formattableType = Formattable::kDate;
        const int32_t dateStyleIndex = findKeyword(style, DATE_STYLE_IDS);
        const UBool isKeywordStyle = dateStyleIndex >= 0;
        const DateFormat::EStyle dateStyle =
            isKeywordStyle ? DATE_STYLES[dateStyleIndex] : DateFormat::kDefault;
        created = (typeIndex == 1)
            ? DateFormat::createDateInstance(dateStyle, fLocale)
            : DateFormat::createTimeInstance(dateStyle, fLocale);
        if (!isKeywordStyle) {
            // The style is not a known keyword: treat it as a date/time pattern.
            SimpleDateFormat* simple = dynamic_cast<SimpleDateFormat*>(created);
            if (simple != NULL) {
                simple->applyPattern(style);
            }
        }
        break;
    }
    case 3: // spellout
        formattableType = Formattable::kDouble;
        created = makeRBNF(URBNF_SPELLOUT, fLocale, style, ec);
        break;
    case 4: // ordinal
        formattableType = Formattable::kDouble;
        created = makeRBNF(URBNF_ORDINAL, fLocale, style, ec);
        break;
    case 5: // duration
        formattableType = Formattable::kDouble;
        created = makeRBNF(URBNF_DURATION, fLocale, style, ec);
        break;
    default:
        formattableType = Formattable::kString;
        ec = U_ILLEGAL_ARGUMENT_ERROR;
        break;
    }
    return created;
}
//-------------------------------------
// Finds the string, s, in the string array, list.
int32_t MessageFormat::findKeyword(const UnicodeString& s,
                                   const UChar * const *list)
{
    // An empty string selects the default, which is entry 0.
    if (s.isEmpty()) {
        return 0;
    }

    // Trim surrounding white space and lowercase before comparing.
    int32_t trimmedLength = s.length();
    const UChar *trimmed = PatternProps::trimWhiteSpace(s.getBuffer(), trimmedLength);
    UnicodeString candidate(FALSE, trimmed, trimmedLength);
    candidate.toLower("");

    // The list is terminated by a NULL entry.
    int32_t index = 0;
    while (list[index] != NULL) {
        if (candidate.compare(list[index], u_strlen(list[index])) == 0) {
            return index;
        }
        ++index;
    }
    return -1;
}
/**
 * Convenience method that ought to be in NumberFormat.
 *
 * Creates a NumberFormat for the locale; if it is a DecimalFormat, it is
 * configured to show no fraction digits, to hide a trailing decimal
 * separator, and to parse integers only.
 */
NumberFormat*
MessageFormat::createIntegerFormat(const Locale& locale, UErrorCode& status) const {
    NumberFormat *integerFormat = NumberFormat::createInstance(locale, status);
    // A NULL instance simply fails the cast.
    DecimalFormat *decimal = dynamic_cast<DecimalFormat*>(integerFormat);
    if (decimal != NULL) {
        decimal->setMaximumFractionDigits(0);
        decimal->setDecimalSeparatorAlwaysShown(FALSE);
        decimal->setParseIntegerOnly(TRUE);
    }
    return integerFormat;
}
/**
 * Returns the default number format, creating and caching it on first use.
 * Used to format a numeric argument for which no explicit formatter exists.
 * Returns NULL on failure, with the failure reported through ec.
 *
 * Semantically const but may modify *this.
 */
const NumberFormat* MessageFormat::getDefaultNumberFormat(UErrorCode& ec) const {
    if (defaultNumberFormat != NULL) {
        return defaultNumberFormat;
    }
    MessageFormat* self = const_cast<MessageFormat*>(this);
    self->defaultNumberFormat = NumberFormat::createInstance(fLocale, ec);
    if (U_FAILURE(ec)) {
        // Do not keep a possibly half-initialized instance.
        delete self->defaultNumberFormat;
        self->defaultNumberFormat = NULL;
    } else if (self->defaultNumberFormat == NULL) {
        ec = U_MEMORY_ALLOCATION_ERROR;
    }
    return defaultNumberFormat;
}
/**
 * Returns the default date format (short date, short time), creating and
 * caching it on first use. Used to format a date argument for which no
 * explicit formatter exists. Returns NULL on failure.
 *
 * Semantically const but may modify *this.
 */
const DateFormat* MessageFormat::getDefaultDateFormat(UErrorCode& ec) const {
    if (defaultDateFormat != NULL) {
        return defaultDateFormat;
    }
    MessageFormat* self = const_cast<MessageFormat*>(this);
    self->defaultDateFormat =
        DateFormat::createDateTimeInstance(DateFormat::kShort, DateFormat::kShort, fLocale);
    if (self->defaultDateFormat == NULL) {
        ec = U_MEMORY_ALLOCATION_ERROR;
    }
    return defaultDateFormat;
}
/** Returns whether the parsed pattern has named arguments, as reported by msgPattern. */
UBool
MessageFormat::usesNamedArguments() const {
    return msgPattern.hasNamedArguments();
}
/** Returns the stored argTypeCount value. */
int32_t
MessageFormat::getArgTypeCount() const {
    return argTypeCount;
}
/**
 * Compares two Format objects passed as untyped pointers, using
 * Format::operator==. Both pointers are dereferenced.
 */
UBool MessageFormat::equalFormats(const void* left, const void* right) {
    const Format* lhs = static_cast<const Format*>(left);
    const Format* rhs = static_cast<const Format*>(right);
    return *lhs == *rhs;
}
/** Always returns TRUE; the other Format is not inspected. */
UBool MessageFormat::DummyFormat::operator==(const Format&) const {
    return TRUE;
}
/** Returns a newly allocated DummyFormat. */
Format* MessageFormat::DummyFormat::clone() const {
    return new DummyFormat();
}
/**
 * Does not format anything: sets status to U_UNSUPPORTED_ERROR (unless it
 * already indicates a failure) and returns appendTo unchanged.
 */
UnicodeString& MessageFormat::DummyFormat::format(const Formattable&,
                                                  UnicodeString& appendTo,
                                                  UErrorCode& status) const {
    if (!U_FAILURE(status)) {
        status = U_UNSUPPORTED_ERROR;
    }
    return appendTo;
}
/**
 * FieldPosition overload. Does not format anything: sets status to
 * U_UNSUPPORTED_ERROR (unless it already indicates a failure) and returns
 * appendTo unchanged.
 */
UnicodeString& MessageFormat::DummyFormat::format(const Formattable&,
                                                  UnicodeString& appendTo,
                                                  FieldPosition&,
                                                  UErrorCode& status) const {
    if (!U_FAILURE(status)) {
        status = U_UNSUPPORTED_ERROR;
    }
    return appendTo;
}
/**
 * FieldPositionIterator overload. Does not format anything: sets status to
 * U_UNSUPPORTED_ERROR (unless it already indicates a failure) and returns
 * appendTo unchanged.
 */
UnicodeString& MessageFormat::DummyFormat::format(const Formattable&,
                                                  UnicodeString& appendTo,
                                                  FieldPositionIterator*,
                                                  UErrorCode& status) const {
    if (!U_FAILURE(status)) {
        status = U_UNSUPPORTED_ERROR;
    }
    return appendTo;
}
/** Intentionally empty: neither the result nor the parse position is modified. */
void MessageFormat::DummyFormat::parseObject(const UnicodeString&,
                                             Formattable&,
                                             ParsePosition& ) const {
}
/**
 * Creates an enumeration over fNameList, starting at position 0.
 * The vector pointer is stored as-is; the destructor deletes it.
 * The status parameter is unused.
 */
FormatNameEnumeration::FormatNameEnumeration(UVector *fNameList, UErrorCode& /*status*/) {
    pos=0;
    fFormatNames = fNameList;
}
/**
 * Returns the next name in the enumeration and advances the position.
 *
 * @param status ICU error code; NULL is returned if it indicates a failure.
 * @return Pointer to the next name, or NULL when there is no name list or
 *         the end of the list has been reached.
 */
const UnicodeString*
FormatNameEnumeration::snext(UErrorCode& status) {
    // Guard against a NULL list, consistent with count().
    if (U_SUCCESS(status) && fFormatNames != NULL && pos < fFormatNames->size()) {
        return (const UnicodeString*)fFormatNames->elementAt(pos++);
    }
    return NULL;
}
/** Restarts the enumeration at position 0. The status parameter is unused. */
void
FormatNameEnumeration::reset(UErrorCode& /*status*/) {
    pos=0;
}
/**
 * Returns the number of names in the list, or 0 if there is no list.
 * The status parameter is unused.
 */
int32_t
FormatNameEnumeration::count(UErrorCode& /*status*/) const {
    if (fFormatNames == NULL) {
        return 0;
    }
    return fFormatNames->size();
}
/** Deletes the name list held by this enumeration. */
FormatNameEnumeration::~FormatNameEnumeration() {
    delete fFormatNames;
}
/**
 * Stores a reference to the owning MessageFormat and the plural type.
 * The plural rules start out as NULL and are created later by select().
 */
MessageFormat::PluralSelectorProvider::PluralSelectorProvider(const MessageFormat &mf, UPluralType t)
        : msgFormat(mf), rules(NULL), type(t) {
}
/** Deletes the plural rules, if any were created. */
MessageFormat::PluralSelectorProvider::~PluralSelectorProvider() {
    delete rules;
}
/**
 * Selects the plural keyword for a number and, as a side effect, fills in the
 * PluralSelectorContext with the formatter to use and the formatted number.
 *
 * @param ctx    Pointer to a PluralSelectorContext; it is dereferenced and modified.
 * @param number The number to select for (argument value minus the offset).
 * @param ec     ICU error code.
 * @return The selected keyword; "other" if ec indicates a failure on entry,
 *         if the plural rules cannot be created, or if no number formatter
 *         is available.
 */
UnicodeString MessageFormat::PluralSelectorProvider::select(void *ctx, double number,
                                                            UErrorCode& ec) const {
    if (U_FAILURE(ec)) {
        return UnicodeString(FALSE, OTHER_STRING, 5);
    }
    // The rules are created lazily, which requires casting away const.
    MessageFormat::PluralSelectorProvider* t = const_cast<MessageFormat::PluralSelectorProvider*>(this);
    if(rules == NULL) {
        t->rules = PluralRules::forLocale(msgFormat.fLocale, type, ec);
        if (U_FAILURE(ec)) {
            return UnicodeString(FALSE, OTHER_STRING, 5);
        }
    }
    // Select a sub-message according to how the number is formatted,
    // which is specified in the selected sub-message.
    // We avoid this circle by looking at how
    // the number is formatted in the "other" sub-message
    // which must always be present and usually contains the number.
    // Message authors should be consistent across sub-messages.
    PluralSelectorContext &context = *static_cast<PluralSelectorContext *>(ctx);
    int32_t otherIndex = msgFormat.findOtherSubMessage(context.startIndex);
    context.numberArgIndex = msgFormat.findFirstPluralNumberArg(otherIndex, context.argName);
    if(context.numberArgIndex > 0 && msgFormat.cachedFormatters != NULL) {
        context.formatter =
            (const Format*)uhash_iget(msgFormat.cachedFormatters, context.numberArgIndex);
    }
    if(context.formatter == NULL) {
        context.formatter = msgFormat.getDefaultNumberFormat(ec);
        context.forReplaceNumber = TRUE;
        if(context.formatter == NULL) {
            // getDefaultNumberFormat() returns NULL on failure and reports it
            // through ec; do not dereference the missing formatter below.
            return UnicodeString(FALSE, OTHER_STRING, 5);
        }
    }
    U_ASSERT(context.number.getDouble(ec) == number);  // argument number minus the offset
    context.formatter->format(context.number, context.numberString, ec);
    // With a DecimalFormat, select on the number as it is actually formatted.
    const DecimalFormat *decFmt = dynamic_cast<const DecimalFormat *>(context.formatter);
    if(decFmt != NULL) {
        FixedDecimal dec = decFmt->getFixedDecimal(context.number, ec);
        return rules->select(dec);
    } else {
        return rules->select(number);
    }
}
/** Deletes the plural rules and clears the pointer so that select() creates them again. */
void MessageFormat::PluralSelectorProvider::reset() {
    delete rules;
    rules = NULL;
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
//eof