/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsTextRunTransformations.h"

#include "mozilla/MemoryReporting.h"

#include "nsGkAtoms.h"
#include "nsStyleConsts.h"
#include "nsStyleContext.h"
#include "nsUnicodeProperties.h"
#include "nsSpecialCasingData.h"
#include "mozilla/gfx/2D.h"
#include "nsTextFrameUtils.h"
#include "nsIPersistentProperties2.h"
#include "nsNetUtil.h"

// Unicode characters needing special casing treatment in tr/az languages
#define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE  0x0130
#define LATIN_SMALL_LETTER_DOTLESS_I           0x0131

// Greek sigma needs custom handling for the lowercase transform; for details
// see comments under "case NS_STYLE_TEXT_TRANSFORM_LOWERCASE" within
// nsCaseTransformTextRunFactory::RebuildTextRun(), and bug 740120.
#define GREEK_CAPITAL_LETTER_SIGMA             0x03A3
#define GREEK_SMALL_LETTER_FINAL_SIGMA         0x03C2
#define GREEK_SMALL_LETTER_SIGMA               0x03C3

// Custom uppercase mapping for Greek; see bug 307039 for details
#define GREEK_LOWER_ALPHA                      0x03B1
#define GREEK_LOWER_ALPHA_TONOS                0x03AC
#define GREEK_LOWER_ALPHA_OXIA                 0x1F71
#define GREEK_LOWER_EPSILON                    0x03B5
#define GREEK_LOWER_EPSILON_TONOS              0x03AD
#define GREEK_LOWER_EPSILON_OXIA               0x1F73
#define GREEK_LOWER_ETA                        0x03B7
#define GREEK_LOWER_ETA_TONOS                  0x03AE
#define GREEK_LOWER_ETA_OXIA                   0x1F75
#define GREEK_LOWER_IOTA                       0x03B9
#define GREEK_LOWER_IOTA_TONOS                 0x03AF
#define GREEK_LOWER_IOTA_OXIA                  0x1F77
#define GREEK_LOWER_IOTA_DIALYTIKA             0x03CA
#define GREEK_LOWER_IOTA_DIALYTIKA_TONOS       0x0390
#define GREEK_LOWER_IOTA_DIALYTIKA_OXIA        0x1FD3
#define GREEK_LOWER_OMICRON                    0x03BF
#define GREEK_LOWER_OMICRON_TONOS              0x03CC
#define GREEK_LOWER_OMICRON_OXIA               0x1F79
#define GREEK_LOWER_UPSILON                    0x03C5
#define GREEK_LOWER_UPSILON_TONOS              0x03CD
#define GREEK_LOWER_UPSILON_OXIA               0x1F7B
#define GREEK_LOWER_UPSILON_DIALYTIKA          0x03CB
#define GREEK_LOWER_UPSILON_DIALYTIKA_TONOS    0x03B0
#define GREEK_LOWER_UPSILON_DIALYTIKA_OXIA     0x1FE3
#define GREEK_LOWER_OMEGA                      0x03C9
#define GREEK_LOWER_OMEGA_TONOS                0x03CE
#define GREEK_LOWER_OMEGA_OXIA                 0x1F7D
#define GREEK_UPPER_ALPHA                      0x0391
#define GREEK_UPPER_EPSILON                    0x0395
#define GREEK_UPPER_ETA                        0x0397
#define GREEK_UPPER_IOTA                       0x0399
#define GREEK_UPPER_IOTA_DIALYTIKA             0x03AA
#define GREEK_UPPER_OMICRON                    0x039F
#define GREEK_UPPER_UPSILON                    0x03A5
#define GREEK_UPPER_UPSILON_DIALYTIKA          0x03AB
#define GREEK_UPPER_OMEGA                      0x03A9
#define GREEK_UPPER_ALPHA_TONOS                0x0386
#define GREEK_UPPER_ALPHA_OXIA                 0x1FBB
#define GREEK_UPPER_EPSILON_TONOS              0x0388
#define GREEK_UPPER_EPSILON_OXIA               0x1FC9
#define GREEK_UPPER_ETA_TONOS                  0x0389
#define GREEK_UPPER_ETA_OXIA                   0x1FCB
#define GREEK_UPPER_IOTA_TONOS                 0x038A
#define GREEK_UPPER_IOTA_OXIA                  0x1FDB
#define GREEK_UPPER_OMICRON_TONOS              0x038C
#define GREEK_UPPER_OMICRON_OXIA               0x1FF9
#define GREEK_UPPER_UPSILON_TONOS              0x038E
#define GREEK_UPPER_UPSILON_OXIA               0x1FEB
#define GREEK_UPPER_OMEGA_TONOS                0x038F
#define GREEK_UPPER_OMEGA_OXIA                 0x1FFB
#define COMBINING_ACUTE_ACCENT                 0x0301
#define COMBINING_DIAERESIS                    0x0308
#define COMBINING_ACUTE_TONE_MARK              0x0341
#define COMBINING_GREEK_DIALYTIKA_TONOS        0x0344

// When doing an Uppercase transform in Greek, we need to keep track of the
// current state while iterating through the string, to recognize and process
// diphthongs correctly. For clarity, we define a state for each vowel and
// each vowel with accent, although a few of these do not actually need any
// special treatment and could be folded into kStart.
enum GreekCasingState {
  kStart,
  kAlpha,
  kEpsilon,
  kEta,
  kIota,
  kOmicron,
  kUpsilon,
  kOmega,
  kAlphaAcc,
  kEpsilonAcc,
  kEtaAcc,
  kIotaAcc,
  kOmicronAcc,
  kUpsilonAcc,
  kOmegaAcc,
  kOmicronUpsilon,
  kDiaeresis
};

/**
 * Map one code point to its Greek-specific uppercase form.
 *
 * @param aCh     the code point to convert
 * @param aState  in/out: the casing state left by the previous character;
 *                updated to reflect aCh so that the next call can recognize
 *                diphthongs and accents that follow a vowel
 * @return the uppercase code point, or uint32_t(-1) when aCh is a combining
 *         acute accent / tone mark that must be omitted from the result.
 *         Characters with no case listed here reset the state to kStart and
 *         are passed through ToUpperCase().
 */
static uint32_t
GreekUpperCase(uint32_t aCh, GreekCasingState* aState)
{
  switch (aCh) {
  case GREEK_UPPER_ALPHA:
  case GREEK_LOWER_ALPHA:
    *aState = kAlpha;
    return GREEK_UPPER_ALPHA;

  case GREEK_UPPER_EPSILON:
  case GREEK_LOWER_EPSILON:
    *aState = kEpsilon;
    return GREEK_UPPER_EPSILON;

  case GREEK_UPPER_ETA:
  case GREEK_LOWER_ETA:
    *aState = kEta;
    return GREEK_UPPER_ETA;

  case GREEK_UPPER_IOTA:
    *aState = kIota;
    return GREEK_UPPER_IOTA;

  case GREEK_UPPER_OMICRON:
  case GREEK_LOWER_OMICRON:
    *aState = kOmicron;
    return GREEK_UPPER_OMICRON;

  case GREEK_UPPER_UPSILON:
    switch (*aState) {
    case kOmicron:
      *aState = kOmicronUpsilon;
      break;
    default:
      *aState = kUpsilon;
      break;
    }
    return GREEK_UPPER_UPSILON;

  case GREEK_UPPER_OMEGA:
  case GREEK_LOWER_OMEGA:
    *aState = kOmega;
    return GREEK_UPPER_OMEGA;

  // iota and upsilon may be the second vowel of a diphthong
  case GREEK_LOWER_IOTA:
    switch (*aState) {
    case kAlphaAcc:
    case kEpsilonAcc:
    case kOmicronAcc:
    case kUpsilonAcc:
      *aState = kStart;
      return GREEK_UPPER_IOTA_DIALYTIKA;
    default:
      break;
    }
    *aState = kIota;
    return GREEK_UPPER_IOTA;

  case GREEK_LOWER_UPSILON:
    switch (*aState) {
    case kAlphaAcc:
    case kEpsilonAcc:
    case kEtaAcc:
    case kOmicronAcc:
      *aState = kStart;
      return GREEK_UPPER_UPSILON_DIALYTIKA;
    case kOmicron:
      *aState = kOmicronUpsilon;
      break;
    default:
      *aState = kUpsilon;
      break;
    }
    return GREEK_UPPER_UPSILON;

  case GREEK_UPPER_IOTA_DIALYTIKA:
  case GREEK_LOWER_IOTA_DIALYTIKA:
  case GREEK_UPPER_UPSILON_DIALYTIKA:
  case GREEK_LOWER_UPSILON_DIALYTIKA:
  case COMBINING_DIAERESIS:
    *aState = kDiaeresis;
    return ToUpperCase(aCh);

  // remove accent if it follows a vowel or diaeresis,
  // and set appropriate state for diphthong detection
  case COMBINING_ACUTE_ACCENT:
  case COMBINING_ACUTE_TONE_MARK:
    switch (*aState) {
    case kAlpha:
      *aState = kAlphaAcc;
      return uint32_t(-1); // omit this char from result string
    case kEpsilon:
      *aState = kEpsilonAcc;
      return uint32_t(-1);
    case kEta:
      *aState = kEtaAcc;
      return uint32_t(-1);
    case kIota:
      *aState = kIotaAcc;
      return uint32_t(-1);
    case kOmicron:
      *aState = kOmicronAcc;
      return uint32_t(-1);
    case kUpsilon:
      *aState = kUpsilonAcc;
      return uint32_t(-1);
    case kOmicronUpsilon:
      *aState = kStart; // this completed a diphthong
      return uint32_t(-1);
    case kOmega:
      *aState = kOmegaAcc;
      return uint32_t(-1);
    case kDiaeresis:
      *aState = kStart;
      return uint32_t(-1);
    default:
      break;
    }
    // Accent did not follow a vowel or diaeresis: leave the switch and take
    // the generic path at the bottom of the function.
    break;

  // combinations with dieresis+accent just strip the accent,
  // and reset to start state (don't form diphthong with following vowel)
  case GREEK_LOWER_IOTA_DIALYTIKA_TONOS:
  case GREEK_LOWER_IOTA_DIALYTIKA_OXIA:
    *aState = kStart;
    return GREEK_UPPER_IOTA_DIALYTIKA;

  case GREEK_LOWER_UPSILON_DIALYTIKA_TONOS:
  case GREEK_LOWER_UPSILON_DIALYTIKA_OXIA:
    *aState = kStart;
    return GREEK_UPPER_UPSILON_DIALYTIKA;

  case COMBINING_GREEK_DIALYTIKA_TONOS:
    *aState = kStart;
    return COMBINING_DIAERESIS;

  // strip accents from vowels, and note the vowel seen so that we can detect
  // diphthongs where diaeresis needs to be added
  case GREEK_LOWER_ALPHA_TONOS:
  case GREEK_LOWER_ALPHA_OXIA:
  case GREEK_UPPER_ALPHA_TONOS:
  case GREEK_UPPER_ALPHA_OXIA:
    *aState = kAlphaAcc;
    return GREEK_UPPER_ALPHA;

  case GREEK_LOWER_EPSILON_TONOS:
  case GREEK_LOWER_EPSILON_OXIA:
  case GREEK_UPPER_EPSILON_TONOS:
  case GREEK_UPPER_EPSILON_OXIA:
    *aState = kEpsilonAcc;
    return GREEK_UPPER_EPSILON;

  case GREEK_LOWER_ETA_TONOS:
  case GREEK_LOWER_ETA_OXIA:
  case GREEK_UPPER_ETA_TONOS:
  case GREEK_UPPER_ETA_OXIA:
    *aState = kEtaAcc;
    return GREEK_UPPER_ETA;

  case GREEK_LOWER_IOTA_TONOS:
  case GREEK_LOWER_IOTA_OXIA:
  case GREEK_UPPER_IOTA_TONOS:
  case GREEK_UPPER_IOTA_OXIA:
    *aState = kIotaAcc;
    return GREEK_UPPER_IOTA;

  case GREEK_LOWER_OMICRON_TONOS:
  case GREEK_LOWER_OMICRON_OXIA:
  case GREEK_UPPER_OMICRON_TONOS:
  case GREEK_UPPER_OMICRON_OXIA:
    *aState = kOmicronAcc;
    return GREEK_UPPER_OMICRON;

  case GREEK_LOWER_UPSILON_TONOS:
  case GREEK_LOWER_UPSILON_OXIA:
  case GREEK_UPPER_UPSILON_TONOS:
  case GREEK_UPPER_UPSILON_OXIA:
    switch (*aState) {
    case kOmicron:
      *aState = kStart; // this completed a diphthong
      break;
    default:
      *aState = kUpsilonAcc;
      break;
    }
    return GREEK_UPPER_UPSILON;

  case GREEK_LOWER_OMEGA_TONOS:
  case GREEK_LOWER_OMEGA_OXIA:
  case GREEK_UPPER_OMEGA_TONOS:
  case GREEK_UPPER_OMEGA_OXIA:
    *aState = kOmegaAcc;
    return GREEK_UPPER_OMEGA;
  }

  // all other characters just reset the state, and use standard mappings
  *aState = kStart;
  return ToUpperCase(aCh);
}

/**
 * Allocate storage sized for aLength characters and placement-construct an
 * nsTransformedTextRun in it.
 *
 * @return the new text run, or nullptr if the storage allocation failed.
 */
nsTransformedTextRun *
nsTransformedTextRun::Create(const gfxTextRunFactory::Parameters* aParams,
                             nsTransformingTextRunFactory* aFactory,
                             gfxFontGroup* aFontGroup,
                             const char16_t* aString, uint32_t aLength,
                             const uint32_t aFlags, nsStyleContext** aStyles,
                             bool aOwnsFactory)
{
  NS_ASSERTION(!(aFlags & gfxTextRunFactory::TEXT_IS_8BIT),
               "didn't expect text to be marked as 8-bit here");

  void *storage = AllocateStorageForTextRun(sizeof(nsTransformedTextRun), aLength);
  if (!storage) {
    return nullptr;
  }

  return new (storage) nsTransformedTextRun(aParams, aFactory, aFontGroup,
                                            aString, aLength,
                                            aFlags, aStyles, aOwnsFactory);
}

/**
 * Record per-character capitalization flags for [aStart, aStart + aLength)
 * and mark the run as needing a rebuild.
 *
 * mCapitalize is created lazily on first use: it is sized to the whole run
 * and zero-filled before the caller's flags are copied in. If that
 * allocation fails the call returns without changing anything.
 */
void
nsTransformedTextRun::SetCapitalization(uint32_t aStart, uint32_t aLength,
                                        bool* aCapitalization,
                                        gfxContext* aRefContext)
{
  // The memcpy below writes aLength flags starting at aStart into an array
  // of GetLength() elements; refuse ranges that would run past its end.
  // (Written as a subtraction so that aStart + aLength cannot wrap.)
  const uint32_t runLength = GetLength();
  if (aStart > runLength || aLength > runLength - aStart) {
    NS_ASSERTION(false, "SetCapitalization range lies outside the text run");
    return;
  }

  if (mCapitalize.IsEmpty()) {
    if (!mCapitalize.AppendElements(runLength))
      return;
    memset(mCapitalize.Elements(), 0, runLength*sizeof(bool));
  }
  memcpy(mCapitalize.Elements() + aStart, aCapitalization, aLength*sizeof(bool));
  mNeedsRebuild = true;
}

/**
 * Forward to gfxTextRun::SetPotentialLineBreaks and, if that reports a
 * change, mark this run as needing a rebuild.
 *
 * @return whatever the base-class call returned.
 */
bool
nsTransformedTextRun::SetPotentialLineBreaks(uint32_t aStart, uint32_t aLength,
                                             uint8_t* aBreakBefore,
                                             gfxContext* aRefContext)
{
  bool changed = gfxTextRun::SetPotentialLineBreaks(aStart, aLength,
                                                    aBreakBefore, aRefContext);
  if (changed) {
    mNeedsRebuild = true;
  }
  return changed;
}

/**
 * Memory reporting: base-class size plus the mStyles and mCapitalize arrays,
 * plus the factory object when this run owns it (mOwnsFactory).
 */
size_t
nsTransformedTextRun::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf)
{
  size_t total = gfxTextRun::SizeOfExcludingThis(aMallocSizeOf);
  total += mStyles.SizeOfExcludingThis(aMallocSizeOf);
  total += mCapitalize.SizeOfExcludingThis(aMallocSizeOf);
  if (mOwnsFactory) {
    total += aMallocSizeOf(mFactory);
  }
  return total;
}

/**
 * Memory reporting: size of this object itself plus SizeOfExcludingThis().
 */
size_t
nsTransformedTextRun::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf)
{
  return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
}

/**
 * Create a transformed text run for UTF-16 text, with this factory as the
 * run's transforming factory.
 */
nsTransformedTextRun*
nsTransformingTextRunFactory::MakeTextRun(const char16_t* aString, uint32_t aLength,
                                          const gfxTextRunFactory::Parameters* aParams,
                                          gfxFontGroup* aFontGroup, uint32_t aFlags,
                                          nsStyleContext** aStyles, bool aOwnsFactory)
{
  return nsTransformedTextRun::Create(aParams, this, aFontGroup,
                                      aString, aLength, aFlags, aStyles, aOwnsFactory);
}

/**
 * Create a transformed text run for 8-bit text by widening it to UTF-16 and
 * delegating to the char16_t overload.
 *
 * TEXT_IS_PERSISTENT and TEXT_IS_8BIT are cleared from aFlags because the
 * text handed on is the local temporary conversion buffer, not aString.
 */
nsTransformedTextRun*
nsTransformingTextRunFactory::MakeTextRun(const uint8_t* aString, uint32_t aLength,
                                          const gfxTextRunFactory::Parameters* aParams,
                                          gfxFontGroup* aFontGroup, uint32_t aFlags,
                                          nsStyleContext** aStyles, bool aOwnsFactory)
{
  // We'll only have a Unicode code path to minimize the amount of code needed
  // for these rarely used features
  NS_ConvertASCIItoUTF16 unicodeString(reinterpret_cast<const char*>(aString), aLength);
  return MakeTextRun(unicodeString.get(), aLength, aParams, aFontGroup,
                     aFlags & ~(gfxFontGroup::TEXT_IS_PERSISTENT | gfxFontGroup::TEXT_IS_8BIT),
                     aStyles, aOwnsFactory);
}

/**
 * Copy the glyph data of aSrc into aDest while merging groups of source
 * characters into single destination characters.
 *
 * @param aDest          run to fill; its existing glyph runs are reset first
 * @param aSrc           run whose glyphs are copied
 * @param aCharsToMerge  indexed by aSrc character offset; true means "merge
 *                       this character into the preceding one"
 * @param aDeletedChars  indexed by aDest character offset; true means the
 *                       destination character has no source counterpart and
 *                       receives an empty glyph
 *
 * The glyphs of every character in a merge group are gathered into one
 * detailed-glyph list that is stored on a single destination character.
 * If any character in the group is a missing glyph, the merged character is
 * marked missing.
 *
 * Merging is done separately within each glyph run of aSrc. A merge group
 * therefore cannot span a glyph run boundary: if a run begins with a
 * character flagged in aCharsToMerge, the glyphs of that leading group are
 * discarded (with a warning) instead of being attached to the previous run.
 */
void
MergeCharactersInTextRun(gfxTextRun* aDest, gfxTextRun* aSrc,
                         const bool* aCharsToMerge, const bool* aDeletedChars)
{
  aDest->ResetGlyphRuns();

  gfxTextRun::GlyphRunIterator iter(aSrc, 0, aSrc->GetLength());
  uint32_t offset = 0;
  // NOTE(review): the inline capacity was lost from this copy of the file;
  // 2 is restored from the upstream source - confirm.
  nsAutoTArray<gfxTextRun::DetailedGlyph,2> glyphs;
  while (iter.NextRun()) {
    gfxTextRun::GlyphRun* run = iter.GetGlyphRun();
    nsresult rv = aDest->AddGlyphRun(run->mFont, run->mMatchType,
                                     offset, false);
    if (NS_FAILED(rv))
      return;

    bool anyMissing = false;
    uint32_t mergeRunStart = iter.GetStringStart();
    const gfxTextRun::CompressedGlyph *srcGlyphs = aSrc->GetCharacterGlyphs();
    gfxTextRun::CompressedGlyph mergedGlyph = srcGlyphs[mergeRunStart];
    uint32_t stringEnd = iter.GetStringEnd();
    for (uint32_t k = iter.GetStringStart(); k < stringEnd; ++k) {
      const gfxTextRun::CompressedGlyph g = srcGlyphs[k];
      if (g.IsSimpleGlyph()) {
        // Once a missing glyph has been seen in this merge group, simple
        // glyphs are no longer collected.
        if (!anyMissing) {
          gfxTextRun::DetailedGlyph details;
          details.mGlyphID = g.GetSimpleGlyph();
          details.mAdvance = g.GetSimpleAdvance();
          details.mXOffset = 0;
          details.mYOffset = 0;
          glyphs.AppendElement(details);
        }
      } else {
        if (g.IsMissing()) {
          anyMissing = true;
          glyphs.Clear();
        }
        if (g.GetGlyphCount() > 0) {
          glyphs.AppendElements(aSrc->GetDetailedGlyphs(k), g.GetGlyphCount());
        }
      }

      if (k + 1 < stringEnd && aCharsToMerge[k + 1]) {
        // next char is supposed to merge with current, so loop without
        // writing current merged glyph to the destination
        continue;
      }

      // If the start of the merge run is actually a character that should
      // have been merged with the previous character (this can happen
      // if there's a font change in the middle of a case-mapped character,
      // that decomposed into a sequence of base+diacritics, for example),
      // just discard the entire merge run. See comment at start of this
      // function.
      NS_WARN_IF_FALSE(!aCharsToMerge[mergeRunStart],
                       "unable to merge across a glyph run boundary, "
                       "glyph(s) discarded");
      if (!aCharsToMerge[mergeRunStart]) {
        if (anyMissing) {
          mergedGlyph.SetMissing(glyphs.Length());
        } else {
          mergedGlyph.SetComplex(mergedGlyph.IsClusterStart(),
                                 mergedGlyph.IsLigatureGroupStart(),
                                 glyphs.Length());
        }
        aDest->SetGlyphs(offset, mergedGlyph, glyphs.Elements());
        ++offset;

        // Destination characters flagged as deleted get an empty glyph.
        while (offset < aDest->GetLength() && aDeletedChars[offset]) {
          aDest->SetGlyphs(offset++, gfxTextRun::CompressedGlyph(), nullptr);
        }
      }

      // Start accumulating the next merge group.
      glyphs.Clear();
      anyMissing = false;
      mergeRunStart = k + 1;
      if (mergeRunStart < stringEnd) {
        mergedGlyph = srcGlyphs[mergeRunStart];
      }
    }
    NS_ASSERTION(glyphs.Length() == 0,
                 "Leftover glyphs, don't request merging of the last character with its next!");
  }
  NS_ASSERTION(offset == aDest->GetLength(), "Bad offset calculations");
}

/**
 * Build the Parameters and flags used to create an inner (child) text run
 * for aTextRun.
 *
 * @param aFlags  out: aTextRun's flags with TEXT_IS_PERSISTENT cleared
 * @return parameters carrying aRefContext and aTextRun's app-units-per-dev-
 *         unit value; the remaining fields are null / zero.
 */
gfxTextRunFactory::Parameters
GetParametersForInner(nsTransformedTextRun* aTextRun, uint32_t* aFlags,
                      gfxContext* aRefContext)
{
  gfxTextRunFactory::Parameters params =
    { aRefContext, nullptr, nullptr,
      nullptr, 0, aTextRun->GetAppUnitsPerDevUnit()
    };
  *aFlags = aTextRun->GetFlags() & ~gfxFontGroup::TEXT_IS_PERSISTENT;
  return params;
}

/**
 * Rebuild aTextRun for font-variant:small-caps.
 *
 * The text is split into consecutive runs by casing treatment (see
 * RunCaseState below). Each run is shaped as a child text run - lowercase
 * runs are uppercased and use a copy of the font group at 0.8 times the
 * size - and the child's glyph data is copied into aTextRun at the run's
 * offset.
 *
 * Returns early, leaving aTextRun possibly incomplete, if the reduced font
 * group or any child text run cannot be created.
 */
void
nsFontVariantTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
                                            gfxContext* aRefContext)
{
  gfxFontGroup* fontGroup = aTextRun->GetFontGroup();
  gfxFontStyle fontStyle = *fontGroup->GetStyle();
  fontStyle.size *= 0.8;
  nsRefPtr<gfxFontGroup> smallFont = fontGroup->Copy(&fontStyle);
  if (!smallFont)
    return;

  uint32_t flags;
  gfxTextRunFactory::Parameters innerParams =
    GetParametersForInner(aTextRun, &flags, aRefContext);

  uint32_t length = aTextRun->GetLength();
  const char16_t* str = aTextRun->mString.BeginReading();
  nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements();
  // Create a textrun so we can check cluster-start properties
  nsAutoPtr<gfxTextRun> inner(fontGroup->MakeTextRun(str, length, &innerParams, flags));
  if (!inner.get())
    return;

  nsCaseTransformTextRunFactory uppercaseFactory(nullptr, true);

  aTextRun->ResetGlyphRuns();

  uint32_t runStart = 0;
  // NOTE(review): the inline capacities were lost from this copy of the
  // file; 50 is restored from the upstream source - confirm.
  nsAutoTArray<nsStyleContext*,50> styleArray;
  nsAutoTArray<uint8_t,50> canBreakBeforeArray;

  enum RunCaseState {
    kUpperOrCaseless, // will be untouched by font-variant:small-caps
    kLowercase,       // will be uppercased and reduced
    kSpecialUpper     // specials: don't shrink, but apply uppercase mapping
  };
  RunCaseState runCase = kUpperOrCaseless;

  // Note that this loop runs from 0 to length *inclusive*, so the last
  // iteration is in effect beyond the end of the input text, to give a
  // chance to finish the last casing run we've found.
  // The last iteration, when i==length, must not attempt to look at the
  // character position [i] or the style data for styles[i], as this would
  // be beyond the valid length of the textrun or its style array.
  for (uint32_t i = 0; i <= length; ++i) {
    RunCaseState chCase = kUpperOrCaseless;
    // Unless we're at the end, figure out what treatment the current
    // character will need.
    if (i < length) {
      nsStyleContext* styleContext = styles[i];
      // Characters that aren't the start of a cluster are ignored here. They
      // get added to whatever lowercase/non-lowercase run we're in.
      if (!inner->IsClusterStart(i)) {
        chCase = runCase;
      } else {
        if (styleContext->StyleFont()->mFont.variant == NS_STYLE_FONT_VARIANT_SMALL_CAPS) {
          uint32_t ch = str[i];
          if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) {
            ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
          }
          uint32_t ch2 = ToUpperCase(ch);
          if (ch != ch2 || mozilla::unicode::SpecialUpper(ch)) {
            chCase = kLowercase;
          } else if (styleContext->StyleFont()->mLanguage == nsGkAtoms::el) {
            // In Greek, check for characters that will be modified by the
            // GreekUpperCase mapping - this catches accented capitals where
            // the accent is to be removed (bug 307039). These are handled by
            // a transformed child run using the full-size font.
            GreekCasingState state = kStart; // don't need exact context here
            ch2 = GreekUpperCase(ch, &state);
            if (ch != ch2) {
              chCase = kSpecialUpper;
            }
          }
        } else {
          // Don't transform the character! I.e., pretend that it's not lowercase
        }
      }
    }

    // At the end of the text, or when the current character needs different
    // casing treatment from the current run, finish the run-in-progress
    // and prepare to accumulate a new run.
    // Note that we do not look at any source data for offset [i] here,
    // as that would be invalid in the case where i==length.
    if ((i == length || runCase != chCase) && runStart < i) {
      nsAutoPtr<nsTransformedTextRun> transformedChild;
      nsAutoPtr<gfxTextRun> cachedChild;
      gfxTextRun* child;

      switch (runCase) {
      case kUpperOrCaseless:
        cachedChild =
          fontGroup->MakeTextRun(str + runStart, i - runStart, &innerParams,
                                 flags);
        child = cachedChild.get();
        break;
      case kLowercase:
        transformedChild =
          uppercaseFactory.MakeTextRun(str + runStart, i - runStart,
                                       &innerParams, smallFont, flags,
                                       styleArray.Elements(), false);
        child = transformedChild;
        break;
      case kSpecialUpper:
        transformedChild =
          uppercaseFactory.MakeTextRun(str + runStart, i - runStart,
                                       &innerParams, fontGroup, flags,
                                       styleArray.Elements(), false);
        child = transformedChild;
        break;
      }
      if (!child)
        return;
      // Copy potential linebreaks into child so they're preserved
      // (and also child will be shaped appropriately)
      NS_ASSERTION(canBreakBeforeArray.Length() == i - runStart,
                   "lost some break-before values?");
      child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(),
                                    canBreakBeforeArray.Elements(), aRefContext);
      if (transformedChild) {
        transformedChild->FinishSettingProperties(aRefContext);
      }
      aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), runStart);

      runStart = i;
      styleArray.Clear();
      canBreakBeforeArray.Clear();
    }

    if (i < length) {
      runCase = chCase;
      styleArray.AppendElement(styles[i]);
      canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i));
    }
  }
}

/**
 * Rebuild aTextRun by applying the CSS text-transform of each character
 * (or uppercase throughout, when mAllUppercase is set).
 *
 * The transformed text is accumulated in convertedString and shaped as a
 * child text run - through mInnerTransformingTextRunFactory when there is
 * one, otherwise directly by the font group. The child's glyphs are then
 * copied back into aTextRun.
 *
 * A transform may change the number of UTF-16 code units: a character can
 * expand to several (special-casing mappings) or be dropped (Greek accent
 * removal, signalled by ch == uint32_t(-1)). Two parallel arrays track
 * this so that glyphs can be mapped back afterwards:
 *  - charsToMergeArray, one entry per code unit of convertedString: true
 *    for the extra code units that must be merged into the preceding
 *    character;
 *  - deletedCharsArray, one entry per code unit of the original text: true
 *    for code units with no counterpart of their own in convertedString.
 */
void
nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
                                              gfxContext* aRefContext)
{
  uint32_t length = aTextRun->GetLength();
  const char16_t* str = aTextRun->mString.BeginReading();
  nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements();

  nsAutoString convertedString;
  // NOTE(review): the inline capacities were lost from this copy of the
  // file; 50 is restored from the upstream source - confirm.
  nsAutoTArray<bool,50> charsToMergeArray;
  nsAutoTArray<bool,50> deletedCharsArray;
  nsAutoTArray<nsStyleContext*,50> styleArray;
  nsAutoTArray<uint8_t,50> canBreakBeforeArray;
  bool mergeNeeded = false;

  // Some languages have special casing conventions that differ from the
  // default Unicode mappings.
  // The enum values here are named for well-known exemplar languages that
  // exhibit the behavior in question; multiple lang tags may map to the
  // same setting here, if the behavior is shared by other languages.
  enum {
    eNone,    // default non-lang-specific behavior
    eTurkish, // preserve dotted/dotless-i distinction in uppercase
    eDutch,   // treat "ij" digraph as a unit for capitalization
    eGreek    // strip accent when uppercasing Greek vowels
  } languageSpecificCasing = eNone;

  const nsIAtom* lang = nullptr;
  bool capitalizeDutchIJ = false;
  bool prevIsLetter = false;
  uint32_t sigmaIndex = uint32_t(-1);
  nsIUGenCategory::nsUGenCategory cat;
  GreekCasingState greekState = kStart;
  uint32_t i;
  for (i = 0; i < length; ++i) {
    uint32_t ch = str[i];
    nsStyleContext* styleContext = styles[i];

    uint8_t style = mAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE
      : styleContext->StyleText()->mTextTransform;
    int extraChars = 0;
    const mozilla::unicode::MultiCharMapping *mcm;

    if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) {
      ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
    }

    // Re-evaluate the language-specific behavior only when the language
    // atom differs from the previous character's.
    if (lang != styleContext->StyleFont()->mLanguage) {
      lang = styleContext->StyleFont()->mLanguage;
      if (lang == nsGkAtoms::tr || lang == nsGkAtoms::az ||
          lang == nsGkAtoms::ba || lang == nsGkAtoms::crh ||
          lang == nsGkAtoms::tt) {
        languageSpecificCasing = eTurkish;
      } else if (lang == nsGkAtoms::nl) {
        languageSpecificCasing = eDutch;
      } else if (lang == nsGkAtoms::el) {
        languageSpecificCasing = eGreek;
        greekState = kStart;
      } else {
        languageSpecificCasing = eNone;
      }
    }

    switch (style) {
    case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
      // NOTE(review): these Turkish early exits clear sigmaIndex without
      // first revising a provisional FINAL SIGMA to SMALL SIGMA, although
      // the 'I' that follows is a letter. Looks unintended for mixed
      // Greek/Latin text in a Turkish-tagged element - confirm.
      if (languageSpecificCasing == eTurkish) {
        if (ch == 'I') {
          ch = LATIN_SMALL_LETTER_DOTLESS_I;
          prevIsLetter = true;
          sigmaIndex = uint32_t(-1);
          break;
        }
        if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
          ch = 'i';
          prevIsLetter = true;
          sigmaIndex = uint32_t(-1);
          break;
        }
      }

      // Special lowercasing behavior for Greek Sigma: note that this is listed
      // as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a
      // language-specific mapping; it applies regardless of the language of
      // the element.
      //
      // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA (i.e.
      // the non-final form) whenever there is a following letter, or when the
      // CAPITAL SIGMA occurs in isolation (neither preceded nor followed by a
      // LETTER); and to FINAL SIGMA when it is preceded by another letter but
      // not followed by one.
      //
      // To implement the context-sensitive nature of this mapping, we keep
      // track of whether the previous character was a letter. If not, CAPITAL
      // SIGMA will map directly to SMALL SIGMA. If the previous character
      // was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we record the
      // position in the converted string; if we then encounter another letter,
      // that FINAL SIGMA is replaced with a standard SMALL SIGMA.

      cat = mozilla::unicode::GetGenCategory(ch);

      // If sigmaIndex is not -1, it marks where we have provisionally mapped
      // a CAPITAL SIGMA to FINAL SIGMA; if we now find another letter, we
      // need to change it to SMALL SIGMA.
      if (sigmaIndex != uint32_t(-1)) {
        if (cat == nsIUGenCategory::kLetter) {
          convertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex);
        }
      }

      if (ch == GREEK_CAPITAL_LETTER_SIGMA) {
        // If preceding char was a letter, map to FINAL instead of SMALL,
        // and note where it occurred by setting sigmaIndex; we'll change it
        // to standard SMALL SIGMA later if another letter follows
        if (prevIsLetter) {
          ch = GREEK_SMALL_LETTER_FINAL_SIGMA;
          sigmaIndex = convertedString.Length();
        } else {
          // CAPITAL SIGMA not preceded by a letter is unconditionally mapped
          // to SMALL SIGMA
          ch = GREEK_SMALL_LETTER_SIGMA;
          sigmaIndex = uint32_t(-1);
        }
        prevIsLetter = true;
        break;
      }

      // ignore diacritics for the purpose of contextual sigma mapping;
      // otherwise, reset prevIsLetter appropriately and clear the
      // sigmaIndex marker
      if (cat != nsIUGenCategory::kMark) {
        prevIsLetter = (cat == nsIUGenCategory::kLetter);
        sigmaIndex = uint32_t(-1);
      }

      mcm = mozilla::unicode::SpecialLower(ch);
      if (mcm) {
        // Append every mapped char except the last one here; the last one
        // becomes ch and is appended by the common code after the switch.
        int j = 0;
        while (j < 2 && mcm->mMappedChars[j + 1]) {
          convertedString.Append(mcm->mMappedChars[j]);
          ++extraChars;
          ++j;
        }
        ch = mcm->mMappedChars[j];
        break;
      }

      ch = ToLowerCase(ch);
      break;

    case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
      if (languageSpecificCasing == eTurkish && ch == 'i') {
        ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
        break;
      }

      if (languageSpecificCasing == eGreek) {
        // May yield uint32_t(-1), meaning "drop this character".
        ch = GreekUpperCase(ch, &greekState);
        break;
      }

      mcm = mozilla::unicode::SpecialUpper(ch);
      if (mcm) {
        int j = 0;
        while (j < 2 && mcm->mMappedChars[j + 1]) {
          convertedString.Append(mcm->mMappedChars[j]);
          ++extraChars;
          ++j;
        }
        ch = mcm->mMappedChars[j];
        break;
      }

      ch = ToUpperCase(ch);
      break;

    case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
      // A 'j' immediately after a Dutch 'i' that was just capitalized is
      // capitalized too (the "ij" digraph is treated as a unit).
      if (capitalizeDutchIJ && ch == 'j') {
        ch = 'J';
        capitalizeDutchIJ = false;
        break;
      }
      capitalizeDutchIJ = false;
      if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) {
        if (languageSpecificCasing == eTurkish && ch == 'i') {
          ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
          break;
        }
        if (languageSpecificCasing == eDutch && ch == 'i') {
          ch = 'I';
          capitalizeDutchIJ = true;
          break;
        }

        mcm = mozilla::unicode::SpecialTitle(ch);
        if (mcm) {
          int j = 0;
          while (j < 2 && mcm->mMappedChars[j + 1]) {
            convertedString.Append(mcm->mMappedChars[j]);
            ++extraChars;
            ++j;
          }
          ch = mcm->mMappedChars[j];
          break;
        }

        ch = ToTitleCase(ch);
      }
      break;

    case NS_STYLE_TEXT_TRANSFORM_FULLWIDTH:
      ch = mozilla::unicode::GetFullWidth(ch);
      break;

    default:
      break;
    }

    if (ch == uint32_t(-1)) {
      // Character was removed by the transform: nothing is appended to the
      // converted string for it.
      deletedCharsArray.AppendElement(true);
      mergeNeeded = true;
    } else {
      deletedCharsArray.AppendElement(false);
      charsToMergeArray.AppendElement(false);
      styleArray.AppendElement(styleContext);
      canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i));

      if (IS_IN_BMP(ch)) {
        convertedString.Append(ch);
      } else {
        convertedString.Append(H_SURROGATE(ch));
        convertedString.Append(L_SURROGATE(ch));
        ++i;
        deletedCharsArray.AppendElement(true); // not exactly deleted, but the
                                               // trailing surrogate is skipped
        ++extraChars;
      }

      // One bookkeeping entry for every additional code unit produced for
      // this character.
      while (extraChars-- > 0) {
        mergeNeeded = true;
        charsToMergeArray.AppendElement(true);
        styleArray.AppendElement(styleContext);
        canBreakBeforeArray.AppendElement(false);
      }
    }
  }

  uint32_t flags;
  gfxTextRunFactory::Parameters innerParams =
    GetParametersForInner(aTextRun, &flags, aRefContext);
  gfxFontGroup* fontGroup = aTextRun->GetFontGroup();

  nsAutoPtr<nsTransformedTextRun> transformedChild;
  nsAutoPtr<gfxTextRun> cachedChild;
  gfxTextRun* child;

  if (mInnerTransformingTextRunFactory) {
    transformedChild = mInnerTransformingTextRunFactory->MakeTextRun(
        convertedString.BeginReading(), convertedString.Length(),
        &innerParams, fontGroup, flags, styleArray.Elements(), false);
    child = transformedChild.get();
  } else {
    cachedChild = fontGroup->MakeTextRun(
        convertedString.BeginReading(), convertedString.Length(),
        &innerParams, flags);
    child = cachedChild.get();
  }
  if (!child)
    return;
  // Copy potential linebreaks into child so they're preserved
  // (and also child will be shaped appropriately)
  NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(),
               "Dropped characters or break-before values somewhere!");
  child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(),
                                canBreakBeforeArray.Elements(), aRefContext);
  if (transformedChild) {
    transformedChild->FinishSettingProperties(aRefContext);
  }

  if (mergeNeeded) {
    // Now merge multiple characters into one multi-glyph character as required
    // and deal with skipping deleted accent chars
    NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(),
                 "source length mismatch");
    NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(),
                 "destination length mismatch");
    MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(),
                             deletedCharsArray.Elements());
  } else {
    // No merging to do, so just copy; this produces a more optimized textrun.
    // We can't steal the data because the child may be cached and stealing
    // the data would break the cache.
    aTextRun->ResetGlyphRuns();
    aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), 0);
  }
}