mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
Bug 1105644 - Lowercase words before passing them to libhyphen, so as to match patterns fully. r=smontagu
This commit is contained in:
parent
ef5fa9a320
commit
6aba8af82d
@ -80,9 +80,46 @@ nsHyphenator::Hyphenate(const nsAString& aString,
|
||||
}
|
||||
|
||||
if (inWord) {
|
||||
const char16_t *begin = aString.BeginReading();
|
||||
NS_ConvertUTF16toUTF8 utf8(begin + wordStart,
|
||||
wordLimit - wordStart);
|
||||
// Convert the word to utf-8 for libhyphen, lowercasing it as we go
|
||||
// so that it will match the (lowercased) patterns (bug 1105644).
|
||||
nsAutoCString utf8;
|
||||
const char16_t *begin = aString.BeginReading() + wordStart;
|
||||
const char16_t *end = aString.BeginReading() + wordLimit;
|
||||
while (begin < end) {
|
||||
uint32_t ch = *begin++;
|
||||
|
||||
if (NS_IS_HIGH_SURROGATE(ch)) {
|
||||
if (begin < end && NS_IS_LOW_SURROGATE(*begin)) {
|
||||
ch = SURROGATE_TO_UCS4(ch, *begin++);
|
||||
} else {
|
||||
ch = 0xfffd; // unpaired surrogate, treat as REPLACEMENT CHAR
|
||||
}
|
||||
} else if (NS_IS_LOW_SURROGATE(ch)) {
|
||||
ch = 0xfffd; // unpaired surrogate
|
||||
}
|
||||
|
||||
// XXX What about language-specific casing? Consider Turkish I/i...
|
||||
// In practice, it looks like the current patterns will not be
|
||||
// affected by this, as they treat dotted and undotted i similarly.
|
||||
ch = ToLowerCase(ch);
|
||||
|
||||
if (ch < 0x80) { // U+0000 - U+007F
|
||||
utf8.Append(ch);
|
||||
} else if (ch < 0x0800) { // U+0100 - U+07FF
|
||||
utf8.Append(0xC0 | (ch >> 6));
|
||||
utf8.Append(0x80 | (0x003F & ch));
|
||||
} else if (ch < 0x10000) { // U+0800 - U+D7FF,U+E000 - U+FFFF
|
||||
utf8.Append(0xE0 | (ch >> 12));
|
||||
utf8.Append(0x80 | (0x003F & (ch >> 6)));
|
||||
utf8.Append(0x80 | (0x003F & ch));
|
||||
} else {
|
||||
utf8.Append(0xF0 | (ch >> 18));
|
||||
utf8.Append(0x80 | (0x003F & (ch >> 12)));
|
||||
utf8.Append(0x80 | (0x003F & (ch >> 6)));
|
||||
utf8.Append(0x80 | (0x003F & ch));
|
||||
}
|
||||
}
|
||||
|
||||
nsAutoTArray<char,200> utf8hyphens;
|
||||
utf8hyphens.SetLength(utf8.Length() + 5);
|
||||
char **rep = nullptr;
|
||||
|
Loading…
Reference in New Issue
Block a user