Bug 145975 - Implement nsCaseInsensitiveUTF8StringComparator r=smontagu

--HG--
extra : rebase_source : 8e5d63a6a5ca50b6e95a80421ad96a1d4a52ae77
This commit is contained in:
Justin Lebar ext:(%2C%20Kyle%20Huey%20%3Cme%40kylehuey.com%3E) 2010-08-31 18:03:40 -07:00
parent 02ef2fe17a
commit 8b20bfbcec
15 changed files with 572 additions and 210 deletions

View File

@ -363,3 +363,16 @@ static const PRUint32 gCaseBlocks [8] = {
0x00000000,
0x80000000
};
// ASCII lower-casing table, indexed by a 7-bit character code (0x00-0x7f).
// We map x -> x, except for upper-case letters,
// which we map to their lower-case equivalents.
// Concretely: entries 0x41-0x5a ('A'-'Z') hold 0x61-0x7a ('a'-'z'); every
// other entry holds its own index.  Callers must only index this table with
// values below 0x80.
static const PRUint8 gASCIIToLower [128] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
};

View File

@ -281,6 +281,103 @@ static PRUnichar t4result[T4LEN+2] = {
0x0041 , // Dummy entry to prevent overflow
0x00
};
// Left-hand input for Test 6: a run of UTF-8 encoded characters (one-, two-
// and three-byte sequences) followed by a terminating 0x00 byte.  The number
// in each trailing comment is the character's index; character i here is
// expected to be case-insensitively equal to character i of t6rhs.
static unsigned char t6lhs[] = {
0x31 , // 0
0x19 , // 1
0x43 , // 2
0x67 , // 3
0xC3, 0x88 , // 4
0xC3, 0xA9 , // 5
0xC5, 0x87 , // 6
0xC7, 0x84 , // 7
0xC7, 0x86 , // 8
0xC7, 0x85 , // 9
0xCF, 0x80 , // 10
0xCE, 0xB2 , // 11
0xD0, 0xB8 , // 12
0xD2, 0xA5 , // 13
0xD7, 0x90 , // 14
0xE0, 0xA8, 0xA0 , // 15
0xE3, 0x82, 0xB0 , // 16
0xE5, 0x86, 0x85 , // 17
0xEC, 0x80, 0xA1 , // 18
0xEF, 0xBD, 0x88 , // 19
0xC7, 0x87 , // 20
0xC7, 0x88 , // 21
0xC7, 0x89 , // 22
0xC7, 0x8A , // 23
0xC7, 0x8B , // 24
0xC7, 0x8C , // 25
0xC7, 0xB1 , // 26
0xC7, 0xB2 , // 27
0xC7, 0xB3 , // 28
0xC9, 0x90 , // 29
0xC9, 0xB1 , // 30
0xEA, 0x99, 0x81 , // 31
0x00
};
// Right-hand input for Test 6: the counterpart of t6lhs, character by
// character (same index comments), followed by a terminating 0x00 byte.
// Some entries are byte-identical to t6lhs and others are a different-case
// form, so the byte length of a character may differ between the two arrays
// (e.g. entries 29 and 30 are three bytes here but two bytes in t6lhs).
static unsigned char t6rhs[] = {
0x31 , // 0
0x19 , // 1
0x43 , // 2
0x47 , // 3
0xC3, 0x88 , // 4
0xC3, 0x89 , // 5
0xC5, 0x87 , // 6
0xC7, 0x84 , // 7
0xC7, 0x84 , // 8
0xC7, 0x84 , // 9
0xCE, 0xA0 , // 10
0xCE, 0x92 , // 11
0xD0, 0x98 , // 12
0xD2, 0xA4 , // 13
0xD7, 0x90 , // 14
0xE0, 0xA8, 0xA0 , // 15
0xE3, 0x82, 0xB0 , // 16
0xE5, 0x86, 0x85 , // 17
0xEC, 0x80, 0xA1 , // 18
0xEF, 0xBC, 0xA8 , // 19
0xC7, 0x87 , // 20
0xC7, 0x87 , // 21
0xC7, 0x87 , // 22
0xC7, 0x8a , // 23
0xC7, 0x8a , // 24
0xC7, 0x8a , // 25
0xC7, 0xB1 , // 26
0xC7, 0xB1 , // 27
0xC7, 0xB1 , // 28
0xE2, 0xB1, 0xAF , // 29
0xE2, 0xB1, 0xAE , // 30
0xEA, 0x99, 0x80 , // 31
0x00
};
// Test 7 inputs: pure-ASCII strings that differ only in letter case, so they
// are expected to compare equal.  Note these are pointers, not arrays: use
// strlen(), not sizeof(), to get their lengths.
static const char *t7lhs = "aBcDeFGHIJKL1!!2!!a!uuuu";
static const char *t7rhs = "AbCdEFghijkL1!!2!!A!UUuU";
// Test 8 inputs: strings of different length that already differ at the
// second character ('a' vs 'B'); t8lhs is expected to order before t8rhs.
static const char *t8lhs = "aazzz";
static const char *t8rhs = "aBa";
// Test 9 inputs: '@' (0x40) and '`' (0x60) differ only in the 0x20 bit but
// are not a case pair, so these strings must NOT compare equal.
static const char *t9lhs = "@a";
static const char *t9rhs = "`a";
// Returns true when the UTF-8 buffers a (aLen bytes) and b (bLen bytes) are
// case-insensitively equal, false otherwise (including when either buffer
// contains a sequence that CaseInsensitiveUTF8CharsEqual() reports as an
// error).
//
// This is basically CaseInsensitiveCompare() == 0, but implemented by walking
// both buffers one character at a time with CaseInsensitiveUTF8CharsEqual(),
// so that the character-level API gets exercised by the tests.
bool CharByCharCompareEqual(const char *a, const char *b,
                            PRUint32 aLen, PRUint32 bLen)
{
  const char *aEnd = a + aLen;
  const char *bEnd = b + bLen;

  while (a < aEnd && b < bEnd) {
    PRBool err;
    // On success the call advances a and b past the characters it compared.
    if (!CaseInsensitiveUTF8CharsEqual(a, b, aEnd, bEnd, &a, &b, &err) || err)
      return false;
  }

  // Every compared character matched.  The buffers are only equal if both
  // were consumed completely; otherwise one is a strict prefix of the other.
  return a >= aEnd && b >= bEnd;
}
void TestCaseConversion()
{
@ -291,7 +388,7 @@ void TestCaseConversion()
int i;
PRUnichar buf[256];
printf("Test 2 - ToUpper(PRUnichar, PRUnichar*):\n");
printf("Test 1 - ToUpper(PRUnichar, PRUnichar*):\n");
for(i=0;i < T2LEN ; i++)
{
PRUnichar ch = ToUpperCase(t2data[i]);
@ -300,7 +397,7 @@ void TestCaseConversion()
}
printf("Test 3 - ToLower(PRUnichar, PRUnichar*):\n");
printf("Test 2 - ToLower(PRUnichar, PRUnichar*):\n");
for(i=0;i < T3LEN; i++)
{
PRUnichar ch = ToLowerCase(t3data[i]);
@ -308,7 +405,7 @@ void TestCaseConversion()
printf("\tFailed!! result unexpected %d\n", i);
}
printf("Test 4 - ToTitle(PRUnichar, PRUnichar*):\n");
printf("Test 3 - ToTitle(PRUnichar, PRUnichar*):\n");
for(i=0;i < T4LEN; i++)
{
PRUnichar ch = ToTitleCase(t4data[i]);
@ -316,7 +413,7 @@ void TestCaseConversion()
printf("\tFailed!! result unexpected %d\n", i);
}
printf("Test 5 - ToUpper(PRUnichar*, PRUnichar*, PRUint32):\n");
printf("Test 4 - ToUpper(PRUnichar*, PRUnichar*, PRUint32):\n");
ToUpperCase(t2data, buf, T2LEN);
for(i = 0; i < T2LEN; i++)
{
@ -327,7 +424,7 @@ void TestCaseConversion()
}
}
printf("Test 6 - ToLower(PRUnichar*, PRUnichar*, PRUint32):\n");
printf("Test 5 - ToLower(PRUnichar*, PRUnichar*, PRUint32):\n");
ToLowerCase(t3data, buf, T3LEN);
for(i = 0; i < T3LEN; i++)
{
@ -338,11 +435,85 @@ void TestCaseConversion()
}
}
// Test 6: t6lhs/t6rhs are arrays, so sizeof() is their full byte length
// (including the terminating 0x00, which simply compares equal to itself).
printf("Test 6 - CaseInsensitiveCompare UTF-8 (1):\n");
if (CaseInsensitiveCompare((char*)t6lhs, (char*)t6rhs, sizeof(t6lhs), sizeof(t6rhs)))
  printf("\tFailed!\n");
if (!CharByCharCompareEqual((char*)t6lhs, (char*)t6rhs, sizeof(t6lhs), sizeof(t6rhs)))
  printf("\tFailed character-by-character comparison!\n");

// Test 7: t7lhs/t7rhs are pointers, so their lengths must come from strlen();
// sizeof() would only yield the size of a pointer and silently truncate the
// comparison to the first few bytes.
printf("Test 7 - CaseInsensitiveCompare UTF-8 (2):\n");
if (CaseInsensitiveCompare(t7lhs, t7rhs, strlen(t7lhs), strlen(t7rhs)))
  printf("\tFailed!\n");
if (!CharByCharCompareEqual(t7lhs, t7rhs, strlen(t7lhs), strlen(t7rhs)))
  printf("\tFailed character-by-character comparison!\n");

// Test 8a/8b: unequal strings must order consistently in both directions.
printf("Test 8a - CaseInsensitiveCompare UTF-8 (3):\n");
if (CaseInsensitiveCompare(t8lhs, t8rhs, strlen(t8lhs), strlen(t8rhs)) != -1)
  printf("\tFailed!\n");
if (CharByCharCompareEqual(t8lhs, t8rhs, strlen(t8lhs), strlen(t8rhs)))
  printf("\tFailed character-by-character comparison!\n");

printf("Test 8b - CaseInsensitiveCompare UTF-8 (4):\n");
if (CaseInsensitiveCompare(t8rhs, t8lhs, strlen(t8rhs), strlen(t8lhs)) != 1)
  printf("\tFailed!\n");

// This test may seem a bit strange.  But it's actually an easy bug to make
// if we tried to be clever and say that two ASCII characters x and y are
// case-insensitively equal if (x & ~0x20) == (y & ~0x20).
printf("Test 9 - CaseInsensitiveCompare UTF-8 (5):\n");
// Each length is paired with the string it belongs to.
if (CaseInsensitiveCompare(t9rhs, t9lhs, strlen(t9rhs), strlen(t9lhs)) != 1)
  printf("\tFailed!\n");
if (CharByCharCompareEqual(t9lhs, t9rhs, strlen(t9lhs), strlen(t9rhs)))
  printf("\tFailed character-by-character comparison!\n");
printf("===========================\n");
printf("Finish case conversion test\n");
printf("===========================\n");
}
// Runs one fuzz iteration: fills two buffers of random length (0-31 bytes)
// with random bytes and feeds them to both UTF-8 comparison entry points.
// The comparison results are ignored; the point is only that neither call
// reads outside its buffers.
static void FuzzOneInvalidCaseConversion()
{
  PRUint32 aLen = rand() % 32;
  PRUint32 bLen = rand() % 32;

  // We could use a static length-32 buffer for these, but then Valgrind
  // wouldn't be able to detect errors.
  unsigned char *aBuf = (unsigned char*)malloc(aLen * sizeof(unsigned char));
  unsigned char *bBuf = (unsigned char*)malloc(bLen * sizeof(unsigned char));

  // malloc(0) is allowed to return NULL, so a NULL result only signals an
  // allocation failure when a non-zero size was requested.  Never write
  // through a failed allocation.
  if ((aLen != 0 && !aBuf) || (bLen != 0 && !bBuf)) {
    printf("\tSkipping fuzz iteration: out of memory\n");
    free(aBuf);
    free(bBuf);
    return;
  }

  for (PRUint32 i = 0; i < aLen; i++) {
    aBuf[i] = rand() & 0xff;
  }
  for (PRUint32 i = 0; i < bLen; i++) {
    bBuf[i] = rand() & 0xff;
  }

  CaseInsensitiveCompare((char*)aBuf, (char*)bBuf, aLen, bLen);
  CharByCharCompareEqual((char*)aBuf, (char*)bBuf, aLen, bLen);

  free(aBuf);
  free(bBuf);
}
static void FuzzCaseConversion()
{
printf("==========================\n");
printf("Start fuzz case conversion\n");
printf("==========================\n");
srand(0);
printf("Fuzzing invalid UTF8 data...\n");
for (PRUint32 i = 0; i < 100000; i++) {
FuzzOneInvalidCaseConversion();
}
printf("===========================\n");
printf("Finish fuzz case conversion\n");
printf("===========================\n");
}
static void TestEntityConversion(PRUint32 version)
{
printf("==============================\n");
@ -567,6 +738,10 @@ int main(int argc, char** argv) {
// --------------------------------------------
FuzzCaseConversion();
// --------------------------------------------
TestEntityConversion(nsIEntityConverter::html40);
// --------------------------------------------

View File

@ -1,32 +1,3 @@
Instructions for using these tools are on the Mozilla Wiki.
* How to generate various properties files in intl/unicharutils/tables and
header files in intl/unicharutils/src
( written by Jungshik Shin for bug 210502
https://bugzilla.mozilla.org/show_bug.cgi?id=210502 on 2005-04-05 )
1. Grab the latest version of idnkit at http://www.nic.ad.jp/en/idn/index.html
(http://www.nic.ad.jp/ja/idn/idnkit/download/index.html )
2. There are three files we need in the kit:
generate_normalize_data.pl, UCD.pm and SparseMap.pm
3. a. Download the following Unicode data files :
CaseFolding.txt,CompositionExclusions.txt,
SpecialCasing.txt, UnicodeData.txt
b. Rename UnicodeData.txt to UnicodeData-Latest.txt
The latest version is, as of this writing, in
ftp://ftp.unicode.org/Public/4.1.0/ucd
4. a. Run generate_normalize_data.pl and save the output to a temporary file
b. Edit the file
- remove the case folding part (search for 'Lowercase' and delete
all the lines following it) because we have separate scripts for that,
- replace 'unsigned short' and 'unsigned long' with 'PRUnichar' and
'PRUint32'
c. Replace the actual source part (after the license) of
intl/unicharutil/src/normalization_data.h with the file you edited.
5. Generate casetable.h and cattable.h with gencasetable.pl and gencattable.pl
Just running them will put casetable.h and cattable.h in the right place.
https://wiki.mozilla.org/I18n:Updating_Unicode_version

View File

@ -383,6 +383,38 @@ for($idx=0;$idx<8;$idx++)
printf OUT "\n";
}
}
print OUT "};\n\n";
######################################################################
#
# Emit the gASCIIToLower table
#
######################################################################
print OUT "// We map x -> x, except for upper-case letters,\n";
print OUT "// which we map to their lower-case equivalents.\n";
print OUT "static const PRUint8 gASCIIToLower [128] = {\n";

# One entry per 7-bit character code, sixteen entries per output row.
# 'A'..'Z' (65..90) are shifted up by 0x20 onto 'a'..'z'; every other code
# is written out unchanged.
$idx = 0;
while ($idx < 128)
{
  # Start-of-row prefix.
  print OUT " " if ($idx % 16 == 0);

  my $mapped = (65 <= $idx && $idx <= 90) ? ($idx + 0x20) : $idx;
  printf OUT "0x%02x", $mapped;

  # Entries are comma-separated; the sixteenth entry of a row ends the line.
  my $separator = (($idx + 1) % 16 != 0) ? ", " : ",\n";
  print OUT $separator;

  $idx++;
}
print OUT "};\n";

View File

@ -46,10 +46,11 @@
#include "nsServiceManagerUtils.h"
#include "nsXPCOMStrings.h"
#include "casetable.h"
#include "nsUTF8Utils.h"
#include <ctype.h>
// For gUpperToTitle
// For gUpperToTitle
enum {
kUpperIdx =0,
kTitleIdx
@ -62,18 +63,19 @@ enum {
kDiffIdx
};
#define IS_ASCII(u) ( 0x0000 == ((u) & 0xFF80))
#define IS_ASCII_UPPER(u) ((0x0041 <= (u)) && ( (u) <= 0x005a))
#define IS_ASCII_LOWER(u) ((0x0061 <= (u)) && ( (u) <= 0x007a))
#define IS_ASCII(u) ((u) < 0x80)
#define IS_ASCII_UPPER(u) (('A' <= (u)) && ( (u) <= 'Z' ))
#define IS_ASCII_LOWER(u) (('a' <= (u)) && ( (u) <= 'z'))
#define IS_ASCII_ALPHA(u) (IS_ASCII_UPPER(u) || IS_ASCII_LOWER(u))
#define IS_ASCII_SPACE(u) ( 0x0020 == (u) )
#define IS_ASCII_SPACE(u) ( ' ' == (u) )
#define IS_NOCASE_CHAR(u) (0==(1&(gCaseBlocks[(u)>>13]>>(0x001F&((u)>>8)))))
// Size of Tables
#define CASE_MAP_CACHE_SIZE 0x40
#define CASE_MAP_CACHE_MASK 0x3F
// Changing these numbers may break UTF-8 caching. Be careful!
#define CASE_MAP_CACHE_SIZE 0x100
#define CASE_MAP_CACHE_MASK 0xFF
struct nsCompressedMap {
const PRUnichar *mTable;
@ -83,75 +85,94 @@ struct nsCompressedMap {
PRUnichar Map(PRUnichar aChar)
{
// no need to worry about thread safety since cached values are
// not objects but primitive data types which could be
// accessed in atomic operations. We need to access
// the whole 32 bit of cachedData at once in order to make it
// thread safe. Never access bits from mCache directly.
// We don't need explicit locking here since the cached values are int32s,
// which are read and written atomically. The following code is threadsafe
// because we never access bits from mCache directly -- we always first
// read the entire entry into a local variable and then mask off the bits
// we're interested in.
// Check the 256-byte cache first and bail with our answer if we can.
PRUint32 cachedData = mCache[aChar & CASE_MAP_CACHE_MASK];
if(aChar == ((cachedData >> 16) & 0x0000FFFF))
return (cachedData & 0x0000FFFF);
if (aChar == ((cachedData >> 16) & 0x0000FFFF))
return cachedData & 0x0000FFFF;
// try the last index first
// store into local variable so we can be thread safe
PRUint32 base = mLastBase;
// Now try the last index we looked up, storing it into a local variable
// for thread-safety.
PRUint32 base = mLastBase;
PRUnichar res = 0;
if (( aChar <= ((mTable[base+kSizeEveryIdx] >> 8) +
mTable[base+kLowIdx])) &&
( mTable[base+kLowIdx] <= aChar ))
{
// Hit the last base
if(((mTable[base+kSizeEveryIdx] & 0x00FF) > 0) &&
// Does this character fit in the slot?
if ((aChar <= ((mTable[base+kSizeEveryIdx] >> 8) +
mTable[base+kLowIdx])) &&
(mTable[base+kLowIdx] <= aChar)) {
// This character uses the same base as our last lookup, so the
// conversion is easy.
if (((mTable[base+kSizeEveryIdx] & 0x00FF) > 0) &&
(0 != ((aChar - mTable[base+kLowIdx]) %
(mTable[base+kSizeEveryIdx] & 0x00FF))))
{
res = aChar;
} else {
res = aChar + mTable[base+kDiffIdx];
}
(mTable[base+kSizeEveryIdx] & 0x00FF))))
{
res = aChar;
} else {
res = aChar + mTable[base+kDiffIdx];
}
} else {
res = this->Lookup(0, (mSize/2), mSize-1, aChar);
// Do the full lookup.
res = this->Lookup(0, mSize/2, mSize-1, aChar);
}
// Cache the result and return.
mCache[aChar & CASE_MAP_CACHE_MASK] =
(((aChar << 16) & 0xFFFF0000) | (0x0000FFFF & res));
((aChar << 16) & 0xFFFF0000) | (0x0000FFFF & res);
return res;
}
// Takes as arguments the left bound, middle, right bound, and character to
// search for. Executes a binary search.
PRUnichar Lookup(PRUint32 l,
PRUint32 m,
PRUint32 r,
PRUnichar aChar)
{
PRUint32 base = m*3;
if ( aChar > ((mTable[base+kSizeEveryIdx] >> 8) +
PRUint32 base = m*3; // Every line in the table is 3 units wide.
// Is aChar past the top of the current table entry? (The upper byte of
// the 'every' entry contains the offset to the end of this entry.)
if (aChar > ((mTable[base+kSizeEveryIdx] >> 8) +
mTable[base+kLowIdx]))
{
if( l > m )
if (l > m || l == r)
return aChar;
// Advance one round.
PRUint32 newm = (m+r+1)/2;
if(newm == m)
if (newm == m)
newm++;
return this->Lookup(m+1, newm , r, aChar);
} else if ( mTable[base+kLowIdx] > aChar ) {
if( r < m )
return this->Lookup(m+1, newm, r, aChar);
// Is aChar below the bottom of the current table entry?
} else if (mTable[base+kLowIdx] > aChar) {
if (r < m || l == r)
return aChar;
// Advance one round
PRUint32 newm = (l+m-1)/2;
if(newm == m)
newm++;
return this->Lookup(l, newm, m-1, aChar);
} else {
if(((mTable[base+kSizeEveryIdx] & 0x00FF) > 0) &&
(0 != ((aChar - mTable[base+kLowIdx]) %
(mTable[base+kSizeEveryIdx] & 0x00FF))))
// We've found the entry aChar should live in.
} else {
// Determine if aChar falls in a gap. (The lower byte of the 'every'
// entry contains n for which every nth character from the base is a
// character of interest.)
if (((mTable[base+kSizeEveryIdx] & 0x00FF) > 0) &&
(0 != ((aChar - mTable[base+kLowIdx]) %
(mTable[base+kSizeEveryIdx] & 0x00FF))))
{
return aChar;
}
mLastBase = base; // cache the base
// If aChar doesn't fall in the gap, cache and convert.
mLastBase = base;
return aChar + mTable[base+kDiffIdx];
}
}
@ -167,6 +188,29 @@ static nsCompressedMap gLowerMap = {
gToLowerItems
};
// The case-insensitive comparators call ToLowerCase(PRUnichar) and
// ToLowerCaseASCII(PRUnichar) once per character, so both get always-inlined
// twins (the *_inline functions) for use inside this file.
//
// Lower-cases a single UTF-16 code unit: ASCII goes through the
// gASCIIToLower table, characters for which IS_NOCASE_CHAR is true are
// returned unchanged, and everything else is looked up in gLowerMap.
static NS_ALWAYS_INLINE PRUnichar
ToLowerCase_inline(PRUnichar aChar)
{
  if (IS_ASCII(aChar))
    return gASCIIToLower[aChar];

  return IS_NOCASE_CHAR(aChar) ? aChar : gLowerMap.Map(aChar);
}
// Lower-cases aChar only if it is ASCII (via the gASCIIToLower table); any
// non-ASCII code unit is returned unchanged.
static NS_ALWAYS_INLINE PRUnichar
ToLowerCaseASCII_inline(const PRUnichar aChar)
{
  return IS_ASCII(aChar) ? gASCIIToLower[aChar] : aChar;
}
void
ToLowerCase(nsAString& aString)
{
@ -189,9 +233,7 @@ ToLowerCase(const nsAString& aSource,
PRUnichar
ToLowerCaseASCII(const PRUnichar aChar)
{
if (IS_ASCII_UPPER(aChar))
return aChar + 0x0020;
return aChar;
return ToLowerCaseASCII_inline(aChar);
}
void
@ -218,115 +260,58 @@ ToUpperCase(const nsAString& aSource,
PRInt32
nsCaseInsensitiveStringComparator::operator()(const PRUnichar* lhs,
const PRUnichar* rhs,
PRUint32 aLength) const
PRUint32 lLength,
PRUint32 rLength) const
{
return CaseInsensitiveCompare(lhs, rhs, aLength);
return (lLength == rLength) ? CaseInsensitiveCompare(lhs, rhs, lLength) :
(lLength > rLength) ? 1 : -1;
}
PRInt32
nsCaseInsensitiveStringComparator::operator()(PRUnichar lhs,
PRUnichar rhs) const
nsCaseInsensitiveUTF8StringComparator::operator()(const char* lhs,
const char* rhs,
PRUint32 lLength,
PRUint32 rLength) const
{
// see if they're an exact match first
if (lhs == rhs)
return 0;
lhs = ToLowerCase(lhs);
rhs = ToLowerCase(rhs);
if (lhs == rhs)
return 0;
else if (lhs < rhs)
return -1;
else
return 1;
return CaseInsensitiveCompare(lhs, rhs, lLength, rLength);
}
PRInt32
nsASCIICaseInsensitiveStringComparator::operator()(const PRUnichar* lhs,
const PRUnichar* rhs,
PRUint32 aLength) const
PRUint32 lLength,
PRUint32 rLength) const
{
while (aLength) {
if (lLength != rLength) {
if (lLength > rLength)
return 1;
return -1;
}
while (rLength) {
PRUnichar l = *lhs++;
PRUnichar r = *rhs++;
if (l != r) {
l = ToLowerCaseASCII(l);
r = ToLowerCaseASCII(r);
l = ToLowerCaseASCII_inline(l);
r = ToLowerCaseASCII_inline(r);
if (l > r)
return 1;
else if (r > l)
return -1;
}
aLength--;
rLength--;
}
return 0;
}
PRInt32
nsASCIICaseInsensitiveStringComparator::operator()(PRUnichar lhs,
PRUnichar rhs) const
{
// see if they're an exact match first
if (lhs == rhs)
return 0;
lhs = ToLowerCaseASCII(lhs);
rhs = ToLowerCaseASCII(rhs);
if (lhs == rhs)
return 0;
else if (lhs < rhs)
return -1;
else
return 1;
}
#endif // MOZILLA_INTERNAL_API
PRInt32
CaseInsensitiveCompare(const PRUnichar *a,
const PRUnichar *b,
PRUint32 len)
{
NS_ASSERTION(a && b, "Do not pass in invalid pointers!");
if (len) {
do {
PRUnichar c1 = *a++;
PRUnichar c2 = *b++;
if (c1 != c2) {
c1 = ToLowerCase(c1);
c2 = ToLowerCase(c2);
if (c1 != c2) {
if (c1 < c2) {
return -1;
}
return 1;
}
}
} while (--len != 0);
}
return 0;
}
PRUnichar
ToLowerCase(PRUnichar aChar)
{
if (IS_ASCII(aChar)) {
if (IS_ASCII_UPPER(aChar))
return aChar + 0x0020;
else
return aChar;
} else if (IS_NOCASE_CHAR(aChar)) {
return aChar;
}
return gLowerMap.Map(aChar);
return ToLowerCase_inline(aChar);
}
void
@ -342,7 +327,7 @@ ToUpperCase(PRUnichar aChar)
{
if (IS_ASCII(aChar)) {
if (IS_ASCII_LOWER(aChar))
return aChar - 0x0020;
return aChar - 0x20;
else
return aChar;
} else if (IS_NOCASE_CHAR(aChar)) {
@ -380,7 +365,7 @@ ToTitleCase(PRUnichar aChar)
}
PRUnichar upper = gUpperMap.Map(aChar);
if (0x01C0 == ( upper & 0xFFC0)) {
for (PRUint32 i = 0 ; i < gUpperToTitleItems; i++) {
if (upper == gUpperToTitle[(i*2)+kUpperIdx]) {
@ -391,3 +376,167 @@ ToTitleCase(PRUnichar aChar)
return upper;
}
// Case-insensitively compares the first len UTF-16 code units of a and b.
// Returns 0 if they all match after lower-casing, otherwise -1 or 1 according
// to whether the first differing (lower-cased) unit of a is smaller or larger
// than that of b.  Both pointers must be non-null.
PRInt32
CaseInsensitiveCompare(const PRUnichar *a,
                       const PRUnichar *b,
                       PRUint32 len)
{
  NS_ASSERTION(a && b, "Do not pass in invalid pointers!");

  for (PRUint32 i = 0; i < len; ++i) {
    // Identical units need no case mapping at all.
    if (a[i] == b[i])
      continue;

    const PRUnichar lowerA = ToLowerCase_inline(a[i]);
    const PRUnichar lowerB = ToLowerCase_inline(b[i]);
    if (lowerA != lowerB)
      return (lowerA < lowerB) ? -1 : 1;
  }

  return 0;
}
// Calculates the codepoint of the UTF8 sequence starting at aStr, lower-cased
// when it is encoded in one, two or three bytes.  Sets aNext to the byte
// following the end of the sequence.
//
// Returns PRUint32(-1) and does not set aNext if
//   - the lead byte is not a valid one-, two-, three- or four-byte lead,
//   - the sequence would run past the end of the string (as marked by aEnd), or
//   - any trail byte is not a continuation byte (10XXXXXX).
// Only this lead/continuation structure is checked; overlong encodings and
// out-of-range values are not rejected.
//
// Note that this function doesn't check that aStr < aEnd -- it assumes you've
// done that already.
static NS_ALWAYS_INLINE PRUint32
GetLowerUTF8Codepoint(const char* aStr, const char* aEnd, const char **aNext)
{
  // Convert to unsigned char so that stuffing chars into PRUint32s doesn't
  // sign extend.
  const unsigned char *str = (unsigned char*)aStr;

  if (UTF8traits::isASCII(str[0])) {
    // It's ASCII; just convert to lower-case and return it.
    *aNext = aStr + 1;
    return gASCIIToLower[*str];
  }
  if (UTF8traits::is2byte(str[0]) && NS_LIKELY(aStr + 1 < aEnd)) {
    // It's a two-byte sequence, so it looks like
    // 110XXXXX 10XXXXXX.
    // Reject it if the trail byte isn't a continuation byte; otherwise we
    // would swallow the start of the next character.
    if (NS_UNLIKELY((str[1] & 0xC0) != 0x80))
      return PRUint32(-1);

    // This is definitely in the BMP, so we can store straightaway into a
    // PRUint16.
    PRUint16 c;
    c = (str[0] & 0x1F) << 6;
    c += (str[1] & 0x3F);
    if (!IS_NOCASE_CHAR(c))
      c = gLowerMap.Map(c);
    *aNext = aStr + 2;
    return c;
  }
  if (UTF8traits::is3byte(str[0]) && NS_LIKELY(aStr + 2 < aEnd)) {
    // It's a three-byte sequence, so it looks like
    // 1110XXXX 10XXXXXX 10XXXXXX.
    if (NS_UNLIKELY((str[1] & 0xC0) != 0x80 ||
                    (str[2] & 0xC0) != 0x80))
      return PRUint32(-1);

    // This will just barely fit into 16-bits, so store into a PRUint16.
    PRUint16 c;
    c = (str[0] & 0x0F) << 12;
    c += (str[1] & 0x3F) << 6;
    c += (str[2] & 0x3F);
    if (!IS_NOCASE_CHAR(c))
      c = gLowerMap.Map(c);
    *aNext = aStr + 3;
    return c;
  }
  if (UTF8traits::is4byte(str[0]) && NS_LIKELY(aStr + 3 < aEnd)) {
    // It's a four-byte sequence, so it looks like
    // 11110XXX 10XXXXXX 10XXXXXX 10XXXXXX.
    if (NS_UNLIKELY((str[1] & 0xC0) != 0x80 ||
                    (str[2] & 0xC0) != 0x80 ||
                    (str[3] & 0xC0) != 0x80))
      return PRUint32(-1);

    // Unless this is an overlong sequence, the codepoint it encodes definitely
    // isn't in the BMP, so we don't bother trying to convert it to lower-case.
    PRUint32 c;
    c = (str[0] & 0x07) << 18;
    c += (str[1] & 0x3F) << 12;
    c += (str[2] & 0x3F) << 6;
    c += (str[3] & 0x3F);
    *aNext = aStr + 4;
    return c;
  }

  // Hm, we don't understand this sequence (bad lead byte, or truncated).
  return PRUint32(-1);
}
// Case-insensitively compares two UTF-8 buffers of aLeftBytes and aRightBytes
// bytes, one decoded character at a time.
//
// Returns 0 if the buffers are equal, 1 if the left one orders after the
// right one and -1 if it orders before.  If every compared character matches
// but one buffer still has data, the longer buffer orders last.  A sequence
// that cannot be decoded, on either side, makes the function return -1.
PRInt32 CaseInsensitiveCompare(const char *aLeft,
                               const char *aRight,
                               PRUint32 aLeftBytes,
                               PRUint32 aRightBytes)
{
  const PRUint32 kUndecodable = PRUint32(-1);
  const char * const leftLimit = aLeft + aLeftBytes;
  const char * const rightLimit = aRight + aRightBytes;

  while (aLeft < leftLimit && aRight < rightLimit) {
    // Each call advances its cursor past the character it decoded.
    const PRUint32 lowerLeft = GetLowerUTF8Codepoint(aLeft, leftLimit, &aLeft);
    if (NS_UNLIKELY(lowerLeft == kUndecodable))
      return -1;

    const PRUint32 lowerRight =
      GetLowerUTF8Codepoint(aRight, rightLimit, &aRight);
    if (NS_UNLIKELY(lowerRight == kUndecodable))
      return -1;

    // Both values are already lower-case, so a plain comparison decides.
    if (lowerLeft == lowerRight)
      continue;
    return (lowerLeft > lowerRight) ? 1 : -1;
  }

  // One side (or both) ran out; whichever still has data is the larger one.
  if (aLeft < leftLimit)
    return 1;
  return (aRight < rightLimit) ? -1 : 0;
}
// Decodes one UTF-8 character from each of aLeft and aRight and reports
// whether the two are equal after lower-casing.
//
// On success *aErr is PR_FALSE and *aLeftNext / *aRightNext point just past
// the decoded characters.  If either character cannot be decoded, *aErr is
// PR_TRUE and the function returns PR_FALSE; the next-pointer of the side
// that failed (and of the right side, if the left one failed) is not written.
PRBool
CaseInsensitiveUTF8CharsEqual(const char* aLeft, const char* aRight,
                              const char* aLeftEnd, const char* aRightEnd,
                              const char** aLeftNext, const char** aRightNext,
                              PRBool* aErr)
{
  NS_ASSERTION(aLeftNext, "Out pointer shouldn't be null.");
  NS_ASSERTION(aRightNext, "Out pointer shouldn't be null.");
  NS_ASSERTION(aErr, "Out pointer shouldn't be null.");
  NS_ASSERTION(aLeft < aLeftEnd, "aLeft must be less than aLeftEnd.");
  NS_ASSERTION(aRight < aRightEnd, "aRight must be less than aRightEnd.");

  const PRUint32 kUndecodable = PRUint32(-1);
  PRBool failed = PR_TRUE;
  PRBool equal = PR_FALSE;

  // The right side is only decoded once the left side is known to be good.
  const PRUint32 lowerLeft = GetLowerUTF8Codepoint(aLeft, aLeftEnd, aLeftNext);
  if (NS_LIKELY(lowerLeft != kUndecodable)) {
    const PRUint32 lowerRight =
      GetLowerUTF8Codepoint(aRight, aRightEnd, aRightNext);
    if (NS_LIKELY(lowerRight != kUndecodable)) {
      failed = PR_FALSE;
      equal = (lowerLeft == lowerRight);
    }
  }

  *aErr = failed;
  return equal;
}

View File

@ -78,9 +78,17 @@ class nsCaseInsensitiveStringComparator : public nsStringComparator
public:
virtual PRInt32 operator() (const PRUnichar*,
const PRUnichar*,
PRUint32 aLength) const;
virtual PRInt32 operator() (PRUnichar,
PRUnichar) const;
PRUint32,
PRUint32) const;
};
// Case-insensitive comparator for narrow strings holding UTF-8 text.
class nsCaseInsensitiveUTF8StringComparator : public nsCStringComparator
{
public:
// Arguments are, in order: left buffer, right buffer, left length in bytes,
// right length in bytes.  The result is that of
// CaseInsensitiveCompare(const char*, const char*, PRUint32, PRUint32).
virtual PRInt32 operator() (const char*,
const char*,
PRUint32,
PRUint32) const;
};
class nsCaseInsensitiveStringArrayComparator
@ -97,9 +105,8 @@ class nsASCIICaseInsensitiveStringComparator : public nsStringComparator
public:
virtual int operator() (const PRUnichar*,
const PRUnichar*,
PRUint32 aLength) const;
virtual int operator() (PRUnichar,
PRUnichar) const;
PRUint32,
PRUint32) const;
};
inline PRBool
@ -126,4 +133,33 @@ CaseInsensitiveFindInReadable(const nsAString& aPattern,
PRInt32
CaseInsensitiveCompare(const PRUnichar *a, const PRUnichar *b, PRUint32 len);
/**
 * Case-insensitively compares the UTF-8 buffer aLeft (aLeftBytes bytes long)
 * with the UTF-8 buffer aRight (aRightBytes bytes long).
 *
 * Returns 0 if the buffers are equal, 1 if aLeft orders after aRight and -1
 * if it orders before.  When every compared character matches but one buffer
 * is longer, the longer buffer orders last.  If a sequence in either buffer
 * cannot be decoded, the function returns -1.
 */
PRInt32
CaseInsensitiveCompare(const char* aLeft, const char* aRight,
PRUint32 aLeftBytes, PRUint32 aRightBytes);
/**
* This function determines whether the UTF-8 sequence pointed to by aLeft is
* case-insensitively-equal to the UTF-8 sequence pointed to by aRight.
*
* aLeftEnd marks the first memory location past aLeft that is not part of
* aLeft; aRightEnd similarly marks the end of aRight.
*
* The function assumes that aLeft < aLeftEnd and aRight < aRightEnd.
*
* The function stores the addresses of the next characters in the sequence
* into aLeftNext and aRightNext. It's up to the caller to make sure that the
* returned pointers are valid -- i.e. the function may return aLeftNext >=
* aLeftEnd or aRightNext >= aRightEnd.
*
* If the function encounters invalid text, it sets aErr to true and returns
* false, possibly leaving aLeftNext and aRightNext uninitialized. If the
* function returns true, aErr is guaranteed to be false and both aLeftNext and
* aRightNext are guaranteed to be initialized.
*/
PRBool
CaseInsensitiveUTF8CharsEqual(const char* aLeft, const char* aRight,
const char* aLeftEnd, const char* aRightEnd,
const char** aLeftNext, const char** aRightNext,
PRBool* aErr);
#endif /* nsUnicharUtils_h__ */

View File

@ -594,7 +594,7 @@ FindInReadable( const nsAString& aPattern,
{
// fast inner loop (that's what it's called, not what it is) looks for a potential match
while ( aSearchStart != aSearchEnd &&
compare(*aPatternStart, *aSearchStart) )
compare(aPatternStart.get(), aSearchStart.get(), 1, 1) )
++aSearchStart;
// if we broke out of the `fast' loop because we're out of string ... we're done: no match
@ -631,7 +631,7 @@ FindInReadable( const nsAString& aPattern,
// else if we mismatched ... it's time to advance to the next search position
// and get back into the `fast' loop
if ( compare(*testPattern, *testSearch) )
if ( compare(testPattern.get(), testSearch.get(), 1, 1) )
{
++aSearchStart;
break;

View File

@ -140,7 +140,7 @@ namespace places {
testSourceItr(sourceStart);
// Keep trying to match the token one by one until it doesn't match.
while (!caseInsensitiveCompare(*testTokenItr, *testSourceItr)) {
while (!caseInsensitiveCompare(testTokenItr, testSourceItr, 1, 1)) {
// We matched something, so move down one.
testTokenItr++;
testSourceItr++;

View File

@ -6437,7 +6437,7 @@ StringCaseInsensitiveEquals(const PRUnichar* aChars1, const PRUint32 aNumChars1,
return PR_FALSE;
nsCaseInsensitiveStringComparator comp;
return comp(aChars1, aChars2, aNumChars1) == 0;
return comp(aChars1, aChars2, aNumChars1, aNumChars2) == 0;
}
UINT nsWindow::MapFromNativeToDOM(UINT aNativeKeyCode)

View File

@ -80,8 +80,7 @@ class NS_COM nsCaseInsensitiveCStringComparator
public:
typedef char char_type;
virtual int operator()( const char_type*, const char_type*, PRUint32 length ) const;
virtual int operator()( char_type, char_type ) const;
virtual int operator()( const char_type*, const char_type*, PRUint32, PRUint32 ) const;
};
class nsCaseInsensitiveCStringArrayComparator

View File

@ -50,8 +50,7 @@ class NS_COM nsTStringComparator_CharT
nsTStringComparator_CharT() {}
virtual int operator()( const char_type*, const char_type*, PRUint32 length ) const = 0;
virtual int operator()( char_type, char_type ) const = 0;
virtual int operator()( const char_type*, const char_type*, PRUint32, PRUint32 ) const = 0;
};
@ -66,8 +65,7 @@ class NS_COM nsTDefaultStringComparator_CharT
nsTDefaultStringComparator_CharT() {}
virtual int operator()( const char_type*, const char_type*, PRUint32 length ) const;
virtual int operator()( char_type, char_type ) const;
virtual int operator()( const char_type*, const char_type*, PRUint32, PRUint32 ) const;
};
/**

View File

@ -802,7 +802,7 @@ FindInReadable_Impl( const StringT& aPattern, IteratorT& aSearchStart, IteratorT
{
// fast inner loop (that's what it's called, not what it is) looks for a potential match
while ( aSearchStart != aSearchEnd &&
compare(*aPatternStart, *aSearchStart) )
compare(aPatternStart.get(), aSearchStart.get(), 1, 1) )
++aSearchStart;
// if we broke out of the `fast' loop because we're out of string ... we're done: no match
@ -839,7 +839,7 @@ FindInReadable_Impl( const StringT& aPattern, IteratorT& aSearchStart, IteratorT
// else if we mismatched ... it's time to advance to the next search position
// and get back into the `fast' loop
if ( compare(*testPattern, *testSearch) )
if ( compare(testPattern.get(), testSearch.get(), 1, 1) )
{
++aSearchStart;
break;
@ -871,7 +871,7 @@ RFindInReadable_Impl( const StringT& aPattern, IteratorT& aSearchStart, Iterator
--searchEnd;
// Check last character, if a match, explore further from here
if ( compare(*patternEnd, *searchEnd) == 0 )
if ( compare(patternEnd.get(), searchEnd.get(), 1, 1) == 0 )
{
// We're at a potential match, let's see if we really hit one
IteratorT testPattern(patternEnd);
@ -900,7 +900,7 @@ RFindInReadable_Impl( const StringT& aPattern, IteratorT& aSearchStart, Iterator
--testPattern;
--testSearch;
}
while ( compare(*testPattern, *testSearch) == 0 );
while ( compare(testPattern.get(), testSearch.get(), 1, 1) == 0 );
}
}

View File

@ -53,23 +53,17 @@
int
nsCaseInsensitiveCStringComparator::operator()( const char_type* lhs, const char_type* rhs, PRUint32 aLength ) const
nsCaseInsensitiveCStringComparator::operator()( const char_type* lhs,
const char_type* rhs,
PRUint32 lLength,
PRUint32 rLength ) const
{
PRInt32 result=PRInt32(PL_strncasecmp(lhs, rhs, aLength));
if (lLength != rLength)
return (lLength > rLength) ? 1 : -1;
PRInt32 result=PRInt32(PL_strncasecmp(lhs, rhs, lLength));
//Egads. PL_strncasecmp is returning *very* negative numbers.
//Some folks expect -1,0,1, so let's temper its enthusiasm.
if (result<0)
result=-1;
return result;
}
int
nsCaseInsensitiveCStringComparator::operator()( char lhs, char rhs ) const
{
if (lhs == rhs) return 0;
lhs = tolower(lhs);
rhs = tolower(rhs);
return lhs - rhs;
}

View File

@ -53,7 +53,7 @@ Compare( const nsTSubstring_CharT::base_string_type& lhs, const nsTSubstring_Cha
size_type lengthToCompare = NS_MIN(lLength, rLength);
int result;
if ( (result = comp(leftIter.get(), rightIter.get(), lengthToCompare)) == 0 )
if ( (result = comp(leftIter.get(), rightIter.get(), lengthToCompare, lengthToCompare)) == 0 )
{
if ( lLength < rLength )
result = -1;
@ -67,13 +67,8 @@ Compare( const nsTSubstring_CharT::base_string_type& lhs, const nsTSubstring_Cha
}
int
nsTDefaultStringComparator_CharT::operator()( const char_type* lhs, const char_type* rhs, PRUint32 aLength ) const
nsTDefaultStringComparator_CharT::operator()( const char_type* lhs, const char_type* rhs, PRUint32 lLength, PRUint32 rLength) const
{
return nsCharTraits<CharT>::compare(lhs, rhs, aLength);
return (lLength == rLength) ? nsCharTraits<CharT>::compare(lhs, rhs, lLength) :
(lLength > rLength) ? 1 : -1;
}
int
nsTDefaultStringComparator_CharT::operator()( char_type lhs, char_type rhs) const
{
return lhs - rhs;
}

View File

@ -605,7 +605,7 @@ nsTSubstring_CharT::Equals( const self_type& str ) const
PRBool
nsTSubstring_CharT::Equals( const self_type& str, const comparator_type& comp ) const
{
return mLength == str.mLength && comp(mData, str.mData, mLength) == 0;
return mLength == str.mLength && comp(mData, str.mData, mLength, str.mLength) == 0;
}
PRBool
@ -635,7 +635,7 @@ nsTSubstring_CharT::Equals( const char_type* data, const comparator_type& comp )
// XXX avoid length calculation?
size_type length = char_traits::length(data);
return mLength == length && comp(mData, data, mLength) == 0;
return mLength == length && comp(mData, data, mLength, length) == 0;
}
PRBool