libs-Unicode_Collation: Replace with new patches from Fabian Maurer.

This commit is contained in:
Zebediah Figura 2020-12-08 20:14:26 -06:00
parent fce121fcd9
commit 213037cb33
9 changed files with 2066 additions and 315 deletions

View File

@ -0,0 +1,598 @@
From 2886829edafa6bf3fe64b15d092a380fd53e9ad4 Mon Sep 17 00:00:00 2001
From: Fabian Maurer <dark.shadow4@web.de>
Date: Fri, 10 Apr 2020 18:47:18 +0200
Subject: [PATCH] kernelbase: Implement sortkey generation on official tables
Signed-off-by: Fabian Maurer <dark.shadow4@web.de>
---
dlls/kernel32/tests/locale.c | 137 ++++++++++++
dlls/kernelbase/locale.c | 399 ++++++++++++++++++++++++-----------
2 files changed, 413 insertions(+), 123 deletions(-)
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
index 3fdfa251144..d875bf94f92 100644
--- a/dlls/kernel32/tests/locale.c
+++ b/dlls/kernel32/tests/locale.c
@@ -2687,6 +2687,13 @@ static void test_lcmapstring_unicode(lcmapstring_wrapper func_ptr, const char *f
lstrlenW(symbols_stripped) + 1, ret);
ok(!lstrcmpW(buf, symbols_stripped), "%s string comparison mismatch\n", func_name);
+ /* test small buffer */
+ lstrcpyW(buf, fooW);
+ ret = func_ptr(LCMAP_SORTKEY, lower_case, -1, buf, 2);
+ ok(ret == 0, "Expected a failure\n");
+ ok(GetLastError() == ERROR_INSUFFICIENT_BUFFER,
+ "%s unexpected error code %d\n", func_name, GetLastError());;
+
/* test srclen = 0 */
SetLastError(0xdeadbeef);
ret = func_ptr(0, upper_case, 0, buf, ARRAY_SIZE(buf));
@@ -3114,6 +3121,135 @@ static void test_sorting(void)
}
}
+struct sorting_test_entry { /* One row of unicode_sorting_tests: a string pair plus the expected orderings. */
+ const WCHAR *locale; /* locale name passed to LCMapStringEx and CompareStringEx */
+ int result_sortkey; /* expected ordering of the two generated sort keys, as a CSTR_* value */
+ int result_compare; /* expected return value of CompareStringEx, as a CSTR_* value */
+ DWORD flags; /* extra flags: OR'ed with LCMAP_SORTKEY for the keys, passed unchanged to CompareStringEx */
+ const WCHAR *first; /* first string of the pair */
+ const WCHAR *second; /* second string of the pair */
+ BOOL broken_on_xp; /* handed to broken() as the alternative to a matching result */
+};
+
+static const struct sorting_test_entry unicode_sorting_tests[] = /* columns: locale, result_sortkey, result_compare, flags, first, second, broken_on_xp */
+{
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0037", L"\x277c", TRUE }, /* Normal character */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x1eca", L"\x1ecb" }, /* Normal character */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x1d05", L"\x1d48" }, /* Normal character */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x19d7", L"\x096d" }, /* Normal character diacritics */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x00f5", L"\x1ecf" }, /* Normal character diacritics */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x2793", L"\x0d70", TRUE }, /* Normal character diacritics */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"A", L"a" }, /* Normal character case weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"z", L"Z" }, /* Normal character case weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xe5a6", L"\xe5a5\x0333", TRUE }, /* PUA character */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xe5d7", L"\xe5d6\x0330", TRUE }, /* PUA character */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\u276a", L"\u2768" }, /* Symbols add diacritic weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\u204d", L"\uff02" }, /* Symbols add case weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\ue6e3\u0a02", L"\ue6e3\u20dc", TRUE }, /* Default character, when there is main weight extra there must be no diacritic weight */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"a \u2060 b", L"a b" }, /* Unsortable characters */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"a \xfff0 b", L"a b" }, /* Invalid/undefined characters */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"a\x139F a", L"a a" }, /* Invalid/undefined characters */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"a\x139F a", L"a b" }, /* Invalid/undefined characters */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x00fc", L"\x016d" }, /* Default characters */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x3fcb\x7fd5", L"\x0006\x3032" }, /* Default characters */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x00fc\x30fd", L"\x00fa\x1833" }, /* Default characters */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x1B56\x0330", L"\x1096" }, /* Diacritic is added */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x1817\x0333", L"\x19d7" }, /* Diacritic is added */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x04de\x05ac", L"\x0499" }, /* Diacritic is added */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x01ba\x0654", L"\x01b8" }, /* Diacritic can overflow */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x06b7\x06eb", L"\x06b6" }, /* Diacritic can overflow */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x1420\x0333", L"\x141f" }, /* Diacritic can overflow */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN ,0, L"\x1b56\x0654", L"\x1b56\x0655" }, /* Diacritic can overflow */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x1b56\x0654\x0654", L"\x1b56\x0655" }, /* Diacritic can overflow */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x11bc", L"\x110b" }, /* Jamo case weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x11c1", L"\x1111" }, /* Jamo case weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x11af", L"\x1105" }, /* Jamo case weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x11c2", L"\x11f5" }, /* Jamo main weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x1108", L"\x1121" }, /* Jamo main weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x1116", L"\x11c7" }, /* Jamo main weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x11b1", L"\x11d1" }, /* Jamo main weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x4550\x73d2", L"\x3211\x23ad" }, /* CJK main weight 1 */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x3265", L"\x4079" }, /* CJK main weight 1 */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x4c19\x68d0\x52d0", L"\x316d" }, /* CJK main weight 1 */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x72dd", L"\x6b8a" }, /* CJK main weight 2 */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x6785\x3bff\x6f83", L"\x7550\x34c9\x71a7" }, /* CJK main weight 2 */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x5d61", L"\x3aef" }, /* CJK main weight 2 */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x207a", L"\xfe62" }, /* Symbols case weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xfe65", L"\xff1e" }, /* Symbols case weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x2502", L"\xffe8" }, /* Symbols case weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x21da", L"\x21dc" }, /* Symbols diacritic weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x29fb", L"\x2295" }, /* Symbols diacritic weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0092", L"\x009c" }, /* Symbols diacritic weights */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS, L"\x21da", L"\x21dc" }, /* NORM_IGNORESYMBOLS */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS, L"\x29fb", L"\x2295" }, /* NORM_IGNORESYMBOLS */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS, L"\x0092", L"\x009c" }, /* NORM_IGNORESYMBOLS */
+ { L"en-US", CSTR_EQUAL, CSTR_LESS_THAN, 0, L"\x3099", L"\x309a" }, /* Small diacritic weights at the end get ignored */
+ { L"en-US", CSTR_EQUAL, CSTR_LESS_THAN, 0, L"\x309b", L"\x05a2" }, /* Small diacritic weights at the end get ignored */
+ { L"en-US", CSTR_EQUAL, CSTR_LESS_THAN, 0, L"\xff9e", L"\x0e47" }, /* Small diacritic weights at the end get ignored */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"a b", L"\x0103 a" }, /* Main weights have priority over diacritic weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"a", L"\x0103" }, /* Main weights have priority over diacritic weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"e x", L"\x0113 v" }, /* Main weights have priority over diacritic weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"e", L"\x0113" }, /* Main weights have priority over diacritic weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"c s", L"\x0109 r" }, /* Main weights have priority over diacritic weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"c", L"\x0109" }, /* Main weights have priority over diacritic weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"a \x0103", L"A a" }, /* Diacritic weights have priority over case weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"a", L"A" }, /* Diacritic weights have priority over case weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"e \x0113", L"E e" }, /* Diacritic weights have priority over case weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"e", L"E" }, /* Diacritic weights have priority over case weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"c \x0109", L"C c" }, /* Diacritic weights have priority over case weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"c", L"C" }, /* Diacritic weights have priority over case weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, NORM_IGNORENONSPACE, L"\x1152", L"\x1153" }, /* Diacritic values for Jamo are not ignored */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, NORM_IGNORENONSPACE, L"\x1143", L"\x1145" }, /* Diacritic values for Jamo are not ignored */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, NORM_IGNORENONSPACE, L"\x1196", L"\x1174" }, /* Diacritic values for Jamo are not ignored */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x318e", L"\x382a" }, /* Jungseong < PUA */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xffcb", L"\x3d13" }, /* Jungseong < PUA */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xffcc", L"\x8632" }, /* Jungseong < PUA */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xd847", L"\x382a" }, /* Surrogate > PUA */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xd879", L"\x3d13" }, /* Surrogate > PUA */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xd850", L"\x8632" }, /* Surrogate > PUA */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"A\x0301\x0301", L"A\x0301\x00ad\x0301" }, /* Unsortable combined with diacritics */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"b\x07f2\x07f2", L"b\x07f2\x2064\x07f2" }, /* Unsortable combined with diacritics */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"X\x0337\x0337", L"X\x0337\xfffd\x0337" }, /* Unsortable combined with diacritics */
+};
+
+static void test_unicode_sorting(void)
+{   /* Checks sort-key ordering and CompareStringEx results for every row of unicode_sorting_tests. */
+    int i;
+    int ret1;
+    int ret2;
+    BYTE buffer[1000];
+    if (!pLCMapStringEx)
+    {
+        win_skip("LCMapStringEx not available\n");
+        return;
+    }
+    for (i = 0; i < ARRAY_SIZE(unicode_sorting_tests); i++)
+    {
+        BYTE buff1[1000];
+        BYTE buff2[1000];
+        int len1, len2;
+        int result;
+        const struct sorting_test_entry *entry = &unicode_sorting_tests[i];
+
+        len1 = pLCMapStringEx(entry->locale, LCMAP_SORTKEY | entry->flags, entry->first, -1, (WCHAR*)buff1, ARRAY_SIZE(buff1), NULL, NULL, 0);
+        len2 = pLCMapStringEx(entry->locale, LCMAP_SORTKEY | entry->flags, entry->second, -1, (WCHAR*)buff2, ARRAY_SIZE(buff2), NULL, NULL, 0);
+        ok(len1 > 0 && len2 > 0, "Test %d - sortkey generation failed, len1=%d, len2=%d\n", i, len1, len2); /* a zero length would compare as "equal" below */
+        result = memcmp(buff1, buff2, min(len1, len2));
+        result = (result > 0) - (result < 0) + CSTR_EQUAL; /* memcmp() only guarantees the sign, so map it onto CSTR_LESS_THAN/EQUAL/GREATER_THAN */
+        ok (result == entry->result_sortkey || broken(entry->broken_on_xp), "Test %d (%s, %s) - Expected %d, got %d\n",
+            i, wine_dbgstr_w(entry->first), wine_dbgstr_w(entry->second), entry->result_sortkey, result);
+
+        result = CompareStringEx(entry->locale, entry->flags, entry->first, -1, entry->second, -1, NULL, NULL, 0);
+        if (strcmp(winetest_platform, "wine")) /* Disable test on wine for now */
+            ok (result == entry->result_compare || broken(entry->broken_on_xp), "Test %d (%s, %s) - Expected %d, got %d\n",
+                i, wine_dbgstr_w(entry->first), wine_dbgstr_w(entry->second), entry->result_compare, result);
+    }
+    /* Test diacritics when buffer is short: the length reported for a real buffer must match the length query */
+    ret1 = pLCMapStringEx(L"en-US", LCMAP_SORTKEY, L"\x0e49\x0e49\x0e49\x0e49\x0e49", -1, (WCHAR*)buffer, 20, NULL, NULL, 0);
+    ret2 = pLCMapStringEx(L"en-US", LCMAP_SORTKEY, L"\x0e49\x0e49\x0e49\x0e49\x0e49", -1, (WCHAR*)buffer, 0, NULL, NULL, 0);
+    ok(ret1 == ret2, "Got ret1=%d, ret2=%d\n", ret1, ret2);
+}
+
static void test_FoldStringA(void)
{
int ret, i, j;
@@ -6967,4 +7103,5 @@ START_TEST(locale)
test_NLSVersion();
/* this requires collation table patch to make it MS compatible */
if (0) test_sorting();
+ test_unicode_sorting();
}
diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c
index c60b796aa48..3a29f3e8250 100644
--- a/dlls/kernelbase/locale.c
+++ b/dlls/kernelbase/locale.c
@@ -2128,127 +2128,6 @@ static int wcstombs_codepage( UINT codepage, DWORD flags, const WCHAR *src, int
return wcstombs_sbcs( info, src, srclen, dst, dstlen );
}
-
-static int get_sortkey( DWORD flags, const WCHAR *src, int srclen, char *dst, int dstlen )
-{
- WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
- int key_len[4];
- char *key_ptr[4];
- const WCHAR *src_save = src;
- int srclen_save = srclen;
-
- key_len[0] = key_len[1] = key_len[2] = key_len[3] = 0;
- for (; srclen; srclen--, src++)
- {
- unsigned int i, decomposed_len = 1;/*wine_decompose(*src, dummy, 4);*/
- dummy[0] = *src;
- if (decomposed_len)
- {
- for (i = 0; i < decomposed_len; i++)
- {
- WCHAR wch = dummy[i];
- unsigned int ce;
-
- if ((flags & NORM_IGNORESYMBOLS) &&
- (get_char_type( CT_CTYPE1, wch ) & (C1_PUNCT | C1_SPACE)))
- continue;
-
- if (flags & NORM_IGNORECASE) wch = casemap( nls_info.LowerCaseTable, wch );
-
- ce = collation_table[collation_table[collation_table[wch >> 8] + ((wch >> 4) & 0x0f)] + (wch & 0xf)];
- if (ce != (unsigned int)-1)
- {
- if (ce >> 16) key_len[0] += 2;
- if ((ce >> 8) & 0xff) key_len[1]++;
- if ((ce >> 4) & 0x0f) key_len[2]++;
- if (ce & 1)
- {
- if (wch >> 8) key_len[3]++;
- key_len[3]++;
- }
- }
- else
- {
- key_len[0] += 2;
- if (wch >> 8) key_len[0]++;
- if (wch & 0xff) key_len[0]++;
- }
- }
- }
- }
-
- if (!dstlen) /* compute length */
- /* 4 * '\1' + key length */
- return key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4;
-
- if (dstlen < key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4 + 1)
- return 0; /* overflow */
-
- src = src_save;
- srclen = srclen_save;
-
- key_ptr[0] = dst;
- key_ptr[1] = key_ptr[0] + key_len[0] + 1;
- key_ptr[2] = key_ptr[1] + key_len[1] + 1;
- key_ptr[3] = key_ptr[2] + key_len[2] + 1;
-
- for (; srclen; srclen--, src++)
- {
- unsigned int i, decomposed_len = 1;/*wine_decompose(*src, dummy, 4);*/
- dummy[0] = *src;
- if (decomposed_len)
- {
- for (i = 0; i < decomposed_len; i++)
- {
- WCHAR wch = dummy[i];
- unsigned int ce;
-
- if ((flags & NORM_IGNORESYMBOLS) &&
- (get_char_type( CT_CTYPE1, wch ) & (C1_PUNCT | C1_SPACE)))
- continue;
-
- if (flags & NORM_IGNORECASE) wch = casemap( nls_info.LowerCaseTable, wch );
-
- ce = collation_table[collation_table[collation_table[wch >> 8] + ((wch >> 4) & 0x0f)] + (wch & 0xf)];
- if (ce != (unsigned int)-1)
- {
- WCHAR key;
- if ((key = ce >> 16))
- {
- *key_ptr[0]++ = key >> 8;
- *key_ptr[0]++ = key & 0xff;
- }
- /* make key 1 start from 2 */
- if ((key = (ce >> 8) & 0xff)) *key_ptr[1]++ = key + 1;
- /* make key 2 start from 2 */
- if ((key = (ce >> 4) & 0x0f)) *key_ptr[2]++ = key + 1;
- /* key 3 is always a character code */
- if (ce & 1)
- {
- if (wch >> 8) *key_ptr[3]++ = wch >> 8;
- if (wch & 0xff) *key_ptr[3]++ = wch & 0xff;
- }
- }
- else
- {
- *key_ptr[0]++ = 0xff;
- *key_ptr[0]++ = 0xfe;
- if (wch >> 8) *key_ptr[0]++ = wch >> 8;
- if (wch & 0xff) *key_ptr[0]++ = wch & 0xff;
- }
- }
- }
- }
-
- *key_ptr[0] = 1;
- *key_ptr[1] = 1;
- *key_ptr[2] = 1;
- *key_ptr[3]++ = 1;
- *key_ptr[3] = 0;
- return key_ptr[3] - dst;
-}
-
-
/* compose a full-width katakana. return consumed source characters. */
static int compose_katakana( const WCHAR *src, int srclen, WCHAR *dst )
{
@@ -2576,6 +2455,280 @@ static int compare_weights(int flags, const WCHAR *str1, int len1,
return len1 - len2;
}
+enum sortkey_special_script /* script_member values that the sortkey_add_*_weights() switches treat specially */
+{
+ SORTKEY_UNSORTABLE = 0, /* emits nothing in the main, diacritic or case section */
+ SORTKEY_DIACRITIC = 1, /* emits nothing in the main and case sections; handled in the diacritic section only */
+ SORTKEY_JAPANESE = 3, /* not implemented yet (TODO): no weights are emitted */
+ SORTKEY_JAMO = 4, /* main section gets the primary and diacritic bytes, without a script byte */
+ SORTKEY_CJK = 5, /* main section gets 253, the primary byte and the diacritic byte */
+ SORTKEY_PUNCTUATION = 6, /* not implemented yet (TODO): no weights are emitted */
+ SORTKEY_SYMBOL_1 = 7, /* SYMBOL_1..SYMBOL_6 share one code path and are skipped when NORM_IGNORESYMBOLS is set */
+ SORTKEY_SYMBOL_2 = 8,
+ SORTKEY_SYMBOL_3 = 9,
+ SORTKEY_SYMBOL_4 = 10,
+ SORTKEY_SYMBOL_5 = 11,
+ SORTKEY_SYMBOL_6 = 12,
+};
+
+#define SORTKEY_MIN_WEIGHT 2 /* filler diacritic/case weight for Jamo and CJK; diacritic weights not above it are trimmed from the end of the diacritic section */
+
+struct character_info /* the four bytes unpacked from one sort.keys[] entry by sortkey_get_char() */
+{
+ BYTE weight_primary; /* bits 0-7 of the table entry */
+ BYTE script_member; /* bits 8-15; compared against enum sortkey_special_script */
+ BYTE weight_diacritic; /* bits 16-23 */
+ BYTE weight_case; /* bits 24-31 */
+};
+
+struct sortkey_data /* output cursor shared by the sortkey_add_* helpers */
+{
+ BYTE *buffer; /* destination bytes; only written at indices below buffer_len */
+ int buffer_pos; /* next write index; keeps advancing past buffer_len so it also measures the required size */
+ int buffer_len; /* number of bytes that may be written to buffer */
+};
+
+static void sortkey_get_char(struct character_info *info, WCHAR ch)
+{   /* Unpacks the sort.keys[] entry for ch into the four byte-sized fields of *info. */
+    const DWORD packed = sort.keys[ch];
+
+    info->weight_primary   = (BYTE)packed;           /* bits 0-7 */
+    info->script_member    = (BYTE)(packed >> 8);    /* bits 8-15 */
+    info->weight_diacritic = (BYTE)(packed >> 16);   /* bits 16-23 */
+    info->weight_case      = (BYTE)(packed >> 24);   /* bits 24-31 */
+}
+
+
+static BOOL sortkey_is_PUA(BYTE script_member)
+{   /* Non-zero when script_member lies in the inclusive range 0xa9..0xaf, which the callers handle as PUA characters. */
+    return !(script_member < 0xa9 || script_member > 0xaf);
+}
+
+static void sortkey_add_weight(struct sortkey_data *data, BYTE value)
+{   /* Appends one byte; the position always advances, the store only happens while it fits in the buffer. */
+    const int slot = data->buffer_pos++;
+    if (slot < data->buffer_len)
+        data->buffer[slot] = value;
+}
+
+static void sortkey_add_case_weight(struct sortkey_data *data, int flags, BYTE value)
+{   /* Appends a case weight after clearing the bits that the NORM_IGNORE* flags ask to ignore. */
+    int dropped = 0;
+
+    if (flags & NORM_IGNORECASE)  dropped |= 0x18;
+    if (flags & NORM_IGNOREWIDTH) dropped |= 0x01;
+
+    sortkey_add_weight(data, value & ~dropped);
+}
+
+static void sortkey_add_diacritic_weight(struct sortkey_data *data, BYTE value, int *last_weighted_pos)
+{   /* Appends a diacritic weight and records the new end position when the weight is above SORTKEY_MIN_WEIGHT. */
+    sortkey_add_weight(data, value);
+    if (value <= SORTKEY_MIN_WEIGHT) return;
+    *last_weighted_pos = data->buffer_pos;
+}
+
+static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR c)
+{ /* Appends the main-section bytes for character c; what is emitted depends on its script_member. */
+ struct character_info info;
+
+ sortkey_get_char(&info, c);
+
+ switch (info.script_member)
+ {
+ case SORTKEY_UNSORTABLE:
+ case SORTKEY_DIACRITIC: /* neither kind contributes to the main section */
+ break;
+
+ case SORTKEY_JAPANESE:
+ /* TODO */
+ break;
+
+ case SORTKEY_JAMO: /* no script byte; the table's diacritic byte is emitted as part of the main section */
+ sortkey_add_weight(data, info.weight_primary);
+ sortkey_add_weight(data, info.weight_diacritic);
+ break;
+
+ case SORTKEY_CJK:
+ sortkey_add_weight(data, 253); /* fixed lead byte emitted for every CJK character */
+ sortkey_add_weight(data, info.weight_primary);
+ sortkey_add_weight(data, info.weight_diacritic);
+ break;
+
+ case SORTKEY_PUNCTUATION:
+ /* TODO */
+ break;
+
+ case SORTKEY_SYMBOL_1:
+ case SORTKEY_SYMBOL_2:
+ case SORTKEY_SYMBOL_3:
+ case SORTKEY_SYMBOL_4:
+ case SORTKEY_SYMBOL_5:
+ case SORTKEY_SYMBOL_6:
+ if (flags & NORM_IGNORESYMBOLS) /* symbols are dropped from the key entirely */
+ break;
+
+ sortkey_add_weight(data, info.script_member);
+ sortkey_add_weight(data, info.weight_primary);
+ break;
+
+ default: /* every other script: script byte followed by the primary byte */
+ sortkey_add_weight(data, info.script_member);
+ sortkey_add_weight(data, info.weight_primary);
+ if (sortkey_is_PUA(info.script_member)) /* PUA characters are handled differently */
+ sortkey_add_weight(data, info.weight_diacritic);
+ break;
+ }
+}
+
+static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags, WCHAR c, int *last_weighted_pos, int diacritic_start_pos)
+{ /* Appends or merges the diacritic-section byte for c; diacritic_start_pos is where this section begins in the buffer. */
+ struct character_info info;
+ int old_pos; /* index of the most recently added byte */
+
+ sortkey_get_char(&info, c);
+
+ switch (info.script_member)
+ {
+ case SORTKEY_UNSORTABLE:
+ break;
+
+ case SORTKEY_DIACRITIC:
+ old_pos = data->buffer_pos - 1;
+ /*
+ * Diacritic weights are added to the previous weight, if there is one,
+ * rather than being concatenated after it. This may result in overflow,
+ * which is not protected against. */
+
+ if (old_pos >= diacritic_start_pos) /* this section already holds a byte: merge into it */
+ { /* NOTE(review): *last_weighted_pos is not advanced on this path, so a byte that was not above SORTKEY_MIN_WEIGHT before the merge is still trimmed by the caller - confirm this is intended */
+ if (old_pos < data->buffer_len) /* no store when the byte lies outside the writable buffer */
+ data->buffer[old_pos] += info.weight_diacritic; /* Overflow can happen, that's okay */
+ }
+ else /* nothing in this section yet: emit the weight as its own byte */
+ sortkey_add_diacritic_weight(data, info.weight_diacritic, last_weighted_pos);
+ break;
+
+ case SORTKEY_JAPANESE:
+ /* TODO */
+ break;
+
+ case SORTKEY_JAMO:
+ case SORTKEY_CJK: /* their table diacritic byte went into the main section; emit the filler weight here */
+ sortkey_add_diacritic_weight(data, SORTKEY_MIN_WEIGHT, last_weighted_pos);
+ break;
+
+ case SORTKEY_PUNCTUATION:
+ /* TODO */
+ break;
+
+ case SORTKEY_SYMBOL_1:
+ case SORTKEY_SYMBOL_2:
+ case SORTKEY_SYMBOL_3:
+ case SORTKEY_SYMBOL_4:
+ case SORTKEY_SYMBOL_5:
+ case SORTKEY_SYMBOL_6:
+ if (!(flags & NORM_IGNORESYMBOLS)) /* symbols contribute nothing when they are ignored */
+ sortkey_add_diacritic_weight(data, info.weight_diacritic, last_weighted_pos);
+ break;
+
+ default:
+ if (!sortkey_is_PUA(info.script_member)) /* PUA characters are handled differently */
+ sortkey_add_diacritic_weight(data, info.weight_diacritic, last_weighted_pos);
+ break;
+ }
+}
+
+static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR c)
+{ /* Appends the case-section byte for character c, if its script_member contributes one. */
+ struct character_info info;
+
+ sortkey_get_char(&info, c);
+
+ switch (info.script_member)
+ {
+ case SORTKEY_UNSORTABLE:
+ case SORTKEY_DIACRITIC: /* neither kind contributes to the case section */
+ break;
+
+ case SORTKEY_JAPANESE:
+ /* TODO */
+ break;
+
+ case SORTKEY_CJK: /* the filler weight is used rather than the table's case byte */
+ sortkey_add_case_weight(data, flags, SORTKEY_MIN_WEIGHT);
+ break;
+
+ case SORTKEY_PUNCTUATION:
+ /* TODO */
+ break;
+
+ case SORTKEY_SYMBOL_1:
+ case SORTKEY_SYMBOL_2:
+ case SORTKEY_SYMBOL_3:
+ case SORTKEY_SYMBOL_4:
+ case SORTKEY_SYMBOL_5:
+ case SORTKEY_SYMBOL_6:
+ if (!(flags & NORM_IGNORESYMBOLS)) /* symbols contribute nothing when they are ignored */
+ sortkey_add_case_weight(data, flags, info.weight_case);
+ break;
+
+ case SORTKEY_JAMO: /* Jamo shares the default handling */
+ default:
+ sortkey_add_case_weight(data, flags, info.weight_case);
+ break;
+ }
+}
+
+static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, int str_len, BYTE *buffer, int buffer_len)
+{ /* Builds the sort key for str in three passes (main, diacritic, case); returns the key length in bytes, or 0 when a non-NULL buffer is too small. The locale argument is not used here. */
+ static const BYTE SORTKEY_SEPARATOR = 1;
+ static const BYTE SORTKEY_TERMINATOR = 0;
+ int i;
+ struct sortkey_data data;
+
+ data.buffer = buffer;
+ data.buffer_pos = 0;
+ data.buffer_len = buffer ? buffer_len : 0; /* a NULL buffer turns the run into a pure length computation */
+
+ if (str_len == -1) /* -1 means str is NUL-terminated */
+ str_len = wcslen(str);
+
+ /* Main weights */
+ for (i = 0; i < str_len; i++)
+ sortkey_add_main_weights(&data, flags, str[i]);
+ sortkey_add_weight(&data, SORTKEY_SEPARATOR);
+
+ /* Diacritic weights */
+ if (!(flags & NORM_IGNORENONSPACE)) /* with NORM_IGNORENONSPACE the diacritic section stays empty */
+ {
+ int diacritic_start_pos = data.buffer_pos;
+ int last_weighted_pos = data.buffer_pos; /* end of the last diacritic byte above SORTKEY_MIN_WEIGHT */
+ for (i = 0; i < str_len; i++)
+ sortkey_add_diacritic_weights(&data, flags, str[i], &last_weighted_pos, diacritic_start_pos);
+ /* Remove all weights <= SORTKEY_MIN_WEIGHT from the end */
+ data.buffer_pos = last_weighted_pos;
+ }
+ sortkey_add_weight(&data, SORTKEY_SEPARATOR);
+
+ /* Case weights */
+ for (i = 0; i < str_len; i++)
+ sortkey_add_case_weights(&data, flags, str[i]);
+ sortkey_add_weight(&data, SORTKEY_SEPARATOR);
+
+ /* Extra weights */
+ /* TODO */
+ sortkey_add_weight(&data, SORTKEY_SEPARATOR);
+
+ /* Special weights */
+ /* TODO */
+ sortkey_add_weight(&data, SORTKEY_TERMINATOR);
+
+ if (data.buffer_pos <= buffer_len || !buffer) /* the key fitted, or only its length was requested */
+ return data.buffer_pos;
+
+ return 0; /* buffer too small */
+}
static const struct geoinfo *get_geoinfo_ptr( GEOID geoid )
{
@@ -5257,8 +5410,8 @@ INT WINAPI DECLSPEC_HOTPATCH LCMapStringEx( const WCHAR *locale, DWORD flags, co
TRACE( "(%s,0x%08x,%s,%d,%p,%d)\n",
debugstr_w(locale), flags, debugstr_wn(src, srclen), srclen, dst, dstlen );
- if ((ret = get_sortkey( flags, src, srclen, (char *)dst, dstlen ))) ret++;
- else SetLastError( ERROR_INSUFFICIENT_BUFFER );
+ if (!(ret = sortkey_generate(flags, L"", src, srclen, (BYTE *)dst, dstlen )))
+ SetLastError( ERROR_INSUFFICIENT_BUFFER );
return ret;
}
--
2.29.2

View File

@ -1,311 +0,0 @@
From b32efb18bf0042e1d6fd80956ae69e3189665c82 Mon Sep 17 00:00:00 2001
From: Dmitry Timoshkov <dtimoshkov@codeweavers.com>
Date: Mon, 28 Jul 2003 07:39:25 -0500
Subject: [PATCH] libs: Fix most problems with CompareString.
---
dlls/kernel32/tests/locale.c | 9 +++--
dlls/kernelbase/collation.c | 64 +++++++++++++++++++-----------------
libs/port/collation.c | 64 +++++++++++++++++++-----------------
3 files changed, 72 insertions(+), 65 deletions(-)
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
index ad52473591c..4b3dbcf495c 100644
--- a/dlls/kernel32/tests/locale.c
+++ b/dlls/kernel32/tests/locale.c
@@ -1944,13 +1944,13 @@ static void test_CompareStringA(void)
todo_wine ok(ret != CSTR_EQUAL, "\\2 vs \\1 expected unequal\n");
ret = CompareStringA(lcid, NORM_IGNORECASE | LOCALE_USE_CP_ACP, "#", -1, ".", -1);
- todo_wine ok(ret == CSTR_LESS_THAN, "\"#\" vs \".\" expected CSTR_LESS_THAN, got %d\n", ret);
+ ok(ret == CSTR_LESS_THAN, "\"#\" vs \".\" expected CSTR_LESS_THAN, got %d\n", ret);
ret = CompareStringA(lcid, NORM_IGNORECASE, "_", -1, ".", -1);
- todo_wine ok(ret == CSTR_GREATER_THAN, "\"_\" vs \".\" expected CSTR_GREATER_THAN, got %d\n", ret);
+ ok(ret == CSTR_GREATER_THAN, "\"_\" vs \".\" expected CSTR_GREATER_THAN, got %d\n", ret);
ret = lstrcmpiA("#", ".");
- todo_wine ok(ret == -1, "\"#\" vs \".\" expected -1, got %d\n", ret);
+ ok(ret == -1, "\"#\" vs \".\" expected -1, got %d\n", ret);
lcid = MAKELCID(MAKELANGID(LANG_POLISH, SUBLANG_DEFAULT), SORT_DEFAULT);
@@ -6304,6 +6304,5 @@ START_TEST(locale)
test_SetThreadUILanguage();
test_NormalizeString();
test_SpecialCasing();
- /* this requires collation table patch to make it MS compatible */
- if (0) test_sorting();
+ test_sorting();
}
diff --git a/dlls/kernelbase/collation.c b/dlls/kernelbase/collation.c
index 26e9512fede..e1d945a1cd8 100644
--- a/dlls/kernelbase/collation.c
+++ b/dlls/kernelbase/collation.c
@@ -44,34 +44,35 @@ const unsigned int DECLSPEC_HIDDEN collation_table[12800] =
0x00000000, 0x02010111, 0x02020111, 0x02030111, 0x02040111, 0x02050111, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
- 0x02090111, 0x024b0111, 0x02700111, 0x02a90111, 0x09e00111, 0x02aa0111, 0x02a70111, 0x02690111,
- 0x027a0111, 0x027b0111, 0x02a20111, 0x039f0111, 0x022d0111, 0x02210111, 0x02550111, 0x02a40111,
- 0x0a0b0111, 0x0a0c0111, 0x0a0d0111, 0x0a0e0111, 0x0a0f0111, 0x0a100111, 0x0a110111, 0x0a120111,
- 0x0a130111, 0x0a140111, 0x02370111, 0x02350111, 0x03a30111, 0x03a40111, 0x03a50111, 0x024e0111,
- 0x02a10111, 0x0a150151, 0x0a290141, 0x0a3d0151, 0x0a490151, 0x0a650151, 0x0a910151, 0x0a990151,
+ 0x01290111, 0x020c0111, 0x020d0111, 0x020f0111, 0x02100111, 0x02140111, 0x02190111, 0x01200111,
+ 0x021b0111, 0x022d0111, 0x02350111, 0x02a70111, 0x02370111, 0x01210111, 0x024b0111, 0x024c0111,
+ 0x0a0b0111, 0x0a0c0111, 0x0a0d0111, 0x0a0e0111, 0x0a0f0181, 0x0a100111, 0x0a110111, 0x0a120111,
+ 0x0a130111, 0x0a140111, 0x024e0111, 0x024f0111, 0x02a90111, 0x02aa0111, 0x030a0111, 0x02550111,
+ 0x025f0111, 0x0a150151, 0x0a290141, 0x0a3d0151, 0x0a490151, 0x0a650151, 0x0a910151, 0x0a990151,
0x0ab90151, 0x0ad30161, 0x0ae70141, 0x0af70141, 0x0b030161, 0x0b2b0151, 0x0b330151, 0x0b4b0161,
0x0b670141, 0x0b730141, 0x0b7f0141, 0x0ba70151, 0x0bbf0151, 0x0bd70141, 0x0bef0151, 0x0bfb0141,
- 0x0c030151, 0x0c070141, 0x0c130141, 0x027c0111, 0x02a60111, 0x027d0111, 0x020f0111, 0x021b0111,
- 0x020c0111, 0x0a150111, 0x0a290111, 0x0a3d0111, 0x0a490111, 0x0a650111, 0x0a910111, 0x0a990111,
+ 0x0c030151, 0x0c070141, 0x0c130141, 0x02700111, 0x02780111, 0x02790111, 0x027a0111, 0x027b0111,
+ 0x027c0111, 0x0a150111, 0x0a290111, 0x0a3d0111, 0x0a490111, 0x0a650111, 0x0a910111, 0x0a990111,
0x0ab90111, 0x0ad30111, 0x0ae70111, 0x0af70111, 0x0b030111, 0x0b2b0111, 0x0b330111, 0x0b4b0111,
0x0b670111, 0x0b730111, 0x0b7f0111, 0x0ba70111, 0x0bbf0111, 0x0bd70111, 0x0bef0111, 0x0bfb0111,
- 0x0c030111, 0x0c070111, 0x0c130111, 0x027e0111, 0x03a70111, 0x027f0111, 0x03aa0111, 0x00000000,
- 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02060111, 0x00000000, 0x00000000,
+ 0x0c030111, 0x0c070111, 0x0c130111, 0x027d0111, 0x027e0111, 0x027f0111, 0x029c0111, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
- 0x02090141, 0x024c0111, 0x09df0111, 0x09e10111, 0x09de0111, 0x09e20111, 0x03a80111, 0x029c0111,
- 0x02140111, 0x029f0111, 0x0a150181, 0x02780111, 0x03a60111, 0x02200111, 0x02a00111, 0x02100111,
- 0x030a0111, 0x03a00111, 0x0a0d0151, 0x0a0e0151, 0x020d0111, 0x0c9f0121, 0x029d0111, 0x025f0111,
- 0x02190111, 0x0a0c0151, 0x0b4b01a1, 0x02790111, 0x0a0c0171, 0x0a0c0171, 0x0a0e0171, 0x024f0111,
+ 0x01290121, 0x029d0111, 0x03a40111, 0x03a50111, 0x03a60111, 0x03a70111, 0x029f0111, 0x03a80111,
+ 0x02a00111, 0x03aa0111, 0x0a150181, 0x03a00111, 0x09de0111, 0x01220111, 0x09df0111, 0x02a10111,
+ 0x09e00111, 0x039f0111, 0x0a0d0151, 0x0a0e0151, 0x02a20111, 0x09e10111, 0x09e20111, 0x09ef0111,
+ 0x02a40111, 0x0a0c0151, 0x0b4b01a1, 0x03a10111, 0x0a0c0171, 0x0a0c0171, 0x0a0c0171, 0x02a60111,
0x0a150151, 0x0a150151, 0x0a150151, 0x0a150151, 0x0a150151, 0x0a150151, 0x0a190121, 0x0a3d0151,
0x0a650151, 0x0a650151, 0x0a650151, 0x0a650151, 0x0ad30161, 0x0ad30161, 0x0ad30161, 0x0ad30161,
0x0a5d0121, 0x0b330151, 0x0b4b0161, 0x0b4b0161, 0x0b4b0161, 0x0b4b0161, 0x0b4b0161, 0x03a20111,
- 0x0b530121, 0x0bd70141, 0x0bd70141, 0x0bd70141, 0x0bd70141, 0x0c070141, 0x0c3b0121, 0x0ba70131,
+ 0x0b530121, 0x0bd70141, 0x0bd70141, 0x0bd70141, 0x0bd70141, 0x0c070141, 0x0bd40121, 0x0ba70131,
0x0a150111, 0x0a150111, 0x0a150111, 0x0a150111, 0x0a150111, 0x0a150111, 0x0a190111, 0x0a3d0111,
0x0a650111, 0x0a650111, 0x0a650111, 0x0a650111, 0x0ad30111, 0x0ad30111, 0x0ad30111, 0x0ad30111,
- 0x0a5d0111, 0x0b330111, 0x0b4b0111, 0x0b4b0111, 0x0b4b0111, 0x0b4b0111, 0x0b4b0111, 0x03a10111,
- 0x0b530111, 0x0bd70111, 0x0bd70111, 0x0bd70111, 0x0bd70111, 0x0c070111, 0x0c3b0111, 0x0c070111,
+ 0x0a5d0111, 0x0b330111, 0x0b4b0111, 0x0b4b0111, 0x0b4b0111, 0x0b4b0111, 0x0b4b0111, 0x03a30111,
+ 0x0b530111, 0x0bd70111, 0x0bd70111, 0x0bd70111, 0x0bd70111, 0x0c070111, 0x0bd40111, 0x0c070111,
+ /* 0x0100 .. 0x01ff */
0x0a150151, 0x0a150111, 0x0a150151, 0x0a150111, 0x0a150151, 0x0a150111, 0x0a3d0151, 0x0a3d0111,
0x0a3d0151, 0x0a3d0111, 0x0a3d0151, 0x0a3d0111, 0x0a3d0151, 0x0a3d0111, 0x0a490151, 0x0a490111,
0x0a4d0121, 0x0a4d0111, 0x0a650151, 0x0a650111, 0x0a650151, 0x0a650111, 0x0a650151, 0x0a650111,
@@ -155,7 +156,7 @@ const unsigned int DECLSPEC_HIDDEN collation_table[12800] =
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x020d0111, 0x02140111, 0x0c910121, 0x025f0111,
0x0c950131, 0x0c990121, 0x0c9b0121, 0xffffffff, 0x0ca20121, 0xffffffff, 0x0ca80121, 0x0cac0121,
0x0c9b0111, 0x0c910121, 0x0c920131, 0x0c930121, 0x0c940121, 0x0c950131, 0x0c980121, 0x0c990121,
- 0x0c9a0131, 0x0c9b0121, 0x0c9d0131, 0x0c9e0121, 0x0c9f0131, 0x0ca00121, 0x0ca10121, 0x0ca20121,
+ 0x0c9a0131, 0x0c9b0121, 0x0c9d0131, 0x0c9e0121, 0x0c9f0121, 0x0ca00121, 0x0ca10121, 0x0ca20121,
0x0ca30131, 0x0ca50131, 0xffffffff, 0x0ca60131, 0x0ca70121, 0x0ca80121, 0x0ca90131, 0x0caa0121,
0x0cab0121, 0x0cac0121, 0x0c9b0121, 0x0ca80121, 0x0c910111, 0x0c950111, 0x0c990111, 0x0c9b0111,
0x0ca80111, 0x0c910111, 0x0c920111, 0x0c930111, 0x0c940111, 0x0c950111, 0x0c980111, 0x0c990111,
@@ -904,12 +905,13 @@ const unsigned int DECLSPEC_HIDDEN collation_table[12800] =
0x0ca80121, 0x0ca80121, 0x0ca80121, 0x0ca80121, 0x0ca50131, 0x02140111, 0x02140111, 0x020c0111,
0xffffffff, 0xffffffff, 0x0cac0111, 0x0cac0111, 0x0cac0111, 0xffffffff, 0x0cac0111, 0x0cac0111,
0x0ca20121, 0x0ca20121, 0x0cac0121, 0x0cac0121, 0x0cac0121, 0x020d0111, 0x02180111, 0xffffffff,
- 0x02090131, 0x02090131, 0x02090131, 0x02090131, 0x02090131, 0x02090131, 0x02090131, 0x02090141,
- 0x02090131, 0x02090131, 0x02090131, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ /* 0x2000 .. 0x20ff */
+ 0x02090121, 0x02090121, 0x02090121, 0x02090121, 0x02090121, 0x02090121, 0x02090121, 0x02090131,
+ 0x02090121, 0x02090121, 0x02090121, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x02250111, 0x02250121, 0x02260111, 0x02270111, 0x02280111, 0x02290111, 0x03a90111, 0x021c0111,
0x026a0111, 0x026b0111, 0x026c0111, 0x026d0111, 0x02710111, 0x02720111, 0x02730111, 0x02740111,
0x02ae0111, 0x02af0111, 0x02b00111, 0x02b10111, 0x02550131, 0x02550131, 0x02550131, 0x02b20111,
- 0x02070111, 0x02080111, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02090141,
+ 0x02070111, 0x02080111, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02090131,
0x02ac0111, 0x02ad0111, 0x02b60111, 0x02b60121, 0x02b60121, 0x02b70111, 0x02b70121, 0x02b70121,
0x02b90111, 0x026e0111, 0x026f0111, 0x02ba0111, 0x024b0131, 0x02540111, 0x02110111, 0x02bb0111,
0x02bc0111, 0x02bd0111, 0x02be0111, 0x02b30111, 0x02a50111, 0x02860111, 0x02870111, 0xffffffff,
@@ -1256,7 +1258,8 @@ const unsigned int DECLSPEC_HIDDEN collation_table[12800] =
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0x09a90111, 0x09aa0111, 0x09ab0111, 0x09ac0111, 0x09ad0111, 0x09ae0111, 0x09af0111, 0x09b00111,
0x09b10111, 0x09b20111, 0x09b30111, 0x09b40111, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
- 0x02090121, 0x02340111, 0x025e0111, 0x02b80111, 0x09b50111, 0x09d70111, 0x192c0111, 0x0a0b0111,
+ /* 0x3000 .. 0x30ff */
+ 0x02090111, 0x02340111, 0x025e0111, 0x02b80111, 0x09b50111, 0x09d70111, 0x192c0111, 0x0a0b0111,
0x02880111, 0x02890111, 0x028a0111, 0x028b0111, 0x028c0111, 0x028d0111, 0x028e0111, 0x028f0111,
0x02900111, 0x02910111, 0x09b60111, 0x09b70111, 0x02920111, 0x02930111, 0x02940111, 0x02950111,
0x02960111, 0x02970111, 0x02980111, 0x02990111, 0x022a0111, 0x02750111, 0x02760111, 0x02770111,
@@ -1368,14 +1371,14 @@ const unsigned int DECLSPEC_HIDDEN collation_table[12800] =
0x0a0c0131, 0x0a0c0131, 0x0a0c0131, 0x0a0c0131, 0x0a0d0131, 0x0a0d0131, 0x0a0d0131, 0x0a0d0131,
0x0a0d0131, 0x0ab901a1, 0x0a490191, 0x0a1501a1, 0x0a290181, 0x0b4b01b1, 0x0b670181, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xfb400151, 0xfb400151, 0xfb400151, 0xfb400151, 0xfb400151,
- 0x0b670181, 0x0b3301b1, 0x0c9f0141, 0x0b2b01a1, 0x0af70171, 0x0af70181, 0x0b2b01b1, 0x0a990181,
- 0x0a3d01a1, 0x0af70171, 0x0b670181, 0x0b3301b1, 0x0c9f0141, 0x0c9f0141, 0x0b2b01a1, 0x0af70171,
- 0x0ab901b1, 0x0af70171, 0x0b2b01b1, 0x0a990181, 0x0bbf01a1, 0x0c9f0141, 0x0b2b01a1, 0x0a490191,
- 0x0af70171, 0x0a910191, 0x0b3301b1, 0x0c9f0141, 0x0b2b01a1, 0x0a3d01a1, 0x0af70171, 0x0b2b01a1,
+ 0x0b670181, 0x0b3301b1, 0x0c9f0131, 0x0b2b01a1, 0x0af70171, 0x0af70181, 0x0b2b01b1, 0x0a990181,
+ 0x0a3d01a1, 0x0af70171, 0x0b670181, 0x0b3301b1, 0x0c9f0131, 0x0c9f0131, 0x0b2b01a1, 0x0af70171,
+ 0x0ab901b1, 0x0af70171, 0x0b2b01b1, 0x0a990181, 0x0bbf01a1, 0x0c9f0131, 0x0b2b01a1, 0x0a490191,
+ 0x0af70171, 0x0a910191, 0x0b3301b1, 0x0c9f0131, 0x0b2b01a1, 0x0a3d01a1, 0x0af70171, 0x0b2b01a1,
0x0a3d01a1, 0x0b2b01a1, 0x0af70171, 0x0b2b01a1, 0x0a3d01a1, 0x0b2b01a1, 0x0af70171, 0x0b2b01a1,
0x0b2b01a1, 0x0b670191, 0x0af70171, 0x0b2b01b1, 0x0a990181, 0x0b7f01a1, 0x0b7f01a1, 0x0b7f01a1,
- 0x0b670181, 0x0b3301b1, 0x0c9f0141, 0x0b2b01a1, 0x0b670181, 0x0b3301b1, 0x0c9f0141, 0x0b2b01a1,
- 0x0af70171, 0x0b2b01b1, 0x0b670181, 0x0b3301b1, 0x0c9f0141, 0x0b2b01a1, 0x0af70171, 0x0b2b01b1,
+ 0x0b670181, 0x0b3301b1, 0x0c9f0131, 0x0b2b01a1, 0x0b670181, 0x0b3301b1, 0x0c9f0131, 0x0b2b01a1,
+ 0x0af70171, 0x0b2b01b1, 0x0b670181, 0x0b3301b1, 0x0c9f0131, 0x0b2b01a1, 0x0af70171, 0x0b2b01b1,
0x0af70171, 0x0b2b01b1, 0x0a150191, 0x0a290191, 0x0a3d01a1, 0x0a3d01a1, 0x0a3d01b1, 0x0a3d01b1,
0x0a490191, 0x0a990181, 0x0ab901a1, 0x0ab901b1, 0x0ad301b1, 0x0af70181, 0x0af70181, 0x0af70171,
0x0b0301c1, 0x0b0301c1, 0x0b0301c1, 0x0b0301c1, 0x0b2b01a1, 0x0b2b01a1, 0x0b2b01a1, 0x0b670191,
@@ -1556,7 +1559,7 @@ const unsigned int DECLSPEC_HIDDEN collation_table[12800] =
0xffffffff, 0x02110121, 0x02110121, 0x02110121, 0x02110121, 0x021b0131, 0x021b0131, 0x021b0131,
0x022d0131, 0x02340121, 0x02550141, 0xffffffff, 0x02350131, 0x02370131, 0x024e0141, 0x024b0141,
0x02280121, 0x027a0141, 0x027b0131, 0x027e0131, 0x027f0131, 0x02920121, 0x02930121, 0x02a90131,
- 0x02a70131, 0x02a20131, 0x039f0141, 0x02210131, 0x03a30131, 0x03a50131, 0x03a40131, 0xffffffff,
+ 0x02a70131, 0x02a20131, 0x039f0141, 0x02210121, 0x03a30131, 0x03a50131, 0x03a40131, 0xffffffff,
0x02a60131, 0x09e00131, 0x02aa0131, 0x02a10131, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0x00006831, 0x00006821, 0x00006921, 0xffffffff, 0x00006a21, 0xffffffff, 0x00006b31, 0x00006b21,
0x00006c31, 0x00006c21, 0x00006d31, 0x00006d21, 0x00006e31, 0x00006e21, 0x00006f31, 0x00006f21,
@@ -1576,8 +1579,9 @@ const unsigned int DECLSPEC_HIDDEN collation_table[12800] =
0x0f330131, 0x0f390151, 0x0f390141, 0x0f390121, 0x0f390131, 0x0f3e0141, 0x0f3e0131, 0x0f480151,
0x0f480141, 0x0f490161, 0x0f490151, 0x0f490131, 0x0f490141, 0x0f2d0151, 0x0f2d0141, 0x0f2d0151,
0x0f2d0141, 0x0f2d0151, 0x0f2d0141, 0x0f2d0151, 0x0f2d0141, 0xffffffff, 0xffffffff, 0x00000000,
- 0xffffffff, 0x024b0121, 0x02700121, 0x02a90121, 0x09e00121, 0x02aa0121, 0x02a70121, 0x02690121,
- 0x027a0121, 0x027b0121, 0x02a20121, 0x039f0121, 0x022d0121, 0x02210121, 0x02550121, 0x02a40121,
+ /* 0xff00 .. 0xffff */
+ 0xffffffff, 0x024b0121, 0x02700121, 0x02a90121, 0x09e00121, 0x02aa0121, 0x02a70121, 0x02690111,
+ 0x027a0121, 0x027b0121, 0x02a20121, 0x039f0121, 0x022d0121, 0x02210111, 0x02550121, 0x02a40121,
0x0a0b0121, 0x0a0c0121, 0x0a0d0121, 0x0a0e0121, 0x0a0f0121, 0x0a100121, 0x0a110121, 0x0a120121,
0x0a130121, 0x0a140121, 0x02370121, 0x02350121, 0x03a30121, 0x03a40121, 0x03a50121, 0x024e0121,
0x02a10121, 0x0a150161, 0x0a290151, 0x0a3d0161, 0x0a490161, 0x0a650161, 0x0a910161, 0x0a990161,
diff --git a/libs/port/collation.c b/libs/port/collation.c
index 26e9512fede..e1d945a1cd8 100644
--- a/libs/port/collation.c
+++ b/libs/port/collation.c
@@ -44,34 +44,35 @@ const unsigned int DECLSPEC_HIDDEN collation_table[12800] =
0x00000000, 0x02010111, 0x02020111, 0x02030111, 0x02040111, 0x02050111, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
- 0x02090111, 0x024b0111, 0x02700111, 0x02a90111, 0x09e00111, 0x02aa0111, 0x02a70111, 0x02690111,
- 0x027a0111, 0x027b0111, 0x02a20111, 0x039f0111, 0x022d0111, 0x02210111, 0x02550111, 0x02a40111,
- 0x0a0b0111, 0x0a0c0111, 0x0a0d0111, 0x0a0e0111, 0x0a0f0111, 0x0a100111, 0x0a110111, 0x0a120111,
- 0x0a130111, 0x0a140111, 0x02370111, 0x02350111, 0x03a30111, 0x03a40111, 0x03a50111, 0x024e0111,
- 0x02a10111, 0x0a150151, 0x0a290141, 0x0a3d0151, 0x0a490151, 0x0a650151, 0x0a910151, 0x0a990151,
+ 0x01290111, 0x020c0111, 0x020d0111, 0x020f0111, 0x02100111, 0x02140111, 0x02190111, 0x01200111,
+ 0x021b0111, 0x022d0111, 0x02350111, 0x02a70111, 0x02370111, 0x01210111, 0x024b0111, 0x024c0111,
+ 0x0a0b0111, 0x0a0c0111, 0x0a0d0111, 0x0a0e0111, 0x0a0f0181, 0x0a100111, 0x0a110111, 0x0a120111,
+ 0x0a130111, 0x0a140111, 0x024e0111, 0x024f0111, 0x02a90111, 0x02aa0111, 0x030a0111, 0x02550111,
+ 0x025f0111, 0x0a150151, 0x0a290141, 0x0a3d0151, 0x0a490151, 0x0a650151, 0x0a910151, 0x0a990151,
0x0ab90151, 0x0ad30161, 0x0ae70141, 0x0af70141, 0x0b030161, 0x0b2b0151, 0x0b330151, 0x0b4b0161,
0x0b670141, 0x0b730141, 0x0b7f0141, 0x0ba70151, 0x0bbf0151, 0x0bd70141, 0x0bef0151, 0x0bfb0141,
- 0x0c030151, 0x0c070141, 0x0c130141, 0x027c0111, 0x02a60111, 0x027d0111, 0x020f0111, 0x021b0111,
- 0x020c0111, 0x0a150111, 0x0a290111, 0x0a3d0111, 0x0a490111, 0x0a650111, 0x0a910111, 0x0a990111,
+ 0x0c030151, 0x0c070141, 0x0c130141, 0x02700111, 0x02780111, 0x02790111, 0x027a0111, 0x027b0111,
+ 0x027c0111, 0x0a150111, 0x0a290111, 0x0a3d0111, 0x0a490111, 0x0a650111, 0x0a910111, 0x0a990111,
0x0ab90111, 0x0ad30111, 0x0ae70111, 0x0af70111, 0x0b030111, 0x0b2b0111, 0x0b330111, 0x0b4b0111,
0x0b670111, 0x0b730111, 0x0b7f0111, 0x0ba70111, 0x0bbf0111, 0x0bd70111, 0x0bef0111, 0x0bfb0111,
- 0x0c030111, 0x0c070111, 0x0c130111, 0x027e0111, 0x03a70111, 0x027f0111, 0x03aa0111, 0x00000000,
- 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02060111, 0x00000000, 0x00000000,
+ 0x0c030111, 0x0c070111, 0x0c130111, 0x027d0111, 0x027e0111, 0x027f0111, 0x029c0111, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
- 0x02090141, 0x024c0111, 0x09df0111, 0x09e10111, 0x09de0111, 0x09e20111, 0x03a80111, 0x029c0111,
- 0x02140111, 0x029f0111, 0x0a150181, 0x02780111, 0x03a60111, 0x02200111, 0x02a00111, 0x02100111,
- 0x030a0111, 0x03a00111, 0x0a0d0151, 0x0a0e0151, 0x020d0111, 0x0c9f0121, 0x029d0111, 0x025f0111,
- 0x02190111, 0x0a0c0151, 0x0b4b01a1, 0x02790111, 0x0a0c0171, 0x0a0c0171, 0x0a0e0171, 0x024f0111,
+ 0x01290121, 0x029d0111, 0x03a40111, 0x03a50111, 0x03a60111, 0x03a70111, 0x029f0111, 0x03a80111,
+ 0x02a00111, 0x03aa0111, 0x0a150181, 0x03a00111, 0x09de0111, 0x01220111, 0x09df0111, 0x02a10111,
+ 0x09e00111, 0x039f0111, 0x0a0d0151, 0x0a0e0151, 0x02a20111, 0x09e10111, 0x09e20111, 0x09ef0111,
+ 0x02a40111, 0x0a0c0151, 0x0b4b01a1, 0x03a10111, 0x0a0c0171, 0x0a0c0171, 0x0a0c0171, 0x02a60111,
0x0a150151, 0x0a150151, 0x0a150151, 0x0a150151, 0x0a150151, 0x0a150151, 0x0a190121, 0x0a3d0151,
0x0a650151, 0x0a650151, 0x0a650151, 0x0a650151, 0x0ad30161, 0x0ad30161, 0x0ad30161, 0x0ad30161,
0x0a5d0121, 0x0b330151, 0x0b4b0161, 0x0b4b0161, 0x0b4b0161, 0x0b4b0161, 0x0b4b0161, 0x03a20111,
- 0x0b530121, 0x0bd70141, 0x0bd70141, 0x0bd70141, 0x0bd70141, 0x0c070141, 0x0c3b0121, 0x0ba70131,
+ 0x0b530121, 0x0bd70141, 0x0bd70141, 0x0bd70141, 0x0bd70141, 0x0c070141, 0x0bd40121, 0x0ba70131,
0x0a150111, 0x0a150111, 0x0a150111, 0x0a150111, 0x0a150111, 0x0a150111, 0x0a190111, 0x0a3d0111,
0x0a650111, 0x0a650111, 0x0a650111, 0x0a650111, 0x0ad30111, 0x0ad30111, 0x0ad30111, 0x0ad30111,
- 0x0a5d0111, 0x0b330111, 0x0b4b0111, 0x0b4b0111, 0x0b4b0111, 0x0b4b0111, 0x0b4b0111, 0x03a10111,
- 0x0b530111, 0x0bd70111, 0x0bd70111, 0x0bd70111, 0x0bd70111, 0x0c070111, 0x0c3b0111, 0x0c070111,
+ 0x0a5d0111, 0x0b330111, 0x0b4b0111, 0x0b4b0111, 0x0b4b0111, 0x0b4b0111, 0x0b4b0111, 0x03a30111,
+ 0x0b530111, 0x0bd70111, 0x0bd70111, 0x0bd70111, 0x0bd70111, 0x0c070111, 0x0bd40111, 0x0c070111,
+ /* 0x0100 .. 0x01ff */
0x0a150151, 0x0a150111, 0x0a150151, 0x0a150111, 0x0a150151, 0x0a150111, 0x0a3d0151, 0x0a3d0111,
0x0a3d0151, 0x0a3d0111, 0x0a3d0151, 0x0a3d0111, 0x0a3d0151, 0x0a3d0111, 0x0a490151, 0x0a490111,
0x0a4d0121, 0x0a4d0111, 0x0a650151, 0x0a650111, 0x0a650151, 0x0a650111, 0x0a650151, 0x0a650111,
@@ -155,7 +156,7 @@ const unsigned int DECLSPEC_HIDDEN collation_table[12800] =
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x020d0111, 0x02140111, 0x0c910121, 0x025f0111,
0x0c950131, 0x0c990121, 0x0c9b0121, 0xffffffff, 0x0ca20121, 0xffffffff, 0x0ca80121, 0x0cac0121,
0x0c9b0111, 0x0c910121, 0x0c920131, 0x0c930121, 0x0c940121, 0x0c950131, 0x0c980121, 0x0c990121,
- 0x0c9a0131, 0x0c9b0121, 0x0c9d0131, 0x0c9e0121, 0x0c9f0131, 0x0ca00121, 0x0ca10121, 0x0ca20121,
+ 0x0c9a0131, 0x0c9b0121, 0x0c9d0131, 0x0c9e0121, 0x0c9f0121, 0x0ca00121, 0x0ca10121, 0x0ca20121,
0x0ca30131, 0x0ca50131, 0xffffffff, 0x0ca60131, 0x0ca70121, 0x0ca80121, 0x0ca90131, 0x0caa0121,
0x0cab0121, 0x0cac0121, 0x0c9b0121, 0x0ca80121, 0x0c910111, 0x0c950111, 0x0c990111, 0x0c9b0111,
0x0ca80111, 0x0c910111, 0x0c920111, 0x0c930111, 0x0c940111, 0x0c950111, 0x0c980111, 0x0c990111,
@@ -904,12 +905,13 @@ const unsigned int DECLSPEC_HIDDEN collation_table[12800] =
0x0ca80121, 0x0ca80121, 0x0ca80121, 0x0ca80121, 0x0ca50131, 0x02140111, 0x02140111, 0x020c0111,
0xffffffff, 0xffffffff, 0x0cac0111, 0x0cac0111, 0x0cac0111, 0xffffffff, 0x0cac0111, 0x0cac0111,
0x0ca20121, 0x0ca20121, 0x0cac0121, 0x0cac0121, 0x0cac0121, 0x020d0111, 0x02180111, 0xffffffff,
- 0x02090131, 0x02090131, 0x02090131, 0x02090131, 0x02090131, 0x02090131, 0x02090131, 0x02090141,
- 0x02090131, 0x02090131, 0x02090131, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ /* 0x2000 .. 0x20ff */
+ 0x02090121, 0x02090121, 0x02090121, 0x02090121, 0x02090121, 0x02090121, 0x02090121, 0x02090131,
+ 0x02090121, 0x02090121, 0x02090121, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x02250111, 0x02250121, 0x02260111, 0x02270111, 0x02280111, 0x02290111, 0x03a90111, 0x021c0111,
0x026a0111, 0x026b0111, 0x026c0111, 0x026d0111, 0x02710111, 0x02720111, 0x02730111, 0x02740111,
0x02ae0111, 0x02af0111, 0x02b00111, 0x02b10111, 0x02550131, 0x02550131, 0x02550131, 0x02b20111,
- 0x02070111, 0x02080111, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02090141,
+ 0x02070111, 0x02080111, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02090131,
0x02ac0111, 0x02ad0111, 0x02b60111, 0x02b60121, 0x02b60121, 0x02b70111, 0x02b70121, 0x02b70121,
0x02b90111, 0x026e0111, 0x026f0111, 0x02ba0111, 0x024b0131, 0x02540111, 0x02110111, 0x02bb0111,
0x02bc0111, 0x02bd0111, 0x02be0111, 0x02b30111, 0x02a50111, 0x02860111, 0x02870111, 0xffffffff,
@@ -1256,7 +1258,8 @@ const unsigned int DECLSPEC_HIDDEN collation_table[12800] =
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0x09a90111, 0x09aa0111, 0x09ab0111, 0x09ac0111, 0x09ad0111, 0x09ae0111, 0x09af0111, 0x09b00111,
0x09b10111, 0x09b20111, 0x09b30111, 0x09b40111, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
- 0x02090121, 0x02340111, 0x025e0111, 0x02b80111, 0x09b50111, 0x09d70111, 0x192c0111, 0x0a0b0111,
+ /* 0x3000 .. 0x30ff */
+ 0x02090111, 0x02340111, 0x025e0111, 0x02b80111, 0x09b50111, 0x09d70111, 0x192c0111, 0x0a0b0111,
0x02880111, 0x02890111, 0x028a0111, 0x028b0111, 0x028c0111, 0x028d0111, 0x028e0111, 0x028f0111,
0x02900111, 0x02910111, 0x09b60111, 0x09b70111, 0x02920111, 0x02930111, 0x02940111, 0x02950111,
0x02960111, 0x02970111, 0x02980111, 0x02990111, 0x022a0111, 0x02750111, 0x02760111, 0x02770111,
@@ -1368,14 +1371,14 @@ const unsigned int DECLSPEC_HIDDEN collation_table[12800] =
0x0a0c0131, 0x0a0c0131, 0x0a0c0131, 0x0a0c0131, 0x0a0d0131, 0x0a0d0131, 0x0a0d0131, 0x0a0d0131,
0x0a0d0131, 0x0ab901a1, 0x0a490191, 0x0a1501a1, 0x0a290181, 0x0b4b01b1, 0x0b670181, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xfb400151, 0xfb400151, 0xfb400151, 0xfb400151, 0xfb400151,
- 0x0b670181, 0x0b3301b1, 0x0c9f0141, 0x0b2b01a1, 0x0af70171, 0x0af70181, 0x0b2b01b1, 0x0a990181,
- 0x0a3d01a1, 0x0af70171, 0x0b670181, 0x0b3301b1, 0x0c9f0141, 0x0c9f0141, 0x0b2b01a1, 0x0af70171,
- 0x0ab901b1, 0x0af70171, 0x0b2b01b1, 0x0a990181, 0x0bbf01a1, 0x0c9f0141, 0x0b2b01a1, 0x0a490191,
- 0x0af70171, 0x0a910191, 0x0b3301b1, 0x0c9f0141, 0x0b2b01a1, 0x0a3d01a1, 0x0af70171, 0x0b2b01a1,
+ 0x0b670181, 0x0b3301b1, 0x0c9f0131, 0x0b2b01a1, 0x0af70171, 0x0af70181, 0x0b2b01b1, 0x0a990181,
+ 0x0a3d01a1, 0x0af70171, 0x0b670181, 0x0b3301b1, 0x0c9f0131, 0x0c9f0131, 0x0b2b01a1, 0x0af70171,
+ 0x0ab901b1, 0x0af70171, 0x0b2b01b1, 0x0a990181, 0x0bbf01a1, 0x0c9f0131, 0x0b2b01a1, 0x0a490191,
+ 0x0af70171, 0x0a910191, 0x0b3301b1, 0x0c9f0131, 0x0b2b01a1, 0x0a3d01a1, 0x0af70171, 0x0b2b01a1,
0x0a3d01a1, 0x0b2b01a1, 0x0af70171, 0x0b2b01a1, 0x0a3d01a1, 0x0b2b01a1, 0x0af70171, 0x0b2b01a1,
0x0b2b01a1, 0x0b670191, 0x0af70171, 0x0b2b01b1, 0x0a990181, 0x0b7f01a1, 0x0b7f01a1, 0x0b7f01a1,
- 0x0b670181, 0x0b3301b1, 0x0c9f0141, 0x0b2b01a1, 0x0b670181, 0x0b3301b1, 0x0c9f0141, 0x0b2b01a1,
- 0x0af70171, 0x0b2b01b1, 0x0b670181, 0x0b3301b1, 0x0c9f0141, 0x0b2b01a1, 0x0af70171, 0x0b2b01b1,
+ 0x0b670181, 0x0b3301b1, 0x0c9f0131, 0x0b2b01a1, 0x0b670181, 0x0b3301b1, 0x0c9f0131, 0x0b2b01a1,
+ 0x0af70171, 0x0b2b01b1, 0x0b670181, 0x0b3301b1, 0x0c9f0131, 0x0b2b01a1, 0x0af70171, 0x0b2b01b1,
0x0af70171, 0x0b2b01b1, 0x0a150191, 0x0a290191, 0x0a3d01a1, 0x0a3d01a1, 0x0a3d01b1, 0x0a3d01b1,
0x0a490191, 0x0a990181, 0x0ab901a1, 0x0ab901b1, 0x0ad301b1, 0x0af70181, 0x0af70181, 0x0af70171,
0x0b0301c1, 0x0b0301c1, 0x0b0301c1, 0x0b0301c1, 0x0b2b01a1, 0x0b2b01a1, 0x0b2b01a1, 0x0b670191,
@@ -1556,7 +1559,7 @@ const unsigned int DECLSPEC_HIDDEN collation_table[12800] =
0xffffffff, 0x02110121, 0x02110121, 0x02110121, 0x02110121, 0x021b0131, 0x021b0131, 0x021b0131,
0x022d0131, 0x02340121, 0x02550141, 0xffffffff, 0x02350131, 0x02370131, 0x024e0141, 0x024b0141,
0x02280121, 0x027a0141, 0x027b0131, 0x027e0131, 0x027f0131, 0x02920121, 0x02930121, 0x02a90131,
- 0x02a70131, 0x02a20131, 0x039f0141, 0x02210131, 0x03a30131, 0x03a50131, 0x03a40131, 0xffffffff,
+ 0x02a70131, 0x02a20131, 0x039f0141, 0x02210121, 0x03a30131, 0x03a50131, 0x03a40131, 0xffffffff,
0x02a60131, 0x09e00131, 0x02aa0131, 0x02a10131, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0x00006831, 0x00006821, 0x00006921, 0xffffffff, 0x00006a21, 0xffffffff, 0x00006b31, 0x00006b21,
0x00006c31, 0x00006c21, 0x00006d31, 0x00006d21, 0x00006e31, 0x00006e21, 0x00006f31, 0x00006f21,
@@ -1576,8 +1579,9 @@ const unsigned int DECLSPEC_HIDDEN collation_table[12800] =
0x0f330131, 0x0f390151, 0x0f390141, 0x0f390121, 0x0f390131, 0x0f3e0141, 0x0f3e0131, 0x0f480151,
0x0f480141, 0x0f490161, 0x0f490151, 0x0f490131, 0x0f490141, 0x0f2d0151, 0x0f2d0141, 0x0f2d0151,
0x0f2d0141, 0x0f2d0151, 0x0f2d0141, 0x0f2d0151, 0x0f2d0141, 0xffffffff, 0xffffffff, 0x00000000,
- 0xffffffff, 0x024b0121, 0x02700121, 0x02a90121, 0x09e00121, 0x02aa0121, 0x02a70121, 0x02690121,
- 0x027a0121, 0x027b0121, 0x02a20121, 0x039f0121, 0x022d0121, 0x02210121, 0x02550121, 0x02a40121,
+ /* 0xff00 .. 0xffff */
+ 0xffffffff, 0x024b0121, 0x02700121, 0x02a90121, 0x09e00121, 0x02aa0121, 0x02a70121, 0x02690111,
+ 0x027a0121, 0x027b0121, 0x02a20121, 0x039f0121, 0x022d0121, 0x02210111, 0x02550121, 0x02a40121,
0x0a0b0121, 0x0a0c0121, 0x0a0d0121, 0x0a0e0121, 0x0a0f0121, 0x0a100121, 0x0a110121, 0x0a120121,
0x0a130121, 0x0a140121, 0x02370121, 0x02350121, 0x03a30121, 0x03a40121, 0x03a50121, 0x024e0121,
0x02a10121, 0x0a150161, 0x0a290151, 0x0a3d0161, 0x0a490161, 0x0a650161, 0x0a910161, 0x0a990161,
--
2.17.1

View File

@@ -0,0 +1,128 @@
From dca517521550923c881c95659f2309756c84d597 Mon Sep 17 00:00:00 2001
From: Fabian Maurer <dark.shadow4@web.de>
Date: Sat, 8 Aug 2020 16:47:15 +0200
Subject: [PATCH] kernelbase: Implement sortkey punctuation
Signed-off-by: Fabian Maurer <dark.shadow4@web.de>
---
dlls/kernel32/tests/locale.c | 29 +++++++++++++++++++++++++++++
dlls/kernelbase/locale.c | 35 +++++++++++++++++++++++++++++++----
2 files changed, 60 insertions(+), 4 deletions(-)
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
index d875bf94f92..25c460f4175 100644
--- a/dlls/kernel32/tests/locale.c
+++ b/dlls/kernel32/tests/locale.c
@@ -3210,6 +3210,35 @@ static const struct sorting_test_entry unicode_sorting_tests[] =
{ L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"A\x0301\x0301", L"A\x0301\x00ad\x0301" }, /* Unsortable combined with diacritics */
{ L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"b\x07f2\x07f2", L"b\x07f2\x2064\x07f2" }, /* Unsortable combined with diacritics */
{ L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"X\x0337\x0337", L"X\x0337\xfffd\x0337" }, /* Unsortable combined with diacritics */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORECASE, L"c", L"C" },
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORECASE, L"e", L"E" },
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORECASE, L"A", L"a" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x001b", L"\x001c" }, /* Punctuation primary weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0005", L"\x0006" }, /* Punctuation primary weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0027", L"\xff07", TRUE }, /* Punctuation diacritic/case weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x07f4", L"\x07f5", TRUE }, /* Punctuation diacritic/case weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x207b", L"\x0008" }, /* Punctuation diacritic/case weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x207b", L"\x0008" }, /* Punctuation */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0004", L"\x0011" }, /* Punctuation */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xff07", L"\x07f4" }, /* Punctuation primary weight has priority */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xfe32", L"\x2014" }, /* Punctuation primary weight has priority */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x058a", L"\x2027" }, /* Punctuation primary weight has priority */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS, L"\x207b", L"\x0008" }, /* Punctuation */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS, L"\x0004", L"\x0011" }, /* Punctuation */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x207b", L"\x0008" }, /* Punctuation */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, SORT_STRINGSORT, L"\x0004", L"\x0011" }, /* Punctuation */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS | SORT_STRINGSORT, L"\x207b", L"\x0008" }, /* Punctuation */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS | SORT_STRINGSORT, L"\x0004", L"\x0011" }, /* Punctuation */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, SORT_STRINGSORT, L"\x001a", L"\x001b" }, /* Punctuation main weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x2027", L"\x2011" }, /* Punctuation main weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x3030", L"\x301c" }, /* Punctuation main weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x058a", L"\x2010" }, /* Punctuation diacritic weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x07F5", L"\x07F4" }, /* Punctuation diacritic weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xfe32", L"\x2013" }, /* Punctuation case weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xfe31", L"\xfe58" }, /* Punctuation case weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xff07", L"\x0027" }, /* Punctuation case weight */
+
+
};
static void test_unicode_sorting(void)
diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c
index 3a29f3e8250..bce705ab484 100644
--- a/dlls/kernelbase/locale.c
+++ b/dlls/kernelbase/locale.c
@@ -2556,7 +2556,11 @@ static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR
break;
case SORTKEY_PUNCTUATION:
- /* TODO */
+ if ((flags & NORM_IGNORESYMBOLS) || !(flags & SORT_STRINGSORT))
+ break;
+
+ sortkey_add_weight(data, info.script_member);
+ sortkey_add_weight(data, info.weight_primary);
break;
case SORTKEY_SYMBOL_1:
@@ -2619,7 +2623,9 @@ static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags,
break;
case SORTKEY_PUNCTUATION:
- /* TODO */
+ if ((flags & NORM_IGNORESYMBOLS) || !(flags & SORT_STRINGSORT))
+ break;
+ sortkey_add_diacritic_weight(data, info.weight_diacritic, last_weighted_pos);
break;
case SORTKEY_SYMBOL_1:
@@ -2660,7 +2666,9 @@ static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR
break;
case SORTKEY_PUNCTUATION:
- /* TODO */
+ if ((flags & NORM_IGNORESYMBOLS) || !(flags & SORT_STRINGSORT))
+ break;
+ sortkey_add_case_weight(data, flags, info.weight_case);
break;
case SORTKEY_SYMBOL_1:
@@ -2680,6 +2688,24 @@ static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR
}
}
+static void sortkey_add_special_weights(struct sortkey_data *data, int flags, WCHAR c)
+{ /* Appends the special-weights bytes for c to the sortkey; only punctuation produces output. */
+ struct character_info info;
+ BYTE weight_second; /* diacritic and case weights packed into a single byte */
+
+ sortkey_get_char(&info, c);
+
+ if (info.script_member == SORTKEY_PUNCTUATION)
+ {
+ if ((flags & NORM_IGNORESYMBOLS) || (flags & SORT_STRINGSORT)) /* symbols are ignored, or string sort already emitted this character in the main/diacritic/case passes */
+ return;
+
+ weight_second = (BYTE)(info.weight_diacritic * 8 + info.weight_case); /* diacritic weight scaled by 8, plus the case weight, truncated to BYTE */
+ sortkey_add_weight(data, info.weight_primary); /* primary weight first, then the packed byte */
+ sortkey_add_weight(data, weight_second);
+ }
+}
+
static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, int str_len, BYTE *buffer, int buffer_len)
{
static const BYTE SORTKEY_SEPARATOR = 1;
@@ -2721,7 +2747,8 @@ static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, in
sortkey_add_weight(&data, SORTKEY_SEPARATOR);
/* Special weights */
- /* TODO */
+ for (i = 0; i < str_len; i++)
+ sortkey_add_special_weights(&data, flags, str[i]);
sortkey_add_weight(&data, SORTKEY_TERMINATOR);
if (data.buffer_pos <= buffer_len || !buffer)
--
2.29.2

View File

@@ -0,0 +1,273 @@
From 0f23a9db326dd6040b2d41fac99bd495f718d63d Mon Sep 17 00:00:00 2001
From: Fabian Maurer <dark.shadow4@web.de>
Date: Sat, 8 Aug 2020 16:49:02 +0200
Subject: [PATCH] kernelbase: Implement sortkey for Japanese characters
Signed-off-by: Fabian Maurer <dark.shadow4@web.de>
---
dlls/kernel32/tests/locale.c | 94 ++++++++++++++++++++++++++++++-
dlls/kernelbase/locale.c | 104 +++++++++++++++++++++++++++++++++--
2 files changed, 192 insertions(+), 6 deletions(-)
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
index 25c460f4175..43a244d2a6b 100644
--- a/dlls/kernel32/tests/locale.c
+++ b/dlls/kernel32/tests/locale.c
@@ -3237,8 +3237,98 @@ static const struct sorting_test_entry unicode_sorting_tests[] =
{ L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xfe32", L"\x2013" }, /* Punctuation case weight */
{ L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xfe31", L"\xfe58" }, /* Punctuation case weight */
{ L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xff07", L"\x0027" }, /* Punctuation case weight */
-
-
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS, L"\x207b", L"\x0008" }, /* Punctuation NORM_IGNORESYMBOLS */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS, L"\x0004", L"\x0011" }, /* Punctuation NORM_IGNORESYMBOLS */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS | SORT_STRINGSORT, L"\x207b", L"\x0008" }, /* Punctuation NORM_IGNORESYMBOLS SORT_STRINGSORT */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS | SORT_STRINGSORT, L"\x0004", L"\x0011" }, /* Punctuation NORM_IGNORESYMBOLS SORT_STRINGSORT */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, SORT_STRINGSORT, L"\x001a", L"\x001b" }, /* Punctuation SORT_STRINGSORT main weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x2027", L"\x2011", }, /* Punctuation SORT_STRINGSORT main weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x3030", L"\x301c", }, /* Punctuation SORT_STRINGSORT main weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x058a", L"\x2010" }, /* Punctuation SORT_STRINGSORT diacritic weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x07F5", L"\x07F4" }, /* Punctuation SORT_STRINGSORT diacritic weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xfe32", L"\x2013" }, /* Punctuation SORT_STRINGSORT case weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xfe31", L"\xfe58" }, /* Punctuation SORT_STRINGSORT case weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xff07", L"\x0027" }, /* Punctuation SORT_STRINGSORT case weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x04b0", L"\x32db" }, /* Japanese main weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x3093", L"\x1e62\x013f" }, /* Japanese main weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30d3", L"\x30d4" }, /* Japanese diacritic weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x307b", L"\x307c" }, /* Japanese diacritic weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30ea", L"\x32f7" }, /* Japanese diacritic weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x31fb", L"\x30e9" }, /* Japanese case weight small */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30db", L"\x31f9" }, /* Japanese case weight small */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xff6d", L"\xff95" }, /* Japanese case weight small */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORENONSPACE, L"\x31fb", L"\x30e9" }, /* Japanese case weight small */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORENONSPACE, L"\x30db", L"\x31f9" }, /* Japanese case weight small */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORENONSPACE, L"\xff6d", L"\xff95" }, /* Japanese case weight small */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30d5", L"\x3075" }, /* Japanese case weight kana */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x306a", L"\x30ca" }, /* Japanese case weight kana */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x305a", L"\x30ba" }, /* Japanese case weight kana */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREKANATYPE, L"\x30d5", L"\x3075" }, /* Japanese case weight kana */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREKANATYPE, L"\x306a", L"\x30ca" }, /* Japanese case weight kana */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREKANATYPE, L"\x305a", L"\x30ba" }, /* Japanese case weight kana */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30bf", L"\xff80" }, /* Japanese case weight width */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30ab", L"\xff76" }, /* Japanese case weight width */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30a2", L"\xff71" }, /* Japanese case weight width */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\x30bf", L"\xff80" }, /* Japanese case weight width */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\x30ab", L"\xff76" }, /* Japanese case weight width */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\x30a2", L"\xff71" }, /* Japanese case weight width */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORENONSPACE, L"\x31a2", L"\x3110" },
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORENONSPACE, L"\x1342", L"\x133a" },
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORENONSPACE, L"\x16a4", L"\x16a5" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30b1\x30f6", L"\xff79\x30b1" }, /* Kana small data must have priority over width data */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30a6\x30a5", L"\xff73\x30a6" }, /* Kana small data must have priority over width data */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30a8\x30a7", L"\xff74\x30a8" }, /* Kana small data must have priority over width data */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30b1", L"\xff79" }, /* Kana small data must have priority over width data */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30a6", L"\xff73" }, /* Kana small data must have priority over width data */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30a8", L"\xff74" }, /* Kana small data must have priority over width data */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x3046\x30a9", L"\x30a6\x30aa" }, /* Kana small data must have priority over kana type data */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x304a\x3041", L"\x30aa\x3042" }, /* Kana small data must have priority over kana type data */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x3059\x30a7", L"\x30b9\x30a8" }, /* Kana small data must have priority over kana type data */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x3046", L"\x30a6" }, /* Kana small data must have priority over kana type data */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x304a", L"\x30aa" }, /* Kana small data must have priority over kana type data */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x3059", L"\x30b9" }, /* Kana small data must have priority over kana type data */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30a6\x30a8", L"\xff73\x3048" }, /* Kana type data must have priority over width data */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30ab\x30a3", L"\xff76\x3043" }, /* Kana type data must have priority over width data */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30b5\x30ac", L"\xff7b\x304c" }, /* Kana type data must have priority over width data */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30a6", L"\xff73" }, /* Kana type data must have priority over width data */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30ab", L"\xff76" }, /* Kana type data must have priority over width data */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30b5", L"\xff7b" }, /* Kana type data must have priority over width data */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x305a a", L"\x30ba A" }, /* Case weights have priority over extra weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30c1 b", L"\xff81 B" }, /* Case weights have priority over extra weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xff8b x", L"\x31f6 X" }, /* Case weights have priority over extra weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x305a", L"\x30ba" }, /* Case weights have priority over extra weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30c1", L"\xff81" }, /* Case weights have priority over extra weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xff8b", L"\x31f6" }, /* Case weights have priority over extra weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0027\x31ff", L"\x007f\xff9b" }, /* Extra weights have priority over special weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x07f5\x30f3", L"\x07f4\x3093" }, /* Extra weights have priority over special weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xfe63\x30e0", L"\xff0d\x3080" }, /* Extra weights have priority over special weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x0027", L"\x007f" }, /* Extra weights have priority over special weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x07f5", L"\x07f4" }, /* Extra weights have priority over special weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xfe63", L"\xff0d" }, /* Extra weights have priority over special weights */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\xff68", L"\x30a3" },
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\xff75", L"\x30aa" },
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\x30e2", L"\xff93" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xff68", L"\x30a3" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xff75", L"\x30aa" },
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30e2", L"\xff93" },
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREKANATYPE, L"\x30a8", L"\x3048" },
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREKANATYPE, L"\x30af", L"\x304f" },
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREKANATYPE, L"\x3067", L"\x30c7" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30a8", L"\x3048" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30af", L"\x304f" },
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x3067", L"\x30c7" },
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\xffb7", L"\x3147" },
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\xffb6", L"\x3146" },
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\x3145", L"\xffb5" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, NORM_IGNORECASE, L"\xffb7", L"\x3147" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, NORM_IGNORECASE, L"\xffb6", L"\x3146" },
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, NORM_IGNORECASE, L"\x3145", L"\xffb5" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, NORM_IGNORECASE, L"\x2cff", L"\x30ba" }, /* Coptic < Japanese */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, NORM_IGNORECASE, L"\x2cdb", L"\x32de" }, /* Coptic < Japanese */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, NORM_IGNORECASE, L"\x2ce0", L"\x30c6" }, /* Coptic < Japanese */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, NORM_IGNORECASE, L"\x05d3", L"\x30ba" }, /* Hebrew > Japanese */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, NORM_IGNORECASE, L"\x05e3", L"\x32de" }, /* Hebrew > Japanese */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, NORM_IGNORECASE, L"\x05d7", L"\x30c6" }, /* Hebrew > Japanese */
};
static void test_unicode_sorting(void)
diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c
index bce705ab484..feec402cb61 100644
--- a/dlls/kernelbase/locale.c
+++ b/dlls/kernelbase/locale.c
@@ -2473,6 +2473,11 @@ enum sortkey_special_script
#define SORTKEY_MIN_WEIGHT 2
+static const BYTE SORTKEY_FLAGS_EXTRA = 0xc4; /* Extra data added to the flags values */
+static const BYTE SORTKEY_FLAG_HIRAGANA = 0x20; /* if bit is set then hiragana, else katakana */
+static const BYTE SORTKEY_FLAG_LARGE = 0x02; /* if bit is set then normal kana, else small kana */
+static const BYTE SORTKEY_FLAG_FULLWIDTH = 0x01; /* if bit is set then full width, else half width */
+
struct character_info
{
BYTE weight_primary;
@@ -2541,7 +2546,15 @@ static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR
break;
case SORTKEY_JAPANESE:
- /* TODO */
+ if (info.weight_primary <= 1)
+ {
+ /* TODO Kana iteration/repeat characters not implemented yet */
+ }
+ else
+ {
+ sortkey_add_weight(data, 34);
+ sortkey_add_weight(data, info.weight_primary);
+ }
break;
case SORTKEY_JAMO:
@@ -2614,7 +2627,12 @@ static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags,
break;
case SORTKEY_JAPANESE:
- /* TODO */
+ if (info.weight_primary <= 1)
+ {
+ /* TODO Kana iteration/repeat characters not implemented yet */
+ }
+ else
+ sortkey_add_diacritic_weight(data, info.weight_diacritic, last_weighted_pos);
break;
case SORTKEY_JAMO:
@@ -2658,7 +2676,12 @@ static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR
break;
case SORTKEY_JAPANESE:
- /* TODO */
+ if (info.weight_primary <= 1)
+ {
+ /* TODO Kana iteration/repeat characters not implemented yet */
+ }
+ else
+ sortkey_add_case_weight(data, flags, SORTKEY_MIN_WEIGHT);
break;
case SORTKEY_CJK:
@@ -2706,10 +2729,75 @@ static void sortkey_add_special_weights(struct sortkey_data *data, int flags, WC
}
}
+static void sortkey_add_extra_weights_small(struct sortkey_data *data, int flags, WCHAR c)
+{
+ struct character_info info;
+
+ sortkey_get_char(&info, c);
+
+ if (info.script_member == SORTKEY_JAPANESE)
+ {
+ if (info.weight_primary <= 1)
+ {
+ /* TODO Kana iteration/repeat characters not implemented yet */
+ }
+ else
+ {
+ if (!(flags & NORM_IGNORENONSPACE))
+ {
+ sortkey_add_weight(data, (info.weight_case & SORTKEY_FLAG_LARGE) | SORTKEY_FLAGS_EXTRA);
+ }
+ }
+ }
+}
+
+static void sortkey_add_extra_weights_kana(struct sortkey_data *data, int flags, WCHAR c)
+{
+ struct character_info info;
+
+ sortkey_get_char(&info, c);
+
+ if (info.script_member == SORTKEY_JAPANESE)
+ {
+ if (info.weight_primary <= 1)
+ {
+ /* TODO Kana iteration/repeat characters not implemented yet */
+ }
+ else
+ {
+ if (flags & NORM_IGNOREKANATYPE)
+ info.weight_case = 0;
+ sortkey_add_weight(data, (info.weight_case & SORTKEY_FLAG_HIRAGANA) | SORTKEY_FLAGS_EXTRA);
+ }
+ }
+}
+
+static void sortkey_add_extra_weights_width(struct sortkey_data *data, int flags, WCHAR c)
+{
+ struct character_info info;
+
+ sortkey_get_char(&info, c);
+
+ if (info.script_member == SORTKEY_JAPANESE)
+ {
+ if (info.weight_primary <= 1)
+ {
+ /* TODO Kana iteration/repeat characters not implemented yet */
+ }
+ else
+ {
+ if (flags & NORM_IGNOREWIDTH)
+ info.weight_case = 0;
+ sortkey_add_weight(data, (info.weight_case & SORTKEY_FLAG_FULLWIDTH) | SORTKEY_FLAGS_EXTRA);
+ }
+ }
+}
+
static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, int str_len, BYTE *buffer, int buffer_len)
{
static const BYTE SORTKEY_SEPARATOR = 1;
static const BYTE SORTKEY_TERMINATOR = 0;
+ static const BYTE SORTKEY_EXTRA_SEPARATOR = 0xff;
int i;
struct sortkey_data data;
@@ -2743,7 +2831,15 @@ static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, in
sortkey_add_weight(&data, SORTKEY_SEPARATOR);
/* Extra weights */
- /* TODO */
+ for (i = 0; i < str_len; i++)
+ sortkey_add_extra_weights_small(&data, flags, str[i]);
+ sortkey_add_weight(&data, SORTKEY_EXTRA_SEPARATOR);
+ for (i = 0; i < str_len; i++)
+ sortkey_add_extra_weights_kana(&data, flags, str[i]);
+ sortkey_add_weight(&data, SORTKEY_EXTRA_SEPARATOR);
+ for (i = 0; i < str_len; i++)
+ sortkey_add_extra_weights_width(&data, flags, str[i]);
+ sortkey_add_weight(&data, SORTKEY_EXTRA_SEPARATOR);
sortkey_add_weight(&data, SORTKEY_SEPARATOR);
/* Special weights */
--
2.29.2

View File

@ -0,0 +1,188 @@
From 9ccd944af35dc418a09a17ab70619b37e598ea43 Mon Sep 17 00:00:00 2001
From: Fabian Maurer <dark.shadow4@web.de>
Date: Sat, 8 Aug 2020 16:49:45 +0200
Subject: [PATCH] kernelbase: Implement sortkey expansion
Signed-off-by: Fabian Maurer <dark.shadow4@web.de>
---
dlls/kernel32/tests/locale.c | 6 +++
dlls/kernelbase/locale.c | 91 +++++++++++++++++++++++++++++++++++-
2 files changed, 96 insertions(+), 1 deletion(-)
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
index 43a244d2a6b..e8adb32bbbd 100644
--- a/dlls/kernel32/tests/locale.c
+++ b/dlls/kernel32/tests/locale.c
@@ -3329,6 +3329,12 @@ static const struct sorting_test_entry unicode_sorting_tests[] =
{ L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, NORM_IGNORECASE, L"\x05d3", L"\x30ba" }, /* Hebrew > Japanese */
{ L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, NORM_IGNORECASE, L"\x05e3", L"\x32de" }, /* Hebrew > Japanese */
{ L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, NORM_IGNORECASE, L"\x05d7", L"\x30c6" }, /* Hebrew > Japanese */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"\x00c6", L"\x0041\x0045" }, /* Expansion */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"\x0f5c", L"\x0f5b\x0fb7" }, /* Expansion */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"\x05f0", L"\x05d5\x05d5" }, /* Expansion */
+ { L"en-US", CSTR_LESS_THAN, CSTR_EQUAL, 0, L"\x0f75", L"\x0f71\x0f74" }, /* Expansion character always follow default character logic */
+ { L"en-US", CSTR_LESS_THAN, CSTR_EQUAL, 0, L"\xfc5e", L"\x064c\x0651" }, /* Expansion character always follow default character logic */
+ { L"en-US", CSTR_LESS_THAN, CSTR_EQUAL, 0, L"\xfb2b", L"\x05e9\x05c2" }, /* Expansion character always follow default character logic */
};
static void test_unicode_sorting(void)
diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c
index feec402cb61..da358d74934 100644
--- a/dlls/kernelbase/locale.c
+++ b/dlls/kernelbase/locale.c
@@ -2459,6 +2459,7 @@ enum sortkey_special_script
{
SORTKEY_UNSORTABLE = 0,
SORTKEY_DIACRITIC = 1,
+ SORTKEY_EXPANSION = 2,
SORTKEY_JAPANESE = 3,
SORTKEY_JAMO = 4,
SORTKEY_CJK = 5,
@@ -2496,13 +2497,27 @@ struct sortkey_data
static void sortkey_get_char(struct character_info *info, WCHAR ch)
{
DWORD value = sort.keys[ch];
-
info->weight_case = value >> 24;
info->weight_diacritic = (value >> 16) & 0xff;
info->script_member = (value >> 8) & 0xff;
info->weight_primary = value & 0xff;
}
+static const WCHAR* sortkey_get_expansion(WCHAR ch)
+{
+ DWORD pos_info = sort.keys[ch];
+ unsigned int pos = pos_info >> 16;
+ const DWORD *ptr;
+ unsigned int count_expansion;
+ if ((WORD)pos_info != 0x200) /* Check for expansion magic number */
+ return NULL;
+ ptr = (const DWORD *)(sort.guids + sort.guid_count);
+ count_expansion = *ptr++;
+ if (pos >= count_expansion)
+ return NULL;
+ return (const WCHAR *)(ptr + pos);
+}
+
static BOOL sortkey_is_PUA(BYTE script_member)
{
@@ -2533,6 +2548,27 @@ static void sortkey_add_diacritic_weight(struct sortkey_data *data, BYTE value,
*last_weighted_pos = data->buffer_pos;
}
+static void sortkey_handle_expansion_main(struct sortkey_data *data, int flags, WCHAR c)
+{
+ struct character_info info;
+ const WCHAR *expansion = sortkey_get_expansion(c);
+ if (expansion)
+ {
+ /* Expansion characters always follow default character logic, ignoring the script_member value */
+ sortkey_handle_expansion_main(data, flags, expansion[0]);
+ sortkey_handle_expansion_main(data, flags, expansion[1]);
+ return;
+ }
+ sortkey_get_char(&info, c);
+ if (info.script_member != SORTKEY_UNSORTABLE)
+ {
+ sortkey_add_weight(data, info.script_member);
+ sortkey_add_weight(data, info.weight_primary);
+ if (sortkey_is_PUA(info.script_member))
+ sortkey_add_weight(data, info.weight_diacritic);
+ }
+}
+
static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR c)
{
struct character_info info;
@@ -2542,6 +2578,12 @@ static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR
switch (info.script_member)
{
case SORTKEY_UNSORTABLE:
+ break;
+
+ case SORTKEY_EXPANSION:
+ sortkey_handle_expansion_main(data, flags, c);
+ break;
+
case SORTKEY_DIACRITIC:
break;
@@ -2598,6 +2640,25 @@ static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR
}
}
+static void sortkey_handle_expansion_diacritic(struct sortkey_data *data, int flags, WCHAR c, int *last_weighted_pos)
+{
+ struct character_info info;
+ const WCHAR *expansion = sortkey_get_expansion(c);
+ if (expansion)
+ {
+ /* Expansion characters always follow default character logic, ignoring the script_member value */
+ sortkey_handle_expansion_diacritic(data, flags, expansion[0], last_weighted_pos);
+ sortkey_handle_expansion_diacritic(data, flags, expansion[1], last_weighted_pos);
+ return;
+ }
+ sortkey_get_char(&info, c);
+ if (info.script_member != SORTKEY_UNSORTABLE)
+ {
+ if (!sortkey_is_PUA(info.script_member))
+ sortkey_add_diacritic_weight(data, info.weight_diacritic, last_weighted_pos);
+ }
+}
+
static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags, WCHAR c, int *last_weighted_pos, int diacritic_start_pos)
{
struct character_info info;
@@ -2610,6 +2671,10 @@ static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags,
case SORTKEY_UNSORTABLE:
break;
+ case SORTKEY_EXPANSION:
+ sortkey_handle_expansion_diacritic(data, flags, c, last_weighted_pos);
+ break;
+
case SORTKEY_DIACRITIC:
old_pos = data->buffer_pos - 1;
/*
@@ -2663,6 +2728,24 @@ static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags,
}
}
+static void sortkey_handle_expansion_case(struct sortkey_data *data, int flags, WCHAR c)
+{
+ struct character_info info;
+ const WCHAR *expansion = sortkey_get_expansion(c);
+ if (expansion)
+ {
+ /* Expansion characters always follow default character logic, ignoring the script_member value */
+ sortkey_handle_expansion_case(data, flags, expansion[0]);
+ sortkey_handle_expansion_case(data, flags, expansion[1]);
+ return;
+ }
+ sortkey_get_char(&info, c);
+ if (info.script_member != SORTKEY_UNSORTABLE)
+ {
+ sortkey_add_case_weight(data, flags, info.weight_case);
+ }
+}
+
static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR c)
{
struct character_info info;
@@ -2672,6 +2755,12 @@ static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR
switch (info.script_member)
{
case SORTKEY_UNSORTABLE:
+ break;
+
+ case SORTKEY_EXPANSION:
+ sortkey_handle_expansion_case(data, flags, c);
+ break;
+
case SORTKEY_DIACRITIC:
break;
--
2.29.2

View File

@ -0,0 +1,377 @@
From 1ef8d8455449563aaa1d3c9ce16444134f3a9ef4 Mon Sep 17 00:00:00 2001
From: Fabian Maurer <dark.shadow4@web.de>
Date: Sat, 8 Aug 2020 17:32:56 +0200
Subject: [PATCH] kernelbase: Implement sortkey language support
Signed-off-by: Fabian Maurer <dark.shadow4@web.de>
---
dlls/kernel32/tests/locale.c | 50 ++++++++++++++++++
dlls/kernelbase/locale.c | 99 +++++++++++++++++++++---------------
2 files changed, 109 insertions(+), 40 deletions(-)
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
index e8adb32bbbd..e4a31c120f0 100644
--- a/dlls/kernel32/tests/locale.c
+++ b/dlls/kernel32/tests/locale.c
@@ -3335,6 +3335,56 @@ static const struct sorting_test_entry unicode_sorting_tests[] =
{ L"en-US", CSTR_LESS_THAN, CSTR_EQUAL, 0, L"\x0f75", L"\x0f71\x0f74" }, /* Expansion character always follow default character logic */
{ L"en-US", CSTR_LESS_THAN, CSTR_EQUAL, 0, L"\xfc5e", L"\x064c\x0651" }, /* Expansion character always follow default character logic */
{ L"en-US", CSTR_LESS_THAN, CSTR_EQUAL, 0, L"\xfb2b", L"\x05e9\x05c2" }, /* Expansion character always follow default character logic */
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x6df8", L"\x654b\x29e9" }, /* Japanese locale */
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x685d\x1239\x1b61", L"\x59b6\x6542\x2a62\x04a7" },
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x62f3\x43e9", L"\x5760" },
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x634c", L"\x2f0d\x5f1c\x7124" },
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x69e7\x0502", L"\x57cc" },
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x7589", L"\x67c5" },
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x5ede\x765c", L"\x7324" },
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x5c7f\x5961", L"\x7cbe" },
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x3162", L"\x6a84\x1549\x0b60" },
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x769e\x448e", L"\x4e6e" },
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x59a4", L"\x5faa\x607c" },
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x529b", L"\x733f" },
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x6ff8\x2a0a", L"\x7953\x6712" },
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x6dfb", L"\x6793" },
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x67ed", L"\x6aa2" },
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x4e61", L"\x6350\x6b08" },
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x5118", L"\x53b3\x75b4" },
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x6bbf", L"\x65a3" },
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x5690", L"\x5fa8" },
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x61e2", L"\x76e5" },
+ { L"ko-KR", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x8db6", L"\xd198" },
+ { L"ko-KR", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x8f72", L"\xd2b9" },
+ { L"ko-KR", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x91d8", L"\xd318" },
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x8db6", L"\xd198" },
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x8f72", L"\xd2b9" },
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x91d8", L"\xd318" },
+ { L"cs-CZ", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x0160", L"\x0219" },
+ { L"cs-CZ", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x059a", L"\x0308" },
+ { L"cs-CZ", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x013a", L"\x013f" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0160", L"\x0219" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x059a", L"\x0308" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x013a", L"\x013f" },
+ { L"vi-VN", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x1d8f", L"\x1ea8" },
+ { L"vi-VN", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0323", L"\xfe26" },
+ { L"vi-VN", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"R", L"\xff32" },
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x1d8f", L"\x1ea8" },
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x0323", L"\xfe26" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"R", L"\xff32" },
+ { L"zh-HK", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x83ae", L"\x71b9" },
+ { L"zh-HK", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x7e50", L"\xc683" },
+ { L"zh-HK", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x6c69", L"\x7f8a" },
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x83ae", L"\x71b9" },
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x7e50", L"\xc683" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x6c69", L"\x7f8a" },
+ { L"tr-TR", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x00dc", L"\x1ee9" },
+ { L"tr-TR", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x00fc", L"\x1ee6" },
+ { L"tr-TR", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0152", L"\x00d6" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x00dc", L"\x1ee9" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x00fc", L"\x1ee6" },
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x0152", L"\x00d6" },
};
static void test_unicode_sorting(void)
diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c
index da358d74934..ef135e48c2f 100644
--- a/dlls/kernelbase/locale.c
+++ b/dlls/kernelbase/locale.c
@@ -2494,9 +2494,27 @@ struct sortkey_data
int buffer_len;
};
-static void sortkey_get_char(struct character_info *info, WCHAR ch)
+static DWORD sortkey_get_exception(WCHAR ch, const struct sortguid *locale)
{
- DWORD value = sort.keys[ch];
+ if (locale && locale->except)
+ {
+ DWORD *table = sort.keys + locale->except;
+ DWORD hi = ch >> 8;
+ DWORD lo = ch & 0xff;
+ if (table[hi] == hi * 0x100)
+ return 0;
+ if (sort.keys[table[hi] + lo] == sort.keys[hi * 0x100 + lo])
+ return 0;
+ return sort.keys[table[hi] + lo];
+ }
+ return 0;
+}
+
+static void sortkey_get_char(struct character_info *info, WCHAR ch, const struct sortguid *locale)
+{
+ DWORD value = sortkey_get_exception(ch, locale);
+ if (!value)
+ value = sort.keys[ch];
info->weight_case = value >> 24;
info->weight_diacritic = (value >> 16) & 0xff;
info->script_member = (value >> 8) & 0xff;
@@ -2548,18 +2566,18 @@ static void sortkey_add_diacritic_weight(struct sortkey_data *data, BYTE value,
*last_weighted_pos = data->buffer_pos;
}
-static void sortkey_handle_expansion_main(struct sortkey_data *data, int flags, WCHAR c)
+static void sortkey_handle_expansion_main(struct sortkey_data *data, int flags, WCHAR c, const struct sortguid *locale)
{
struct character_info info;
const WCHAR *expansion = sortkey_get_expansion(c);
if (expansion)
{
/* Expansion characters always follow default character logic, ignoring the script_member value */
- sortkey_handle_expansion_main(data, flags, expansion[0]);
- sortkey_handle_expansion_main(data, flags, expansion[1]);
+ sortkey_handle_expansion_main(data, flags, expansion[0], locale);
+ sortkey_handle_expansion_main(data, flags, expansion[1], locale);
return;
}
- sortkey_get_char(&info, c);
+ sortkey_get_char(&info, c, locale);
if (info.script_member != SORTKEY_UNSORTABLE)
{
sortkey_add_weight(data, info.script_member);
@@ -2569,11 +2587,11 @@ static void sortkey_handle_expansion_main(struct sortkey_data *data, int flags,
}
}
-static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR c)
+static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR c, const struct sortguid *locale)
{
struct character_info info;
- sortkey_get_char(&info, c);
+ sortkey_get_char(&info, c, locale);
switch (info.script_member)
{
@@ -2581,7 +2599,7 @@ static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR
break;
case SORTKEY_EXPANSION:
- sortkey_handle_expansion_main(data, flags, c);
+ sortkey_handle_expansion_main(data, flags, c, locale);
break;
case SORTKEY_DIACRITIC:
@@ -2640,18 +2658,18 @@ static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR
}
}
-static void sortkey_handle_expansion_diacritic(struct sortkey_data *data, int flags, WCHAR c, int *last_weighted_pos)
+static void sortkey_handle_expansion_diacritic(struct sortkey_data *data, int flags, WCHAR c, int *last_weighted_pos, const struct sortguid *locale)
{
struct character_info info;
const WCHAR *expansion = sortkey_get_expansion(c);
if (expansion)
{
/* Expansion characters always follow default character logic, ignoring the script_member value */
- sortkey_handle_expansion_diacritic(data, flags, expansion[0], last_weighted_pos);
- sortkey_handle_expansion_diacritic(data, flags, expansion[1], last_weighted_pos);
+ sortkey_handle_expansion_diacritic(data, flags, expansion[0], last_weighted_pos, locale);
+ sortkey_handle_expansion_diacritic(data, flags, expansion[1], last_weighted_pos, locale);
return;
}
- sortkey_get_char(&info, c);
+ sortkey_get_char(&info, c, locale);
if (info.script_member != SORTKEY_UNSORTABLE)
{
if (!sortkey_is_PUA(info.script_member))
@@ -2659,12 +2677,12 @@ static void sortkey_handle_expansion_diacritic(struct sortkey_data *data, int fl
}
}
-static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags, WCHAR c, int *last_weighted_pos, int diacritic_start_pos)
+static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags, WCHAR c, int *last_weighted_pos, int diacritic_start_pos, const struct sortguid *locale)
{
struct character_info info;
int old_pos;
- sortkey_get_char(&info, c);
+ sortkey_get_char(&info, c, locale);
switch (info.script_member)
{
@@ -2672,7 +2690,7 @@ static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags,
break;
case SORTKEY_EXPANSION:
- sortkey_handle_expansion_diacritic(data, flags, c, last_weighted_pos);
+ sortkey_handle_expansion_diacritic(data, flags, c, last_weighted_pos, locale);
break;
case SORTKEY_DIACRITIC:
@@ -2728,29 +2746,29 @@ static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags,
}
}
-static void sortkey_handle_expansion_case(struct sortkey_data *data, int flags, WCHAR c)
+static void sortkey_handle_expansion_case(struct sortkey_data *data, int flags, WCHAR c, const struct sortguid *locale)
{
struct character_info info;
const WCHAR *expansion = sortkey_get_expansion(c);
if (expansion)
{
/* Expansion characters always follow default character logic, ignoring the script_member value */
- sortkey_handle_expansion_case(data, flags, expansion[0]);
- sortkey_handle_expansion_case(data, flags, expansion[1]);
+ sortkey_handle_expansion_case(data, flags, expansion[0], locale);
+ sortkey_handle_expansion_case(data, flags, expansion[1], locale);
return;
}
- sortkey_get_char(&info, c);
+ sortkey_get_char(&info, c, locale);
if (info.script_member != SORTKEY_UNSORTABLE)
{
sortkey_add_case_weight(data, flags, info.weight_case);
}
}
-static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR c)
+static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR c, const struct sortguid *locale)
{
struct character_info info;
- sortkey_get_char(&info, c);
+ sortkey_get_char(&info, c, locale);
switch (info.script_member)
{
@@ -2758,7 +2776,7 @@ static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR
break;
case SORTKEY_EXPANSION:
- sortkey_handle_expansion_case(data, flags, c);
+ sortkey_handle_expansion_case(data, flags, c, locale);
break;
case SORTKEY_DIACRITIC:
@@ -2800,12 +2818,12 @@ static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR
}
}
-static void sortkey_add_special_weights(struct sortkey_data *data, int flags, WCHAR c)
+static void sortkey_add_special_weights(struct sortkey_data *data, int flags, WCHAR c, const struct sortguid *locale)
{
struct character_info info;
BYTE weight_second;
- sortkey_get_char(&info, c);
+ sortkey_get_char(&info, c, locale);
if (info.script_member == SORTKEY_PUNCTUATION)
{
@@ -2818,11 +2836,11 @@ static void sortkey_add_special_weights(struct sortkey_data *data, int flags, WC
}
}
-static void sortkey_add_extra_weights_small(struct sortkey_data *data, int flags, WCHAR c)
+static void sortkey_add_extra_weights_small(struct sortkey_data *data, int flags, WCHAR c, const struct sortguid *locale)
{
struct character_info info;
- sortkey_get_char(&info, c);
+ sortkey_get_char(&info, c, locale);
if (info.script_member == SORTKEY_JAPANESE)
{
@@ -2840,11 +2858,11 @@ static void sortkey_add_extra_weights_small(struct sortkey_data *data, int flags
}
}
-static void sortkey_add_extra_weights_kana(struct sortkey_data *data, int flags, WCHAR c)
+static void sortkey_add_extra_weights_kana(struct sortkey_data *data, int flags, WCHAR c, const struct sortguid *locale)
{
struct character_info info;
- sortkey_get_char(&info, c);
+ sortkey_get_char(&info, c, locale);
if (info.script_member == SORTKEY_JAPANESE)
{
@@ -2861,11 +2879,11 @@ static void sortkey_add_extra_weights_kana(struct sortkey_data *data, int flags,
}
}
-static void sortkey_add_extra_weights_width(struct sortkey_data *data, int flags, WCHAR c)
+static void sortkey_add_extra_weights_width(struct sortkey_data *data, int flags, WCHAR c, const struct sortguid *locale)
{
struct character_info info;
- sortkey_get_char(&info, c);
+ sortkey_get_char(&info, c, locale);
if (info.script_member == SORTKEY_JAPANESE)
{
@@ -2882,13 +2900,14 @@ static void sortkey_add_extra_weights_width(struct sortkey_data *data, int flags
}
}
-static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, int str_len, BYTE *buffer, int buffer_len)
+static int sortkey_generate(int flags, const WCHAR *locale_name, const WCHAR *str, int str_len, BYTE *buffer, int buffer_len)
{
static const BYTE SORTKEY_SEPARATOR = 1;
static const BYTE SORTKEY_TERMINATOR = 0;
static const BYTE SORTKEY_EXTRA_SEPARATOR = 0xff;
int i;
struct sortkey_data data;
+ const struct sortguid *locale = get_language_sort(locale_name);
data.buffer = buffer;
data.buffer_pos = 0;
@@ -2899,7 +2918,7 @@ static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, in
/* Main weights */
for (i = 0; i < str_len; i++)
- sortkey_add_main_weights(&data, flags, str[i]);
+ sortkey_add_main_weights(&data, flags, str[i], locale);
sortkey_add_weight(&data, SORTKEY_SEPARATOR);
/* Diacritic weights */
@@ -2908,7 +2927,7 @@ static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, in
int diacritic_start_pos = data.buffer_pos;
int last_weighted_pos = data.buffer_pos;
for (i = 0; i < str_len; i++)
- sortkey_add_diacritic_weights(&data, flags, str[i], &last_weighted_pos, diacritic_start_pos);
+ sortkey_add_diacritic_weights(&data, flags, str[i], &last_weighted_pos, diacritic_start_pos, locale);
/* Remove all weights <= SORTKEY_MIN_WEIGHT from the end */
data.buffer_pos = last_weighted_pos;
}
@@ -2916,24 +2935,24 @@ static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, in
/* Case weights */
for (i = 0; i < str_len; i++)
- sortkey_add_case_weights(&data, flags, str[i]);
+ sortkey_add_case_weights(&data, flags, str[i], locale);
sortkey_add_weight(&data, SORTKEY_SEPARATOR);
/* Extra weights */
for (i = 0; i < str_len; i++)
- sortkey_add_extra_weights_small(&data, flags, str[i]);
+ sortkey_add_extra_weights_small(&data, flags, str[i], locale);
sortkey_add_weight(&data, SORTKEY_EXTRA_SEPARATOR);
for (i = 0; i < str_len; i++)
- sortkey_add_extra_weights_kana(&data, flags, str[i]);
+ sortkey_add_extra_weights_kana(&data, flags, str[i], locale);
sortkey_add_weight(&data, SORTKEY_EXTRA_SEPARATOR);
for (i = 0; i < str_len; i++)
- sortkey_add_extra_weights_width(&data, flags, str[i]);
+ sortkey_add_extra_weights_width(&data, flags, str[i], locale);
sortkey_add_weight(&data, SORTKEY_EXTRA_SEPARATOR);
sortkey_add_weight(&data, SORTKEY_SEPARATOR);
/* Special weights */
for (i = 0; i < str_len; i++)
- sortkey_add_special_weights(&data, flags, str[i]);
+ sortkey_add_special_weights(&data, flags, str[i], locale);
sortkey_add_weight(&data, SORTKEY_TERMINATOR);
if (data.buffer_pos <= buffer_len || !buffer)
@@ -5622,7 +5641,7 @@ INT WINAPI DECLSPEC_HOTPATCH LCMapStringEx( const WCHAR *locale, DWORD flags, co
TRACE( "(%s,0x%08x,%s,%d,%p,%d)\n",
debugstr_w(locale), flags, debugstr_wn(src, srclen), srclen, dst, dstlen );
- if (!(ret = sortkey_generate(flags, L"", src, srclen, (BYTE *)dst, dstlen )))
+ if (!(ret = sortkey_generate(flags, locale, src, srclen, (BYTE *)dst, dstlen )))
SetLastError( ERROR_INSUFFICIENT_BUFFER );
return ret;
}
--
2.29.2

View File

@ -0,0 +1,479 @@
From 4c9ae78eef8d1fc6f1bdc1c23d18719865d1cff8 Mon Sep 17 00:00:00 2001
From: Fabian Maurer <dark.shadow4@web.de>
Date: Sun, 6 Dec 2020 20:57:16 +0100
Subject: [PATCH] kernelbase: Implement CompareString functions
---
dlls/kernel32/tests/locale.c | 35 +++--
dlls/kernelbase/locale.c | 286 +++++++++++++++++++----------------
2 files changed, 175 insertions(+), 146 deletions(-)
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
index e4a31c120f0..e04c85fb45c 100644
--- a/dlls/kernel32/tests/locale.c
+++ b/dlls/kernel32/tests/locale.c
@@ -1955,16 +1955,16 @@ static void test_CompareStringA(void)
"a\\0b vs a expected CSTR_EQUAL or CSTR_GREATER_THAN, got %d\n", ret);
ret = CompareStringA(lcid, 0, "\2", 2, "\1", 2);
- todo_wine ok(ret != CSTR_EQUAL, "\\2 vs \\1 expected unequal\n");
+ ok(ret != CSTR_EQUAL, "\\2 vs \\1 expected unequal\n");
ret = CompareStringA(lcid, NORM_IGNORECASE | LOCALE_USE_CP_ACP, "#", -1, ".", -1);
- todo_wine ok(ret == CSTR_LESS_THAN, "\"#\" vs \".\" expected CSTR_LESS_THAN, got %d\n", ret);
+ ok(ret == CSTR_LESS_THAN, "\"#\" vs \".\" expected CSTR_LESS_THAN, got %d\n", ret);
ret = CompareStringA(lcid, NORM_IGNORECASE, "_", -1, ".", -1);
- todo_wine ok(ret == CSTR_GREATER_THAN, "\"_\" vs \".\" expected CSTR_GREATER_THAN, got %d\n", ret);
+ ok(ret == CSTR_GREATER_THAN, "\"_\" vs \".\" expected CSTR_GREATER_THAN, got %d\n", ret);
ret = lstrcmpiA("#", ".");
- todo_wine ok(ret == -1, "\"#\" vs \".\" expected -1, got %d\n", ret);
+ ok(ret == -1, "\"#\" vs \".\" expected -1, got %d\n", ret);
lcid = MAKELCID(MAKELANGID(LANG_POLISH, SUBLANG_DEFAULT), SORT_DEFAULT);
@@ -2045,9 +2045,9 @@ static void test_CompareStringW(void)
ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
ret = CompareStringW(CP_ACP, NORM_IGNORENONSPACE, ABC_EE, 3, A_ACUTE_BC, 4);
- todo_wine ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
+ ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
ret = CompareStringW(CP_ACP, NORM_IGNORENONSPACE, ABC_EE, 4, A_ACUTE_BC_DECOMP, 5);
- todo_wine ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
+ ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
ret = CompareStringW(CP_ACP, NORM_IGNORENONSPACE, A_ACUTE_BC, 4, A_ACUTE_BC_DECOMP, 5);
ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
@@ -2059,12 +2059,12 @@ static void test_CompareStringW(void)
ret = CompareStringW(CP_ACP, 0, A_NULL_BC, 4, A_ACUTE_BC, 4);
ok(ret == CSTR_LESS_THAN, "expected CSTR_LESS_THAN, got %d\n", ret);
ret = CompareStringW(CP_ACP, NORM_IGNORENONSPACE, A_NULL_BC, 4, A_ACUTE_BC, 4);
- todo_wine ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
+ ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
ret = CompareStringW(CP_ACP, 0, A_NULL_BC, 4, A_ACUTE_BC_DECOMP, 5);
ok(ret == CSTR_LESS_THAN, "expected CSTR_LESS_THAN, got %d\n", ret);
ret = CompareStringW(CP_ACP, NORM_IGNORENONSPACE, A_NULL_BC, 4, A_ACUTE_BC_DECOMP, 5);
- todo_wine ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
+ ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
}
struct comparestringex_test {
@@ -2101,7 +2101,7 @@ static const struct comparestringex_test comparestringex_tests[] = {
},
{ /* 5 */
"tr-TR", 0,
- {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, TRUE
+ {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, FALSE
},
/* with NORM_IGNORECASE */
{ /* 6 */
@@ -2126,7 +2126,7 @@ static const struct comparestringex_test comparestringex_tests[] = {
},
{ /* 11 */
"tr-TR", NORM_IGNORECASE,
- {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, TRUE
+ {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, FALSE
},
/* with NORM_LINGUISTIC_CASING */
{ /* 12 */
@@ -2151,7 +2151,7 @@ static const struct comparestringex_test comparestringex_tests[] = {
},
{ /* 17 */
"tr-TR", NORM_LINGUISTIC_CASING,
- {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, TRUE
+ {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, FALSE
},
/* with LINGUISTIC_IGNORECASE */
{ /* 18 */
@@ -2176,7 +2176,7 @@ static const struct comparestringex_test comparestringex_tests[] = {
},
{ /* 23 */
"tr-TR", LINGUISTIC_IGNORECASE,
- {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, TRUE
+ {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, FALSE
},
/* with NORM_LINGUISTIC_CASING | NORM_IGNORECASE */
{ /* 24 */
@@ -2201,7 +2201,7 @@ static const struct comparestringex_test comparestringex_tests[] = {
},
{ /* 29 */
"tr-TR", NORM_LINGUISTIC_CASING | NORM_IGNORECASE,
- {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, TRUE
+ {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, FALSE
},
/* with NORM_LINGUISTIC_CASING | LINGUISTIC_IGNORECASE */
{ /* 30 */
@@ -2226,7 +2226,7 @@ static const struct comparestringex_test comparestringex_tests[] = {
},
{ /* 35 */
"tr-TR", NORM_LINGUISTIC_CASING | LINGUISTIC_IGNORECASE,
- {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, CSTR_LESS_THAN, TRUE
+ {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, CSTR_LESS_THAN, FALSE
}
};
@@ -3385,6 +3385,9 @@ static const struct sorting_test_entry unicode_sorting_tests[] =
{ L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x00dc", L"\x1ee9" },
{ L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x00fc", L"\x1ee6" },
{ L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x0152", L"\x00d6" },
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xa042\x09bc", L"\xa042" }, /* Diacritic is added */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xa063\x302b", L"\xa063" }, /* Diacritic is added */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xa07e\x0c56", L"\xa07e" }, /* Diacritic is added */
};
static void test_unicode_sorting(void)
@@ -6123,7 +6126,7 @@ static void test_FindNLSStringEx(void)
{ localeW, FIND_FROMSTART, comb_s_accent1W, ARRAY_SIZE(comb_s_accent1W)-1,
comb_s_accent2W, ARRAY_SIZE(comb_s_accent2W)-1, 0, 0, 6, 1, TRUE },
{ localeW, FIND_FROMSTART, comb_q_accent1W, ARRAY_SIZE(comb_q_accent1W)-1,
- comb_q_accent2W, ARRAY_SIZE(comb_q_accent2W)-1, 0, 0, 7, 1, FALSE },
+ comb_q_accent2W, ARRAY_SIZE(comb_q_accent2W)-1, 0, 0, 7, 0, FALSE },
{ 0 }
};
struct test_data *ptest;
@@ -7277,6 +7280,6 @@ START_TEST(locale)
test_SpecialCasing();
test_NLSVersion();
/* this requires collation table patch to make it MS compatible */
- if (0) test_sorting();
+ test_sorting();
test_unicode_sorting();
}
diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c
index ef135e48c2f..21a823063f3 100644
--- a/dlls/kernelbase/locale.c
+++ b/dlls/kernelbase/locale.c
@@ -2335,126 +2335,6 @@ static int map_to_halfwidth( WCHAR c, WCHAR *dst, int dstlen )
return 1;
}
-
-/* 32-bit collation element table format:
- * unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit,
- * case weight - high 4 bit of low 8 bit.
- */
-
-enum weight { UNICODE_WEIGHT, DIACRITIC_WEIGHT, CASE_WEIGHT };
-
-static unsigned int get_weight( WCHAR ch, enum weight type )
-{
- unsigned int ret;
-
- ret = collation_table[collation_table[collation_table[ch >> 8] + ((ch >> 4) & 0x0f)] + (ch & 0xf)];
- if (ret == ~0u) return ch;
-
- switch (type)
- {
- case UNICODE_WEIGHT: return ret >> 16;
- case DIACRITIC_WEIGHT: return (ret >> 8) & 0xff;
- case CASE_WEIGHT: return (ret >> 4) & 0x0f;
- default: return 0;
- }
-}
-
-
-static void inc_str_pos( const WCHAR **str, int *len, unsigned int *dpos, unsigned int *dlen )
-{
- (*dpos)++;
- if (*dpos == *dlen)
- {
- *dpos = *dlen = 0;
- (*str)++;
- (*len)--;
- }
-}
-
-
-static int compare_weights(int flags, const WCHAR *str1, int len1,
- const WCHAR *str2, int len2, enum weight type )
-{
- unsigned int ce1, ce2, dpos1 = 0, dpos2 = 0, dlen1 = 0, dlen2 = 0;
- const WCHAR *dstr1 = NULL, *dstr2 = NULL;
-
- while (len1 > 0 && len2 > 0)
- {
- if (!dlen1 && !(dstr1 = get_decomposition( *str1, &dlen1 ))) dstr1 = str1;
- if (!dlen2 && !(dstr2 = get_decomposition( *str2, &dlen2 ))) dstr2 = str2;
-
- if (flags & NORM_IGNORESYMBOLS)
- {
- int skip = 0;
- /* FIXME: not tested */
- if (get_char_type( CT_CTYPE1, dstr1[dpos1] ) & (C1_PUNCT | C1_SPACE))
- {
- inc_str_pos( &str1, &len1, &dpos1, &dlen1 );
- skip = 1;
- }
- if (get_char_type( CT_CTYPE1, dstr2[dpos2] ) & (C1_PUNCT | C1_SPACE))
- {
- inc_str_pos( &str2, &len2, &dpos2, &dlen2 );
- skip = 1;
- }
- if (skip) continue;
- }
-
- /* hyphen and apostrophe are treated differently depending on
- * whether SORT_STRINGSORT specified or not
- */
- if (type == UNICODE_WEIGHT && !(flags & SORT_STRINGSORT))
- {
- if (dstr1[dpos1] == '-' || dstr1[dpos1] == '\'')
- {
- if (dstr2[dpos2] != '-' && dstr2[dpos2] != '\'')
- {
- inc_str_pos( &str1, &len1, &dpos1, &dlen1 );
- continue;
- }
- }
- else if (dstr2[dpos2] == '-' || dstr2[dpos2] == '\'')
- {
- inc_str_pos( &str2, &len2, &dpos2, &dlen2 );
- continue;
- }
- }
-
- ce1 = get_weight( dstr1[dpos1], type );
- if (!ce1)
- {
- inc_str_pos( &str1, &len1, &dpos1, &dlen1 );
- continue;
- }
- ce2 = get_weight( dstr2[dpos2], type );
- if (!ce2)
- {
- inc_str_pos( &str2, &len2, &dpos2, &dlen2 );
- continue;
- }
-
- if (ce1 - ce2) return ce1 - ce2;
-
- inc_str_pos( &str1, &len1, &dpos1, &dlen1 );
- inc_str_pos( &str2, &len2, &dpos2, &dlen2 );
- }
- while (len1)
- {
- if (!dlen1 && !(dstr1 = get_decomposition( *str1, &dlen1 ))) dstr1 = str1;
- ce1 = get_weight( dstr1[dpos1], type );
- if (ce1) break;
- inc_str_pos( &str1, &len1, &dpos1, &dlen1 );
- }
- while (len2)
- {
- if (!dlen2 && !(dstr2 = get_decomposition( *str2, &dlen2 ))) dstr2 = str2;
- ce2 = get_weight( dstr2[dpos2], type );
- if (ce2) break;
- inc_str_pos( &str2, &len2, &dpos2, &dlen2 );
- }
- return len1 - len2;
-}
-
enum sortkey_special_script
{
SORTKEY_UNSORTABLE = 0,
@@ -2492,6 +2372,7 @@ struct sortkey_data
BYTE *buffer;
int buffer_pos;
int buffer_len;
+ BOOL is_compare_string;
};
static DWORD sortkey_get_exception(WCHAR ch, const struct sortguid *locale)
@@ -2703,7 +2584,10 @@ static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags,
if (old_pos >= diacritic_start_pos)
{
if (old_pos < data->buffer_len)
+ {
data->buffer[old_pos] += info.weight_diacritic; /* Overflow can happen, that's okay */
+ *last_weighted_pos = data->buffer_pos;
+ }
}
else
sortkey_add_diacritic_weight(data, info.weight_diacritic, last_weighted_pos);
@@ -2912,6 +2796,7 @@ static int sortkey_generate(int flags, const WCHAR *locale_name, const WCHAR *st
data.buffer = buffer;
data.buffer_pos = 0;
data.buffer_len = buffer ? buffer_len : 0;
+ data.is_compare_string = FALSE;
if (str_len == -1)
str_len = wcslen(str);
@@ -2961,6 +2846,155 @@ static int sortkey_generate(int flags, const WCHAR *locale_name, const WCHAR *st
return 0;
}
+/* Compare the sortkey bytes both keys already hold, from start_index up to the shorter key.
+ * The end is also clamped to the buffer sizes: buffer_pos may presumably exceed buffer_len
+ * (sortkey_add_diacritic_weights checks against buffer_len before writing) - TODO confirm. */
+static int early_exit_sortkey_comparison(const struct sortkey_data* data1, const struct sortkey_data* data2, int start_index)
+{
+    int i;
+    int end_index = min(data1->buffer_pos, data2->buffer_pos);
+
+    end_index = min(end_index, min(data1->buffer_len, data2->buffer_len));
+    for (i = start_index; i < end_index; i++)
+    {
+        if (data1->buffer[i] > data2->buffer[i]) return CSTR_GREATER_THAN;
+        if (data1->buffer[i] < data2->buffer[i]) return CSTR_LESS_THAN;
+    }
+    return CSTR_EQUAL;
+}
+
+/* Compare two strings by building both sortkeys side by side, one weight class at a time
+ * (main, diacritic, case, special), returning at the first class in which they differ.
+ *
+ * flags is handed to the sortkey_add_*_weights helpers, locale_name is resolved with
+ * get_language_sort(). str1_len and str2_len are character counts; CompareStringEx
+ * replaces negative lengths with lstrlenW() before calling.
+ * Returns CSTR_LESS_THAN, CSTR_EQUAL or CSTR_GREATER_THAN.
+ *
+ * NOTE(review): both keys live in fixed 10000-byte stack buffers; how the helpers behave
+ * once a key outgrows its buffer is not visible here and should be confirmed. */
+static int sortkey_compare(int flags, const WCHAR *locale_name, const WCHAR *str1, int str1_len, const WCHAR *str2, int str2_len)
+{
+    int i1, i2;
+    int ret;
+    struct sortkey_data data1, data2;
+    const struct sortguid *locale = get_language_sort(locale_name);
+    int diacritic_start_pos1;
+    int last_weighted_pos1;
+    int diacritic_start_pos2;
+    int last_weighted_pos2;
+    int pos_weight_compare; /* index of the first key byte not compared yet */
+
+    BYTE buffer1[10000];
+    BYTE buffer2[10000];
+
+    data1.buffer = buffer1;
+    data1.buffer_pos = 0;
+    data1.buffer_len = sizeof(buffer1);
+    data1.is_compare_string = TRUE;
+
+    data2.buffer = buffer2;
+    data2.buffer_pos = 0;
+    data2.buffer_len = sizeof(buffer2);
+    data2.is_compare_string = TRUE;
+
+    /* Main weights */
+    for (i1 = 0, i2 = 0; i1 < str1_len || i2 < str2_len; i1++, i2++)
+    {
+        /* Bytes before this position were already compared in earlier iterations */
+        pos_weight_compare = min(data1.buffer_pos, data2.buffer_pos);
+        if (i1 < str1_len)
+            sortkey_add_main_weights(&data1, flags, str1[i1], locale);
+        if (i2 < str2_len)
+            sortkey_add_main_weights(&data2, flags, str2[i2], locale);
+
+        /* For clear differences we must return early without reading all characters. See tests. */
+        ret = early_exit_sortkey_comparison(&data1, &data2, pos_weight_compare);
+        if (ret != CSTR_EQUAL)
+            return ret;
+    }
+
+    /* All shared bytes matched, so the longer key sorts after the shorter one */
+    if (data1.buffer_pos > data2.buffer_pos)
+        return CSTR_GREATER_THAN;
+    if (data1.buffer_pos < data2.buffer_pos)
+        return CSTR_LESS_THAN;
+
+    diacritic_start_pos1 = data1.buffer_pos;
+    last_weighted_pos1 = data1.buffer_pos;
+    diacritic_start_pos2 = data2.buffer_pos;
+    last_weighted_pos2 = data2.buffer_pos;
+    pos_weight_compare = min(data1.buffer_pos, data2.buffer_pos);
+
+    /* Diacritic weights */
+    if (!(flags & NORM_IGNORENONSPACE))
+    {
+        for (i1 = 0, i2 = 0; i1 < str1_len || i2 < str2_len; i1++, i2++)
+        {
+            if (i1 < str1_len)
+                sortkey_add_diacritic_weights(&data1, flags, str1[i1], &last_weighted_pos1, diacritic_start_pos1, locale);
+            if (i2 < str2_len)
+                sortkey_add_diacritic_weights(&data2, flags, str2[i2], &last_weighted_pos2, diacritic_start_pos2, locale);
+        }
+
+        /* Continue each key from the last position the helper marked as weighted */
+        data1.buffer_pos = last_weighted_pos1;
+        data2.buffer_pos = last_weighted_pos2;
+
+        /* Diacritics are compared once, after both strings were processed completely */
+        ret = early_exit_sortkey_comparison(&data1, &data2, pos_weight_compare);
+        if (ret != CSTR_EQUAL)
+            return ret;
+
+        if (data1.buffer_pos > data2.buffer_pos)
+            return CSTR_GREATER_THAN;
+        if (data1.buffer_pos < data2.buffer_pos)
+            return CSTR_LESS_THAN;
+    }
+
+    /* Case weights */
+    for (i1 = 0, i2 = 0; i1 < str1_len || i2 < str2_len; i1++, i2++)
+    {
+        pos_weight_compare = min(data1.buffer_pos, data2.buffer_pos);
+        if (i1 < str1_len)
+            sortkey_add_case_weights(&data1, flags, str1[i1], locale);
+        if (i2 < str2_len)
+            sortkey_add_case_weights(&data2, flags, str2[i2], locale);
+
+        ret = early_exit_sortkey_comparison(&data1, &data2, pos_weight_compare);
+        if (ret != CSTR_EQUAL)
+            return ret;
+    }
+
+    if (data1.buffer_pos > data2.buffer_pos)
+        return CSTR_GREATER_THAN;
+    if (data1.buffer_pos < data2.buffer_pos)
+        return CSTR_LESS_THAN;
+
+    /* Special weights */
+    for (i1 = 0, i2 = 0; i1 < str1_len || i2 < str2_len; i1++, i2++)
+    {
+        pos_weight_compare = min(data1.buffer_pos, data2.buffer_pos);
+        if (i1 < str1_len)
+            sortkey_add_special_weights(&data1, flags, str1[i1], locale);
+        if (i2 < str2_len)
+            sortkey_add_special_weights(&data2, flags, str2[i2], locale);
+
+        ret = early_exit_sortkey_comparison(&data1, &data2, pos_weight_compare);
+        if (ret != CSTR_EQUAL)
+            return ret;
+    }
+
+    if (data1.buffer_pos > data2.buffer_pos)
+        return CSTR_GREATER_THAN;
+    if (data1.buffer_pos < data2.buffer_pos)
+        return CSTR_LESS_THAN;
+
+    /* No weight class differs */
+    return CSTR_EQUAL;
+}
+
+
static const struct geoinfo *get_geoinfo_ptr( GEOID geoid )
{
int min = 0, max = ARRAY_SIZE( geoinfodata )-1;
@@ -3434,16 +3468,8 @@ INT WINAPI CompareStringEx( const WCHAR *locale, DWORD flags, const WCHAR *str1,
if (len1 < 0) len1 = lstrlenW(str1);
if (len2 < 0) len2 = lstrlenW(str2);
- ret = compare_weights( flags, str1, len1, str2, len2, UNICODE_WEIGHT );
- if (!ret)
- {
- if (!(flags & NORM_IGNORENONSPACE))
- ret = compare_weights( flags, str1, len1, str2, len2, DIACRITIC_WEIGHT );
- if (!ret && !(flags & NORM_IGNORECASE))
- ret = compare_weights( flags, str1, len1, str2, len2, CASE_WEIGHT );
- }
- if (!ret) return CSTR_EQUAL;
- return (ret < 0) ? CSTR_LESS_THAN : CSTR_GREATER_THAN;
+ ret = sortkey_compare(flags, locale, str1, len1, str2, len2);
+ return ret;
}
--
2.29.2

View File

@ -1,5 +1,2 @@
Fixes: [10767] Fix comparison of punctuation characters in lstrcmp
Fixes: [32490] Graphical issues in Inquisitor
Disabled: true
# The format of the collation table was changed by bfeb0a97f.
# Updating the binary patching will be rather painful.

View File

@ -51,7 +51,7 @@ usage()
# Get the upstream commit sha
upstream_commit()
{
echo "842b38e29166a429d59331be40761335807c85d2"
echo "310019789f7bde12ae3f25f723957c975fb2f804"
}
# Show version information
@ -140,6 +140,7 @@ patch_enable_all ()
enable_kernel32_SetProcessDEPPolicy="$1"
enable_krnl386_exe16_GDT_LDT_Emulation="$1"
enable_krnl386_exe16_Invalid_Console_Handles="$1"
enable_libs_Unicode_Collation="$1"
enable_loader_KeyboardLayouts="$1"
enable_mfplat_streaming_support="$1"
enable_mmsystem_dll16_MIDIHDR_Refcount="$1"
@ -501,6 +502,9 @@ patch_enable ()
krnl386.exe16-Invalid_Console_Handles)
enable_krnl386_exe16_Invalid_Console_Handles="$2"
;;
libs-Unicode_Collation)
enable_libs_Unicode_Collation="$2"
;;
loader-KeyboardLayouts)
enable_loader_KeyboardLayouts="$2"
;;
@ -2727,6 +2731,24 @@ if test "$enable_krnl386_exe16_Invalid_Console_Handles" -eq 1; then
patch_apply krnl386.exe16-Invalid_Console_Handles/0001-krnl386.exe16-Really-translate-all-invalid-console-h.patch
fi
# Patchset libs-Unicode_Collation
# |
# | This patchset fixes the following Wine bugs:
# | * [#10767] Fix comparison of punctuation characters in lstrcmp
# | * [#32490] Graphical issues in Inquisitor
# |
# | Modified files:
# | * dlls/kernel32/tests/locale.c, dlls/kernelbase/locale.c
# |
if test "$enable_libs_Unicode_Collation" -eq 1; then
patch_apply libs-Unicode_Collation/0001-kernelbase-Implement-sortkey-generation-on-official-.patch
patch_apply libs-Unicode_Collation/0002-kernelbase-Implement-sortkey-punctuation.patch
patch_apply libs-Unicode_Collation/0003-kernelbase-Implement-sortkey-for-Japanese-characters.patch
patch_apply libs-Unicode_Collation/0004-kernelbase-Implement-sortkey-expansion.patch
patch_apply libs-Unicode_Collation/0005-kernelbase-Implement-sortkey-language-support.patch
patch_apply libs-Unicode_Collation/0006-kernelbase-Implement-CompareString-functions.patch
fi
# Patchset loader-KeyboardLayouts
# |
# | This patchset fixes the following Wine bugs: