Rebase against 4312d209232c701b0b78d9f8b463917c989005c5.

libs-Unicode_Collation is not fully replaced upstream, but the remaining patch (0006) holds no value anymore.
This commit is contained in:
Zebediah Figura 2022-05-30 18:26:35 -05:00
parent 8ee2551c93
commit c6119e3d5c
9 changed files with 2 additions and 2041 deletions

View File

@ -1,600 +0,0 @@
From b052dd526d176c8b842f446279ee78542b184f08 Mon Sep 17 00:00:00 2001
From: Fabian Maurer <dark.shadow4@web.de>
Date: Fri, 10 Apr 2020 18:47:18 +0200
Subject: [PATCH] kernelbase: Implement sortkey generation on official tables
Signed-off-by: Fabian Maurer <dark.shadow4@web.de>
---
dlls/kernel32/tests/locale.c | 137 ++++++++++++
dlls/kernelbase/locale.c | 399 ++++++++++++++++++++++++-----------
2 files changed, 413 insertions(+), 123 deletions(-)
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
index 8dc5814162a..d6084e3f328 100644
--- a/dlls/kernel32/tests/locale.c
+++ b/dlls/kernel32/tests/locale.c
@@ -2586,6 +2586,13 @@ static void test_lcmapstring_unicode(lcmapstring_wrapper func_ptr, const char *f
lstrlenW(symbols_stripped) + 1, ret);
ok(!lstrcmpW(buf, symbols_stripped), "%s string comparison mismatch\n", func_name);
+ /* test small buffer */
+ lstrcpyW(buf, fooW);
+ ret = func_ptr(LCMAP_SORTKEY, lower_case, -1, buf, 2);
+ ok(ret == 0, "Expected a failure\n");
+ ok(GetLastError() == ERROR_INSUFFICIENT_BUFFER,
+ "%s unexpected error code %ld\n", func_name, GetLastError());
+
/* test srclen = 0 */
SetLastError(0xdeadbeef);
ret = func_ptr(0, upper_case, 0, buf, ARRAY_SIZE(buf));
@@ -3222,6 +3229,135 @@ static void test_sorting(void)
}
}
+struct sorting_test_entry { /* One row of expectations consumed by test_unicode_sorting() */
+ const WCHAR *locale; /* locale name handed to LCMapStringEx and CompareStringEx */
+ int result_sortkey; /* expected ordering of the two generated sort keys, written as a CSTR_* value */
+ int result_compare; /* expected return value of CompareStringEx for the two strings */
+ DWORD flags; /* OR'ed with LCMAP_SORTKEY for LCMapStringEx; passed unchanged to CompareStringEx */
+ const WCHAR *first; /* first string of the pair */
+ const WCHAR *second; /* second string of the pair */
+ BOOL broken_on_xp; /* fed to broken() inside the ok() conditions of the test */
+};
+
+static const struct sorting_test_entry unicode_sorting_tests[] =
+{
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0037", L"\x277c", TRUE }, /* Normal character */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x1eca", L"\x1ecb" }, /* Normal character */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x1d05", L"\x1d48" }, /* Normal character */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x19d7", L"\x096d" }, /* Normal character diacritics */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x00f5", L"\x1ecf" }, /* Normal character diacritics */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x2793", L"\x0d70", TRUE }, /* Normal character diacritics */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"A", L"a" }, /* Normal character case weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"z", L"Z" }, /* Normal character case weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xe5a6", L"\xe5a5\x0333", TRUE }, /* PUA character */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xe5d7", L"\xe5d6\x0330", TRUE }, /* PUA character */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\u276a", L"\u2768" }, /* Symbols add diacritic weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\u204d", L"\uff02" }, /* Symbols add case weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\ue6e3\u0a02", L"\ue6e3\u20dc", TRUE }, /* Default character, when there is main weight extra there must be no diacritic weight */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"a \u2060 b", L"a b" }, /* Unsortable characters */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"a \xfff0 b", L"a b" }, /* Invalid/undefined characters */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"a\x139F a", L"a a" }, /* Invalid/undefined characters */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"a\x139F a", L"a b" }, /* Invalid/undefined characters */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x00fc", L"\x016d" }, /* Default characters */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x3fcb\x7fd5", L"\x0006\x3032" }, /* Default characters */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x00fc\x30fd", L"\x00fa\x1833" }, /* Default characters */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x1B56\x0330", L"\x1096" }, /* Diacritic is added */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x1817\x0333", L"\x19d7" }, /* Diacritic is added */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x04de\x05ac", L"\x0499" }, /* Diacritic is added */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x01ba\x0654", L"\x01b8" }, /* Diacritic can overflow */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x06b7\x06eb", L"\x06b6" }, /* Diacritic can overflow */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x1420\x0333", L"\x141f" }, /* Diacritic can overflow */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN ,0, L"\x1b56\x0654", L"\x1b56\x0655" }, /* Diacritic can overflow */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x1b56\x0654\x0654", L"\x1b56\x0655" }, /* Diacritic can overflow */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x11bc", L"\x110b" }, /* Jamo case weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x11c1", L"\x1111" }, /* Jamo case weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x11af", L"\x1105" }, /* Jamo case weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x11c2", L"\x11f5" }, /* Jamo main weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x1108", L"\x1121" }, /* Jamo main weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x1116", L"\x11c7" }, /* Jamo main weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x11b1", L"\x11d1" }, /* Jamo main weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x4550\x73d2", L"\x3211\x23ad" }, /* CJK main weight 1 */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x3265", L"\x4079" }, /* CJK main weight 1 */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x4c19\x68d0\x52d0", L"\x316d" }, /* CJK main weight 1 */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x72dd", L"\x6b8a" }, /* CJK main weight 2 */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x6785\x3bff\x6f83", L"\x7550\x34c9\x71a7" }, /* CJK main weight 2 */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x5d61", L"\x3aef" }, /* CJK main weight 2 */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x207a", L"\xfe62" }, /* Symbols case weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xfe65", L"\xff1e" }, /* Symbols case weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x2502", L"\xffe8" }, /* Symbols case weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x21da", L"\x21dc" }, /* Symbols diacritic weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x29fb", L"\x2295" }, /* Symbols diacritic weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0092", L"\x009c" }, /* Symbols diacritic weights */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS, L"\x21da", L"\x21dc" }, /* NORM_IGNORESYMBOLS */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS, L"\x29fb", L"\x2295" }, /* NORM_IGNORESYMBOLS */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS, L"\x0092", L"\x009c" }, /* NORM_IGNORESYMBOLS */
+ { L"en-US", CSTR_EQUAL, CSTR_LESS_THAN, 0, L"\x3099", L"\x309a" }, /* Small diacritic weights at the end get ignored */
+ { L"en-US", CSTR_EQUAL, CSTR_LESS_THAN, 0, L"\x309b", L"\x05a2" }, /* Small diacritic weights at the end get ignored */
+ { L"en-US", CSTR_EQUAL, CSTR_LESS_THAN, 0, L"\xff9e", L"\x0e47" }, /* Small diacritic weights at the end get ignored */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"a b", L"\x0103 a" }, /* Main weights have priority over diacritic weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"a", L"\x0103" }, /* Main weights have priority over diacritic weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"e x", L"\x0113 v" }, /* Main weights have priority over diacritic weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"e", L"\x0113" }, /* Main weights have priority over diacritic weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"c s", L"\x0109 r" }, /* Main weights have priority over diacritic weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"c", L"\x0109" }, /* Main weights have priority over diacritic weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"a \x0103", L"A a" }, /* Diacritic weights have priority over case weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"a", L"A" }, /* Diacritic weights have priority over case weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"e \x0113", L"E e" }, /* Diacritic weights have priority over case weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"e", L"E" }, /* Diacritic weights have priority over case weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"c \x0109", L"C c" }, /* Diacritic weights have priority over case weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"c", L"C" }, /* Diacritic weights have priority over case weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, NORM_IGNORENONSPACE, L"\x1152", L"\x1153" }, /* Diacritic values for Jamo are not ignored */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, NORM_IGNORENONSPACE, L"\x1143", L"\x1145" }, /* Diacritic values for Jamo are not ignored */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, NORM_IGNORENONSPACE, L"\x1196", L"\x1174" }, /* Diacritic values for Jamo are not ignored */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x318e", L"\x382a" }, /* Jungseong < PUA */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xffcb", L"\x3d13" }, /* Jungseong < PUA */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xffcc", L"\x8632" }, /* Jungseong < PUA */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xd847", L"\x382a" }, /* Surrogate > PUA */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xd879", L"\x3d13" }, /* Surrogate > PUA */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xd850", L"\x8632" }, /* Surrogate > PUA */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"A\x0301\x0301", L"A\x0301\x00ad\x0301" }, /* Unsortable combined with diacritics */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"b\x07f2\x07f2", L"b\x07f2\x2064\x07f2" }, /* Unsortable combined with diacritics */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"X\x0337\x0337", L"X\x0337\xfffd\x0337" }, /* Unsortable combined with diacritics */
+};
+
+/* Run every entry of unicode_sorting_tests through LCMapStringEx(LCMAP_SORTKEY)
+ * and CompareStringEx and compare the resulting ordering with the expectation.
+ * Afterwards verify that the reported sort key length does not depend on
+ * whether a destination buffer size is supplied. Skipped when LCMapStringEx
+ * is not available. */
+static void test_unicode_sorting(void)
+{
+    int i;
+    int ret1;
+    int ret2;
+    BYTE buffer[1000];
+
+    if (!pLCMapStringEx)
+    {
+        win_skip("LCMapStringEx not available\n");
+        return;
+    }
+
+    for (i = 0; i < ARRAY_SIZE(unicode_sorting_tests); i++)
+    {
+        BYTE buff1[1000];
+        BYTE buff2[1000];
+        int len1, len2;
+        int cmp;
+        int result;
+        const struct sorting_test_entry *entry = &unicode_sorting_tests[i];
+
+        len1 = pLCMapStringEx(entry->locale, LCMAP_SORTKEY | entry->flags, entry->first, -1, (WCHAR*)buff1, ARRAY_SIZE(buff1), NULL, NULL, 0);
+        len2 = pLCMapStringEx(entry->locale, LCMAP_SORTKEY | entry->flags, entry->second, -1, (WCHAR*)buff2, ARRAY_SIZE(buff2), NULL, NULL, 0);
+
+        /* memcmp() only guarantees the sign of its result, not -1/0/1, so map
+         * the sign onto the CSTR_* constants explicitly instead of adding 2. */
+        cmp = memcmp(buff1, buff2, min(len1, len2));
+        result = cmp < 0 ? CSTR_LESS_THAN : (cmp > 0 ? CSTR_GREATER_THAN : CSTR_EQUAL);
+
+        ok (result == entry->result_sortkey || broken(entry->broken_on_xp), "Test %d (%s, %s) - Expected %d, got %d\n",
+            i, wine_dbgstr_w(entry->first), wine_dbgstr_w(entry->second), entry->result_sortkey, result);
+
+        result = CompareStringEx(entry->locale, entry->flags, entry->first, -1, entry->second, -1, NULL, NULL, 0);
+        if (strcmp(winetest_platform, "wine")) /* Disable test on wine for now */
+            ok (result == entry->result_compare || broken(entry->broken_on_xp), "Test %d (%s, %s) - Expected %d, got %d\n",
+                i, wine_dbgstr_w(entry->first), wine_dbgstr_w(entry->second), entry->result_compare, result);
+    }
+
+    /* Test diacritics when buffer is short */
+    ret1 = pLCMapStringEx(L"en-US", LCMAP_SORTKEY, L"\x0e49\x0e49\x0e49\x0e49\x0e49", -1, (WCHAR*)buffer, 20, NULL, NULL, 0);
+    ret2 = pLCMapStringEx(L"en-US", LCMAP_SORTKEY, L"\x0e49\x0e49\x0e49\x0e49\x0e49", -1, (WCHAR*)buffer, 0, NULL, NULL, 0);
+    ok(ret1 == ret2, "Got ret1=%d, ret2=%d\n", ret1, ret2);
+}
+
static void test_FoldStringA(void)
{
int ret, i, j;
@@ -7649,6 +7785,7 @@ START_TEST(locale)
test_locale_nls();
test_geo_name();
test_sorting();
+ test_unicode_sorting();
test_EnumCalendarInfoA();
test_EnumCalendarInfoW();
test_EnumCalendarInfoExA();
diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c
index d046cefd749..993ac707a55 100644
--- a/dlls/kernelbase/locale.c
+++ b/dlls/kernelbase/locale.c
@@ -3048,127 +3048,6 @@ static int wcstombs_codepage( const CPTABLEINFO *info, DWORD flags, const WCHAR
return wcstombs_sbcs( info, src, srclen, dst, dstlen );
}
-
-static int get_sortkey( DWORD flags, const WCHAR *src, int srclen, char *dst, int dstlen )
-{
- WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
- int key_len[4];
- char *key_ptr[4];
- const WCHAR *src_save = src;
- int srclen_save = srclen;
-
- key_len[0] = key_len[1] = key_len[2] = key_len[3] = 0;
- for (; srclen; srclen--, src++)
- {
- unsigned int i, decomposed_len = 1;/*wine_decompose(*src, dummy, 4);*/
- dummy[0] = *src;
- if (decomposed_len)
- {
- for (i = 0; i < decomposed_len; i++)
- {
- WCHAR wch = dummy[i];
- unsigned int ce;
-
- if ((flags & NORM_IGNORESYMBOLS) &&
- (get_char_type( CT_CTYPE1, wch ) & (C1_PUNCT | C1_SPACE)))
- continue;
-
- if (flags & NORM_IGNORECASE) wch = casemap( nls_info.LowerCaseTable, wch );
-
- ce = collation_table[collation_table[collation_table[wch >> 8] + ((wch >> 4) & 0x0f)] + (wch & 0xf)];
- if (ce != (unsigned int)-1)
- {
- if (ce >> 16) key_len[0] += 2;
- if ((ce >> 8) & 0xff) key_len[1]++;
- if ((ce >> 4) & 0x0f) key_len[2]++;
- if (ce & 1)
- {
- if (wch >> 8) key_len[3]++;
- key_len[3]++;
- }
- }
- else
- {
- key_len[0] += 2;
- if (wch >> 8) key_len[0]++;
- if (wch & 0xff) key_len[0]++;
- }
- }
- }
- }
-
- if (!dstlen) /* compute length */
- /* 4 * '\1' + key length */
- return key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4;
-
- if (dstlen < key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4 + 1)
- return 0; /* overflow */
-
- src = src_save;
- srclen = srclen_save;
-
- key_ptr[0] = dst;
- key_ptr[1] = key_ptr[0] + key_len[0] + 1;
- key_ptr[2] = key_ptr[1] + key_len[1] + 1;
- key_ptr[3] = key_ptr[2] + key_len[2] + 1;
-
- for (; srclen; srclen--, src++)
- {
- unsigned int i, decomposed_len = 1;/*wine_decompose(*src, dummy, 4);*/
- dummy[0] = *src;
- if (decomposed_len)
- {
- for (i = 0; i < decomposed_len; i++)
- {
- WCHAR wch = dummy[i];
- unsigned int ce;
-
- if ((flags & NORM_IGNORESYMBOLS) &&
- (get_char_type( CT_CTYPE1, wch ) & (C1_PUNCT | C1_SPACE)))
- continue;
-
- if (flags & NORM_IGNORECASE) wch = casemap( nls_info.LowerCaseTable, wch );
-
- ce = collation_table[collation_table[collation_table[wch >> 8] + ((wch >> 4) & 0x0f)] + (wch & 0xf)];
- if (ce != (unsigned int)-1)
- {
- WCHAR key;
- if ((key = ce >> 16))
- {
- *key_ptr[0]++ = key >> 8;
- *key_ptr[0]++ = key & 0xff;
- }
- /* make key 1 start from 2 */
- if ((key = (ce >> 8) & 0xff)) *key_ptr[1]++ = key + 1;
- /* make key 2 start from 2 */
- if ((key = (ce >> 4) & 0x0f)) *key_ptr[2]++ = key + 1;
- /* key 3 is always a character code */
- if (ce & 1)
- {
- if (wch >> 8) *key_ptr[3]++ = wch >> 8;
- if (wch & 0xff) *key_ptr[3]++ = wch & 0xff;
- }
- }
- else
- {
- *key_ptr[0]++ = 0xff;
- *key_ptr[0]++ = 0xfe;
- if (wch >> 8) *key_ptr[0]++ = wch >> 8;
- if (wch & 0xff) *key_ptr[0]++ = wch & 0xff;
- }
- }
- }
- }
-
- *key_ptr[0] = 1;
- *key_ptr[1] = 1;
- *key_ptr[2] = 1;
- *key_ptr[3]++ = 1;
- *key_ptr[3] = 0;
- return key_ptr[3] - dst;
-}
-
-
/* compose a full-width katakana. return consumed source characters. */
static int map_to_fullwidth( const WCHAR *src, int srclen, WCHAR *dst )
{
@@ -3358,6 +3237,280 @@ static int compare_weights(int flags, const WCHAR *str1, int len1,
return len1 - len2;
}
+enum sortkey_special_script /* script_member values that the sortkey_add_*_weights() switches treat explicitly */
+{
+ SORTKEY_UNSORTABLE = 0, /* adds nothing in any of the switches */
+ SORTKEY_DIACRITIC = 1, /* only contributes in the diacritic pass */
+ SORTKEY_JAPANESE = 3, /* marked TODO in the switches */
+ SORTKEY_JAMO = 4, /* main weight uses the primary and diacritic fields */
+ SORTKEY_CJK = 5, /* main weight is prefixed with the byte 253 */
+ SORTKEY_PUNCTUATION = 6,
+ SORTKEY_SYMBOL_1 = 7, /* SYMBOL_1..SYMBOL_6 are skipped when NORM_IGNORESYMBOLS is set */
+ SORTKEY_SYMBOL_2 = 8,
+ SORTKEY_SYMBOL_3 = 9,
+ SORTKEY_SYMBOL_4 = 10,
+ SORTKEY_SYMBOL_5 = 11,
+ SORTKEY_SYMBOL_6 = 12,
+};
+
+#define SORTKEY_MIN_WEIGHT 2 /* weight emitted for Jamo/CJK diacritics and CJK case; diacritic weights not above it do not advance last_weighted_pos */
+
+struct character_info /* the four bytes of one sort.keys[] entry, as unpacked by sortkey_get_char() */
+{
+ BYTE weight_primary; /* bits 0-7 of the table value */
+ BYTE script_member; /* bits 8-15 of the table value */
+ BYTE weight_diacritic; /* bits 16-23 of the table value */
+ BYTE weight_case; /* bits 24-31 of the table value */
+};
+
+struct sortkey_data /* output cursor shared by the sortkey_add_* helpers */
+{
+ BYTE *buffer; /* destination bytes; only written while buffer_pos < buffer_len */
+ int buffer_pos; /* index of the next byte; keeps advancing even once it is past buffer_len */
+ int buffer_len; /* writable size of buffer; sortkey_generate() sets it to 0 when buffer is NULL */
+};
+
+/* Look up ch in sort.keys and split the 32-bit entry into the four byte
+ * fields of *info. */
+static void sortkey_get_char(struct character_info *info, WCHAR ch)
+{
+    const DWORD packed = sort.keys[ch];
+
+    /* Fields are stored from the lowest byte upwards. */
+    info->weight_primary   = (BYTE)packed;
+    info->script_member    = (BYTE)(packed >> 8);
+    info->weight_diacritic = (BYTE)(packed >> 16);
+    info->weight_case      = (BYTE)(packed >> 24);
+}
+
+
+/* Tell whether script_member falls into the inclusive range 0xa9..0xaf,
+ * which the weight helpers handle as PUA. */
+static BOOL sortkey_is_PUA(BYTE script_member)
+{
+    if (script_member < 0xa9) return FALSE;
+    return script_member <= 0xaf;
+}
+
+/* Append one byte to the key. The position always advances, so it can be
+ * used to measure the required size; the byte itself is stored only while
+ * the position is still inside the buffer. */
+static void sortkey_add_weight(struct sortkey_data *data, BYTE value)
+{
+    const int slot = data->buffer_pos++;
+
+    if (slot < data->buffer_len) data->buffer[slot] = value;
+}
+
+/* Append a case weight after clearing the bits selected by the flags:
+ * 0x18 for NORM_IGNORECASE and 0x01 for NORM_IGNOREWIDTH. */
+static void sortkey_add_case_weight(struct sortkey_data *data, int flags, BYTE value)
+{
+    BYTE cleared = 0;
+
+    if (flags & NORM_IGNORECASE) cleared |= 0x18;
+    if (flags & NORM_IGNOREWIDTH) cleared |= 0x01;
+
+    sortkey_add_weight(data, value & ~cleared);
+}
+
+/* Append a diacritic weight. When the weight is above SORTKEY_MIN_WEIGHT,
+ * record the position just past it in *last_weighted_pos. */
+static void sortkey_add_diacritic_weight(struct sortkey_data *data, BYTE value, int *last_weighted_pos)
+{
+    sortkey_add_weight(data, value);
+
+    if (value <= SORTKEY_MIN_WEIGHT) return;
+    *last_weighted_pos = data->buffer_pos;
+}
+
+static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR c) /* Append the main-weight bytes of c to data; the bytes chosen depend on the character's script member */
+{
+ struct character_info info;
+
+ sortkey_get_char(&info, c);
+
+ switch (info.script_member)
+ {
+ case SORTKEY_UNSORTABLE:
+ case SORTKEY_DIACRITIC:
+ break; /* neither kind contributes a main weight */
+
+ case SORTKEY_JAPANESE:
+ /* TODO */
+ break;
+
+ case SORTKEY_JAMO: /* two bytes: the primary weight, then the diacritic field */
+ sortkey_add_weight(data, info.weight_primary);
+ sortkey_add_weight(data, info.weight_diacritic);
+ break;
+
+ case SORTKEY_CJK:
+ sortkey_add_weight(data, 253); /* fixed leading byte ahead of the two table bytes */
+ sortkey_add_weight(data, info.weight_primary);
+ sortkey_add_weight(data, info.weight_diacritic);
+ break;
+
+ case SORTKEY_PUNCTUATION:
+ /* TODO */
+ break;
+
+ case SORTKEY_SYMBOL_1:
+ case SORTKEY_SYMBOL_2:
+ case SORTKEY_SYMBOL_3:
+ case SORTKEY_SYMBOL_4:
+ case SORTKEY_SYMBOL_5:
+ case SORTKEY_SYMBOL_6:
+ if (flags & NORM_IGNORESYMBOLS) /* symbols emit nothing when this flag is set */
+ break;
+
+ sortkey_add_weight(data, info.script_member);
+ sortkey_add_weight(data, info.weight_primary);
+ break;
+
+ default: /* every other script: script member byte, then primary weight */
+ sortkey_add_weight(data, info.script_member);
+ sortkey_add_weight(data, info.weight_primary);
+ if (sortkey_is_PUA(info.script_member)) /* PUA characters are handled differently */
+ sortkey_add_weight(data, info.weight_diacritic); /* the diacritic field becomes a third main-weight byte */
+ break;
+ }
+}
+
+static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags, WCHAR c, int *last_weighted_pos, int diacritic_start_pos) /* Append or merge the diacritic weight of c; diacritic_start_pos is the first index of the diacritic section */
+{
+ struct character_info info;
+ int old_pos; /* index of the most recently appended byte */
+
+ sortkey_get_char(&info, c);
+
+ switch (info.script_member)
+ {
+ case SORTKEY_UNSORTABLE:
+ break;
+
+ case SORTKEY_DIACRITIC:
+ old_pos = data->buffer_pos - 1;
+ /*
+ * Diacritic weights are added to the previous weight, if there is one,
+ * rather than being concatenated after it. This may result in overflow,
+ * which is not protected against. */
+
+ if (old_pos >= diacritic_start_pos) /* NOTE(review): this branch never updates *last_weighted_pos, so a merged weight ending the section can still be cut off by the caller - confirm this is intended */
+ {
+ if (old_pos < data->buffer_len) /* nothing is stored when the previous byte lies outside the buffer */
+ data->buffer[old_pos] += info.weight_diacritic; /* Overflow can happen, that's okay */
+ }
+ else
+ sortkey_add_diacritic_weight(data, info.weight_diacritic, last_weighted_pos); /* no earlier byte in this section: start a new one */
+ break;
+
+ case SORTKEY_JAPANESE:
+ /* TODO */
+ break;
+
+ case SORTKEY_JAMO:
+ case SORTKEY_CJK:
+ sortkey_add_diacritic_weight(data, SORTKEY_MIN_WEIGHT, last_weighted_pos); /* constant weight; the table's diacritic field is not used here */
+ break;
+
+ case SORTKEY_PUNCTUATION:
+ /* TODO */
+ break;
+
+ case SORTKEY_SYMBOL_1:
+ case SORTKEY_SYMBOL_2:
+ case SORTKEY_SYMBOL_3:
+ case SORTKEY_SYMBOL_4:
+ case SORTKEY_SYMBOL_5:
+ case SORTKEY_SYMBOL_6:
+ if (!(flags & NORM_IGNORESYMBOLS)) /* symbols emit nothing when this flag is set */
+ sortkey_add_diacritic_weight(data, info.weight_diacritic, last_weighted_pos);
+ break;
+
+ default:
+ if (!sortkey_is_PUA(info.script_member)) /* PUA characters are handled differently */
+ sortkey_add_diacritic_weight(data, info.weight_diacritic, last_weighted_pos);
+ break;
+ }
+}
+
+static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR c) /* Append the case weight of c to data, if its script member has one */
+{
+ struct character_info info;
+
+ sortkey_get_char(&info, c);
+
+ switch (info.script_member)
+ {
+ case SORTKEY_UNSORTABLE:
+ case SORTKEY_DIACRITIC:
+ break; /* neither kind contributes a case weight */
+
+ case SORTKEY_JAPANESE:
+ /* TODO */
+ break;
+
+ case SORTKEY_CJK:
+ sortkey_add_case_weight(data, flags, SORTKEY_MIN_WEIGHT); /* constant weight; the table's case field is not used here */
+ break;
+
+ case SORTKEY_PUNCTUATION:
+ /* TODO */
+ break;
+
+ case SORTKEY_SYMBOL_1:
+ case SORTKEY_SYMBOL_2:
+ case SORTKEY_SYMBOL_3:
+ case SORTKEY_SYMBOL_4:
+ case SORTKEY_SYMBOL_5:
+ case SORTKEY_SYMBOL_6:
+ if (!(flags & NORM_IGNORESYMBOLS)) /* symbols emit nothing when this flag is set */
+ sortkey_add_case_weight(data, flags, info.weight_case);
+ break;
+
+ case SORTKEY_JAMO: /* Jamo shares the default handling */
+ default:
+ sortkey_add_case_weight(data, flags, info.weight_case);
+ break;
+ }
+}
+
+static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, int str_len, BYTE *buffer, int buffer_len) /* Build the sort key of str; returns its length in bytes, or 0 when a buffer was given but is too small. locale is not used in this body */
+{
+ static const BYTE SORTKEY_SEPARATOR = 1;
+ static const BYTE SORTKEY_TERMINATOR = 0;
+ int i;
+ struct sortkey_data data;
+
+ data.buffer = buffer;
+ data.buffer_pos = 0;
+ data.buffer_len = buffer ? buffer_len : 0; /* without a buffer nothing is stored and only the length is counted */
+
+ if (str_len == -1) /* -1 means str is null-terminated */
+ str_len = wcslen(str);
+
+ /* Main weights */
+ for (i = 0; i < str_len; i++)
+ sortkey_add_main_weights(&data, flags, str[i]);
+ sortkey_add_weight(&data, SORTKEY_SEPARATOR);
+
+ /* Diacritic weights */
+ if (!(flags & NORM_IGNORENONSPACE)) /* the section stays empty when this flag is set */
+ {
+ int diacritic_start_pos = data.buffer_pos; /* first index belonging to the diacritic section */
+ int last_weighted_pos = data.buffer_pos; /* position just past the last weight above SORTKEY_MIN_WEIGHT */
+ for (i = 0; i < str_len; i++)
+ sortkey_add_diacritic_weights(&data, flags, str[i], &last_weighted_pos, diacritic_start_pos);
+ /* Remove all weights <= SORTKEY_MIN_WEIGHT from the end */
+ data.buffer_pos = last_weighted_pos;
+ }
+ sortkey_add_weight(&data, SORTKEY_SEPARATOR);
+
+ /* Case weights */
+ for (i = 0; i < str_len; i++)
+ sortkey_add_case_weights(&data, flags, str[i]);
+ sortkey_add_weight(&data, SORTKEY_SEPARATOR);
+
+ /* Extra weights */
+ /* TODO */
+ sortkey_add_weight(&data, SORTKEY_SEPARATOR);
+
+ /* Special weights */
+ /* TODO */
+ sortkey_add_weight(&data, SORTKEY_TERMINATOR);
+
+ if (data.buffer_pos <= buffer_len || !buffer)
+ return data.buffer_pos; /* the key fits, or only its length was requested */
+
+ return 0; /* a buffer was supplied but the key is longer than buffer_len */
+}
static int compare_tzdate( const TIME_FIELDS *tf, const SYSTEMTIME *compare )
{
@@ -5713,8 +5866,8 @@ INT WINAPI DECLSPEC_HOTPATCH LCMapStringEx( const WCHAR *locale, DWORD flags, co
TRACE( "(%s,0x%08lx,%s,%d,%p,%d)\n",
debugstr_w(locale), flags, debugstr_wn(src, srclen), srclen, dst, dstlen );
- if ((ret = get_sortkey( flags, src, srclen, (char *)dst, dstlen ))) ret++;
- else SetLastError( ERROR_INSUFFICIENT_BUFFER );
+ if (!(ret = sortkey_generate(flags, L"", src, srclen, (BYTE *)dst, dstlen )))
+ SetLastError( ERROR_INSUFFICIENT_BUFFER );
return ret;
}
--
2.35.1

View File

@ -1,128 +0,0 @@
From dca517521550923c881c95659f2309756c84d597 Mon Sep 17 00:00:00 2001
From: Fabian Maurer <dark.shadow4@web.de>
Date: Sat, 8 Aug 2020 16:47:15 +0200
Subject: [PATCH] kernelbase: Implement sortkey punctuation
Signed-off-by: Fabian Maurer <dark.shadow4@web.de>
---
dlls/kernel32/tests/locale.c | 29 +++++++++++++++++++++++++++++
dlls/kernelbase/locale.c | 35 +++++++++++++++++++++++++++++++----
2 files changed, 60 insertions(+), 4 deletions(-)
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
index d875bf94f92..25c460f4175 100644
--- a/dlls/kernel32/tests/locale.c
+++ b/dlls/kernel32/tests/locale.c
@@ -3210,6 +3210,35 @@ static const struct sorting_test_entry unicode_sorting_tests[] =
{ L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"A\x0301\x0301", L"A\x0301\x00ad\x0301" }, /* Unsortable combined with diacritics */
{ L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"b\x07f2\x07f2", L"b\x07f2\x2064\x07f2" }, /* Unsortable combined with diacritics */
{ L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"X\x0337\x0337", L"X\x0337\xfffd\x0337" }, /* Unsortable combined with diacritics */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORECASE, L"c", L"C" },
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORECASE, L"e", L"E" },
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORECASE, L"A", L"a" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x001b", L"\x001c" }, /* Punctuation primary weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0005", L"\x0006" }, /* Punctuation primary weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0027", L"\xff07", TRUE }, /* Punctuation diacritic/case weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x07f4", L"\x07f5", TRUE }, /* Punctuation diacritic/case weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x207b", L"\x0008" }, /* Punctuation diacritic/case weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x207b", L"\x0008" }, /* Punctuation */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0004", L"\x0011" }, /* Punctuation */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xff07", L"\x07f4" }, /* Punctuation primary weight has priority */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xfe32", L"\x2014" }, /* Punctuation primary weight has priority */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x058a", L"\x2027" }, /* Punctuation primary weight has priority */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS, L"\x207b", L"\x0008" }, /* Punctuation */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS, L"\x0004", L"\x0011" }, /* Punctuation */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x207b", L"\x0008" }, /* Punctuation */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, SORT_STRINGSORT, L"\x0004", L"\x0011" }, /* Punctuation */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS | SORT_STRINGSORT, L"\x207b", L"\x0008" }, /* Punctuation */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS | SORT_STRINGSORT, L"\x0004", L"\x0011" }, /* Punctuation */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, SORT_STRINGSORT, L"\x001a", L"\x001b" }, /* Punctuation main weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x2027", L"\x2011" }, /* Punctuation main weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x3030", L"\x301c" }, /* Punctuation main weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x058a", L"\x2010" }, /* Punctuation diacritic weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x07F5", L"\x07F4" }, /* Punctuation diacritic weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xfe32", L"\x2013" }, /* Punctuation case weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xfe31", L"\xfe58" }, /* Punctuation case weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xff07", L"\x0027" }, /* Punctuation case weight */
+
+
};
static void test_unicode_sorting(void)
diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c
index 3a29f3e8250..bce705ab484 100644
--- a/dlls/kernelbase/locale.c
+++ b/dlls/kernelbase/locale.c
@@ -2556,7 +2556,11 @@ static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR
break;
case SORTKEY_PUNCTUATION:
- /* TODO */
+ if ((flags & NORM_IGNORESYMBOLS) || !(flags & SORT_STRINGSORT))
+ break;
+
+ sortkey_add_weight(data, info.script_member);
+ sortkey_add_weight(data, info.weight_primary);
break;
case SORTKEY_SYMBOL_1:
@@ -2619,7 +2623,9 @@ static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags,
break;
case SORTKEY_PUNCTUATION:
- /* TODO */
+ if ((flags & NORM_IGNORESYMBOLS) || !(flags & SORT_STRINGSORT))
+ break;
+ sortkey_add_diacritic_weight(data, info.weight_diacritic, last_weighted_pos);
break;
case SORTKEY_SYMBOL_1:
@@ -2660,7 +2666,9 @@ static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR
break;
case SORTKEY_PUNCTUATION:
- /* TODO */
+ if ((flags & NORM_IGNORESYMBOLS) || !(flags & SORT_STRINGSORT))
+ break;
+ sortkey_add_case_weight(data, flags, info.weight_case);
break;
case SORTKEY_SYMBOL_1:
@@ -2680,6 +2688,24 @@ static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR
}
}
+/* Append the special-weight pair for c. Only punctuation produces one, and
+ * only when neither NORM_IGNORESYMBOLS nor SORT_STRINGSORT is set: the
+ * primary weight followed by (diacritic weight * 8 + case weight) truncated
+ * to a byte. */
+static void sortkey_add_special_weights(struct sortkey_data *data, int flags, WCHAR c)
+{
+    struct character_info ci;
+
+    sortkey_get_char(&ci, c);
+
+    if (ci.script_member != SORTKEY_PUNCTUATION) return;
+    if (flags & (NORM_IGNORESYMBOLS | SORT_STRINGSORT)) return;
+
+    sortkey_add_weight(data, ci.weight_primary);
+    sortkey_add_weight(data, (BYTE)(ci.weight_diacritic * 8 + ci.weight_case));
+}
+
static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, int str_len, BYTE *buffer, int buffer_len)
{
static const BYTE SORTKEY_SEPARATOR = 1;
@@ -2721,7 +2747,8 @@ static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, in
sortkey_add_weight(&data, SORTKEY_SEPARATOR);
/* Special weights */
- /* TODO */
+ for (i = 0; i < str_len; i++)
+ sortkey_add_special_weights(&data, flags, str[i]);
sortkey_add_weight(&data, SORTKEY_TERMINATOR);
if (data.buffer_pos <= buffer_len || !buffer)
--
2.29.2

View File

@ -1,273 +0,0 @@
From 0f23a9db326dd6040b2d41fac99bd495f718d63d Mon Sep 17 00:00:00 2001
From: Fabian Maurer <dark.shadow4@web.de>
Date: Sat, 8 Aug 2020 16:49:02 +0200
Subject: [PATCH] kernelbase: Implement sortkey for Japanese characters
Signed-off-by: Fabian Maurer <dark.shadow4@web.de>
---
dlls/kernel32/tests/locale.c | 94 ++++++++++++++++++++++++++++++-
dlls/kernelbase/locale.c | 104 +++++++++++++++++++++++++++++++++--
2 files changed, 192 insertions(+), 6 deletions(-)
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
index 25c460f4175..43a244d2a6b 100644
--- a/dlls/kernel32/tests/locale.c
+++ b/dlls/kernel32/tests/locale.c
@@ -3237,8 +3237,98 @@ static const struct sorting_test_entry unicode_sorting_tests[] =
{ L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xfe32", L"\x2013" }, /* Punctuation case weight */
{ L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xfe31", L"\xfe58" }, /* Punctuation case weight */
{ L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xff07", L"\x0027" }, /* Punctuation case weight */
-
-
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS, L"\x207b", L"\x0008" }, /* Punctuation NORM_IGNORESYMBOLS */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS, L"\x0004", L"\x0011" }, /* Punctuation NORM_IGNORESYMBOLS */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS | SORT_STRINGSORT, L"\x207b", L"\x0008" }, /* Punctuation NORM_IGNORESYMBOLS SORT_STRINGSORT */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS | SORT_STRINGSORT, L"\x0004", L"\x0011" }, /* Punctuation NORM_IGNORESYMBOLS SORT_STRINGSORT */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, SORT_STRINGSORT, L"\x001a", L"\x001b" }, /* Punctuation SORT_STRINGSORT main weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x2027", L"\x2011", }, /* Punctuation SORT_STRINGSORT main weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x3030", L"\x301c", }, /* Punctuation SORT_STRINGSORT main weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x058a", L"\x2010" }, /* Punctuation SORT_STRINGSORT diacritic weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x07F5", L"\x07F4" }, /* Punctuation SORT_STRINGSORT diacritic weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xfe32", L"\x2013" }, /* Punctuation SORT_STRINGSORT case weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xfe31", L"\xfe58" }, /* Punctuation SORT_STRINGSORT case weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xff07", L"\x0027" }, /* Punctuation SORT_STRINGSORT case weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x04b0", L"\x32db" }, /* Japanese main weight */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x3093", L"\x1e62\x013f" }, /* Japanese main weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30d3", L"\x30d4" }, /* Japanese diacritic weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x307b", L"\x307c" }, /* Japanese diacritic weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30ea", L"\x32f7" }, /* Japanese diacritic weight */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x31fb", L"\x30e9" }, /* Japanese case weight small */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30db", L"\x31f9" }, /* Japanese case weight small */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xff6d", L"\xff95" }, /* Japanese case weight small */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORENONSPACE, L"\x31fb", L"\x30e9" }, /* Japanese case weight small */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORENONSPACE, L"\x30db", L"\x31f9" }, /* Japanese case weight small */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORENONSPACE, L"\xff6d", L"\xff95" }, /* Japanese case weight small */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30d5", L"\x3075" }, /* Japanese case weight kana */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x306a", L"\x30ca" }, /* Japanese case weight kana */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x305a", L"\x30ba" }, /* Japanese case weight kana */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREKANATYPE, L"\x30d5", L"\x3075" }, /* Japanese case weight kana */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREKANATYPE, L"\x306a", L"\x30ca" }, /* Japanese case weight kana */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREKANATYPE, L"\x305a", L"\x30ba" }, /* Japanese case weight kana */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30bf", L"\xff80" }, /* Japanese case weight width */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30ab", L"\xff76" }, /* Japanese case weight width */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30a2", L"\xff71" }, /* Japanese case weight width */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\x30bf", L"\xff80" }, /* Japanese case weight width */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\x30ab", L"\xff76" }, /* Japanese case weight width */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\x30a2", L"\xff71" }, /* Japanese case weight width */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORENONSPACE, L"\x31a2", L"\x3110" },
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORENONSPACE, L"\x1342", L"\x133a" },
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORENONSPACE, L"\x16a4", L"\x16a5" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30b1\x30f6", L"\xff79\x30b1" }, /* Kana small data must have priority over width data */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30a6\x30a5", L"\xff73\x30a6" }, /* Kana small data must have priority over width data */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30a8\x30a7", L"\xff74\x30a8" }, /* Kana small data must have priority over width data */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30b1", L"\xff79" }, /* Kana small data must have priority over width data */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30a6", L"\xff73" }, /* Kana small data must have priority over width data */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30a8", L"\xff74" }, /* Kana small data must have priority over width data */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x3046\x30a9", L"\x30a6\x30aa" }, /* Kana small data must have priority over kana type data */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x304a\x3041", L"\x30aa\x3042" }, /* Kana small data must have priority over kana type data */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x3059\x30a7", L"\x30b9\x30a8" }, /* Kana small data must have priority over kana type data */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x3046", L"\x30a6" }, /* Kana small data must have priority over kana type data */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x304a", L"\x30aa" }, /* Kana small data must have priority over kana type data */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x3059", L"\x30b9" }, /* Kana small data must have priority over kana type data */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30a6\x30a8", L"\xff73\x3048" }, /* Kana type data must have priority over width data */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30ab\x30a3", L"\xff76\x3043" }, /* Kana type data must have priority over width data */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30b5\x30ac", L"\xff7b\x304c" }, /* Kana type data must have priority over width data */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30a6", L"\xff73" }, /* Kana type data must have priority over width data */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30ab", L"\xff76" }, /* Kana type data must have priority over width data */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30b5", L"\xff7b" }, /* Kana type data must have priority over width data */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x305a a", L"\x30ba A" }, /* Case weights have priority over extra weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30c1 b", L"\xff81 B" }, /* Case weights have priority over extra weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xff8b x", L"\x31f6 X" }, /* Case weights have priority over extra weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x305a", L"\x30ba" }, /* Case weights have priority over extra weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30c1", L"\xff81" }, /* Case weights have priority over extra weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xff8b", L"\x31f6" }, /* Case weights have priority over extra weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0027\x31ff", L"\x007f\xff9b" }, /* Extra weights have priority over special weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x07f5\x30f3", L"\x07f4\x3093" }, /* Extra weights have priority over special weights */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xfe63\x30e0", L"\xff0d\x3080" }, /* Extra weights have priority over special weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x0027", L"\x007f" }, /* Extra weights have priority over special weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x07f5", L"\x07f4" }, /* Extra weights have priority over special weights */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xfe63", L"\xff0d" }, /* Extra weights have priority over special weights */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\xff68", L"\x30a3" },
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\xff75", L"\x30aa" },
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\x30e2", L"\xff93" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xff68", L"\x30a3" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xff75", L"\x30aa" },
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30e2", L"\xff93" },
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREKANATYPE, L"\x30a8", L"\x3048" },
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREKANATYPE, L"\x30af", L"\x304f" },
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREKANATYPE, L"\x3067", L"\x30c7" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30a8", L"\x3048" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30af", L"\x304f" },
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x3067", L"\x30c7" },
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\xffb7", L"\x3147" },
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\xffb6", L"\x3146" },
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\x3145", L"\xffb5" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, NORM_IGNORECASE, L"\xffb7", L"\x3147" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, NORM_IGNORECASE, L"\xffb6", L"\x3146" },
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, NORM_IGNORECASE, L"\x3145", L"\xffb5" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, NORM_IGNORECASE, L"\x2cff", L"\x30ba" }, /* Coptic < Japanese */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, NORM_IGNORECASE, L"\x2cdb", L"\x32de" }, /* Coptic < Japanese */
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, NORM_IGNORECASE, L"\x2ce0", L"\x30c6" }, /* Coptic < Japanese */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, NORM_IGNORECASE, L"\x05d3", L"\x30ba" }, /* Hebrew > Japanese */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, NORM_IGNORECASE, L"\x05e3", L"\x32de" }, /* Hebrew > Japanese */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, NORM_IGNORECASE, L"\x05d7", L"\x30c6" }, /* Hebrew > Japanese */
};
static void test_unicode_sorting(void)
diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c
index bce705ab484..feec402cb61 100644
--- a/dlls/kernelbase/locale.c
+++ b/dlls/kernelbase/locale.c
@@ -2473,6 +2473,11 @@ enum sortkey_special_script
#define SORTKEY_MIN_WEIGHT 2
+const BYTE SORTKEY_FLAGS_EXTRA = 0xc4; /* Extra data added to the flags values */
+const BYTE SORTKEY_FLAG_HIRAGANA = 0x20; /* if bit is set then hiragana, else katakana */
+const BYTE SORTKEY_FLAG_LARGE = 0x02; /* if bit is set then normal kana, else small kana */
+const BYTE SORTKEY_FLAG_FULLWIDTH = 0x01; /* if bit is set then full width, else half width */
+
struct character_info
{
BYTE weight_primary;
@@ -2541,7 +2546,15 @@ static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR
break;
case SORTKEY_JAPANESE:
- /* TODO */
+ if (info.weight_primary <= 1)
+ {
+ /* TODO Kana iteration/repeat characters not implemented yet */
+ }
+ else
+ {
+ sortkey_add_weight(data, 34);
+ sortkey_add_weight(data, info.weight_primary);
+ }
break;
case SORTKEY_JAMO:
@@ -2614,7 +2627,12 @@ static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags,
break;
case SORTKEY_JAPANESE:
- /* TODO */
+ if (info.weight_primary <= 1)
+ {
+ /* TODO Kana iteration/repeat characters not implemented yet */
+ }
+ else
+ sortkey_add_diacritic_weight(data, info.weight_diacritic, last_weighted_pos);
break;
case SORTKEY_JAMO:
@@ -2658,7 +2676,12 @@ static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR
break;
case SORTKEY_JAPANESE:
- /* TODO */
+ if (info.weight_primary <= 1)
+ {
+ /* TODO Kana iteration/repeat characters not implemented yet */
+ }
+ else
+ sortkey_add_case_weight(data, flags, SORTKEY_MIN_WEIGHT);
break;
case SORTKEY_CJK:
@@ -2706,10 +2729,75 @@ static void sortkey_add_special_weights(struct sortkey_data *data, int flags, WC
}
}
+static void sortkey_add_extra_weights_small(struct sortkey_data *data, int flags, WCHAR c)
+{
+ struct character_info info;
+
+ sortkey_get_char(&info, c);
+
+ if (info.script_member == SORTKEY_JAPANESE)
+ {
+ if (info.weight_primary <= 1)
+ {
+ /* TODO Kana iteration/repeat characters not implemented yet */
+ }
+ else
+ {
+ if (!(flags & NORM_IGNORENONSPACE))
+ {
+ sortkey_add_weight(data, (info.weight_case & SORTKEY_FLAG_LARGE) | SORTKEY_FLAGS_EXTRA);
+ }
+ }
+ }
+}
+
+static void sortkey_add_extra_weights_kana(struct sortkey_data *data, int flags, WCHAR c)
+{
+ struct character_info info;
+
+ sortkey_get_char(&info, c);
+
+ if (info.script_member == SORTKEY_JAPANESE)
+ {
+ if (info.weight_primary <= 1)
+ {
+ /* TODO Kana iteration/repeat characters not implemented yet */
+ }
+ else
+ {
+ if (flags & NORM_IGNOREKANATYPE)
+ info.weight_case = 0;
+ sortkey_add_weight(data, (info.weight_case & SORTKEY_FLAG_HIRAGANA) | SORTKEY_FLAGS_EXTRA);
+ }
+ }
+}
+
+static void sortkey_add_extra_weights_width(struct sortkey_data *data, int flags, WCHAR c)
+{
+ struct character_info info;
+
+ sortkey_get_char(&info, c);
+
+ if (info.script_member == SORTKEY_JAPANESE)
+ {
+ if (info.weight_primary <= 1)
+ {
+ /* TODO Kana iteration/repeat characters not implemented yet */
+ }
+ else
+ {
+ if (flags & NORM_IGNOREWIDTH)
+ info.weight_case = 0;
+ sortkey_add_weight(data, (info.weight_case & SORTKEY_FLAG_FULLWIDTH) | SORTKEY_FLAGS_EXTRA);
+ }
+ }
+}
+
static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, int str_len, BYTE *buffer, int buffer_len)
{
static const BYTE SORTKEY_SEPARATOR = 1;
static const BYTE SORTKEY_TERMINATOR = 0;
+ static const BYTE SORTKEY_EXTRA_SEPARATOR = 0xff;
int i;
struct sortkey_data data;
@@ -2743,7 +2831,15 @@ static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, in
sortkey_add_weight(&data, SORTKEY_SEPARATOR);
/* Extra weights */
- /* TODO */
+ for (i = 0; i < str_len; i++)
+ sortkey_add_extra_weights_small(&data, flags, str[i]);
+ sortkey_add_weight(&data, SORTKEY_EXTRA_SEPARATOR);
+ for (i = 0; i < str_len; i++)
+ sortkey_add_extra_weights_kana(&data, flags, str[i]);
+ sortkey_add_weight(&data, SORTKEY_EXTRA_SEPARATOR);
+ for (i = 0; i < str_len; i++)
+ sortkey_add_extra_weights_width(&data, flags, str[i]);
+ sortkey_add_weight(&data, SORTKEY_EXTRA_SEPARATOR);
sortkey_add_weight(&data, SORTKEY_SEPARATOR);
/* Special weights */
--
2.29.2

View File

@@ -1,188 +0,0 @@
From 9ccd944af35dc418a09a17ab70619b37e598ea43 Mon Sep 17 00:00:00 2001
From: Fabian Maurer <dark.shadow4@web.de>
Date: Sat, 8 Aug 2020 16:49:45 +0200
Subject: [PATCH] kernelbase: Implement sortkey expansion
Signed-off-by: Fabian Maurer <dark.shadow4@web.de>
---
dlls/kernel32/tests/locale.c | 6 +++
dlls/kernelbase/locale.c | 91 +++++++++++++++++++++++++++++++++++-
2 files changed, 96 insertions(+), 1 deletion(-)
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
index 43a244d2a6b..e8adb32bbbd 100644
--- a/dlls/kernel32/tests/locale.c
+++ b/dlls/kernel32/tests/locale.c
@@ -3329,6 +3329,12 @@ static const struct sorting_test_entry unicode_sorting_tests[] =
{ L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, NORM_IGNORECASE, L"\x05d3", L"\x30ba" }, /* Hebrew > Japanese */
{ L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, NORM_IGNORECASE, L"\x05e3", L"\x32de" }, /* Hebrew > Japanese */
{ L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, NORM_IGNORECASE, L"\x05d7", L"\x30c6" }, /* Hebrew > Japanese */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"\x00c6", L"\x0041\x0045" }, /* Expansion */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"\x0f5c", L"\x0f5b\x0fb7" }, /* Expansion */
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"\x05f0", L"\x05d5\x05d5" }, /* Expansion */
+ { L"en-US", CSTR_LESS_THAN, CSTR_EQUAL, 0, L"\x0f75", L"\x0f71\x0f74" }, /* Expansion character always follow default character logic */
+ { L"en-US", CSTR_LESS_THAN, CSTR_EQUAL, 0, L"\xfc5e", L"\x064c\x0651" }, /* Expansion character always follow default character logic */
+ { L"en-US", CSTR_LESS_THAN, CSTR_EQUAL, 0, L"\xfb2b", L"\x05e9\x05c2" }, /* Expansion character always follow default character logic */
};
static void test_unicode_sorting(void)
diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c
index feec402cb61..da358d74934 100644
--- a/dlls/kernelbase/locale.c
+++ b/dlls/kernelbase/locale.c
@@ -2459,6 +2459,7 @@ enum sortkey_special_script
{
SORTKEY_UNSORTABLE = 0,
SORTKEY_DIACRITIC = 1,
+ SORTKEY_EXPANSION = 2,
SORTKEY_JAPANESE = 3,
SORTKEY_JAMO = 4,
SORTKEY_CJK = 5,
@@ -2496,13 +2497,27 @@ struct sortkey_data
static void sortkey_get_char(struct character_info *info, WCHAR ch)
{
DWORD value = sort.keys[ch];
-
info->weight_case = value >> 24;
info->weight_diacritic = (value >> 16) & 0xff;
info->script_member = (value >> 8) & 0xff;
info->weight_primary = value & 0xff;
}
+static const WCHAR* sortkey_get_expansion(WCHAR ch)
+{
+ DWORD pos_info = sort.keys[ch];
+ unsigned int pos = pos_info >> 16;
+ const DWORD *ptr;
+ unsigned int count_expansion;
+ if ((WORD)pos_info != 0x200) /* Check for expansion magic number */
+ return NULL;
+ ptr = (const DWORD *)(sort.guids + sort.guid_count);
+ count_expansion = *ptr++;
+ if (pos >= count_expansion)
+ return NULL;
+ return (const WCHAR *)(ptr + pos);
+}
+
static BOOL sortkey_is_PUA(BYTE script_member)
{
@@ -2533,6 +2548,27 @@ static void sortkey_add_diacritic_weight(struct sortkey_data *data, BYTE value,
*last_weighted_pos = data->buffer_pos;
}
+static void sortkey_handle_expansion_main(struct sortkey_data *data, int flags, WCHAR c)
+{
+ struct character_info info;
+ const WCHAR *expansion = sortkey_get_expansion(c);
+ if (expansion)
+ {
+ /* Expansion characters always follow default character logic, ignoring the script_member value */
+ sortkey_handle_expansion_main(data, flags, expansion[0]);
+ sortkey_handle_expansion_main(data, flags, expansion[1]);
+ return;
+ }
+ sortkey_get_char(&info, c);
+ if (info.script_member != SORTKEY_UNSORTABLE)
+ {
+ sortkey_add_weight(data, info.script_member);
+ sortkey_add_weight(data, info.weight_primary);
+ if (sortkey_is_PUA(info.script_member))
+ sortkey_add_weight(data, info.weight_diacritic);
+ }
+}
+
static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR c)
{
struct character_info info;
@@ -2542,6 +2578,12 @@ static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR
switch (info.script_member)
{
case SORTKEY_UNSORTABLE:
+ break;
+
+ case SORTKEY_EXPANSION:
+ sortkey_handle_expansion_main(data, flags, c);
+ break;
+
case SORTKEY_DIACRITIC:
break;
@@ -2598,6 +2640,25 @@ static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR
}
}
+static void sortkey_handle_expansion_diacritic(struct sortkey_data *data, int flags, WCHAR c, int *last_weighted_pos)
+{
+ struct character_info info;
+ const WCHAR *expansion = sortkey_get_expansion(c);
+ if (expansion)
+ {
+ /* Expansion characters always follow default character logic, ignoring the script_member value */
+ sortkey_handle_expansion_diacritic(data, flags, expansion[0], last_weighted_pos);
+ sortkey_handle_expansion_diacritic(data, flags, expansion[1], last_weighted_pos);
+ return;
+ }
+ sortkey_get_char(&info, c);
+ if (info.script_member != SORTKEY_UNSORTABLE)
+ {
+ if (!sortkey_is_PUA(info.script_member))
+ sortkey_add_diacritic_weight(data, info.weight_diacritic, last_weighted_pos);
+ }
+}
+
static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags, WCHAR c, int *last_weighted_pos, int diacritic_start_pos)
{
struct character_info info;
@@ -2610,6 +2671,10 @@ static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags,
case SORTKEY_UNSORTABLE:
break;
+ case SORTKEY_EXPANSION:
+ sortkey_handle_expansion_diacritic(data, flags, c, last_weighted_pos);
+ break;
+
case SORTKEY_DIACRITIC:
old_pos = data->buffer_pos - 1;
/*
@@ -2663,6 +2728,24 @@ static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags,
}
}
+static void sortkey_handle_expansion_case(struct sortkey_data *data, int flags, WCHAR c)
+{
+ struct character_info info;
+ const WCHAR *expansion = sortkey_get_expansion(c);
+ if (expansion)
+ {
+ /* Expansion characters always follow default character logic, ignoring the script_member value */
+ sortkey_handle_expansion_case(data, flags, expansion[0]);
+ sortkey_handle_expansion_case(data, flags, expansion[1]);
+ return;
+ }
+ sortkey_get_char(&info, c);
+ if (info.script_member != SORTKEY_UNSORTABLE)
+ {
+ sortkey_add_case_weight(data, flags, info.weight_case);
+ }
+}
+
static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR c)
{
struct character_info info;
@@ -2672,6 +2755,12 @@ static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR
switch (info.script_member)
{
case SORTKEY_UNSORTABLE:
+ break;
+
+ case SORTKEY_EXPANSION:
+ sortkey_handle_expansion_case(data, flags, c);
+ break;
+
case SORTKEY_DIACRITIC:
break;
--
2.29.2

View File

@@ -1,377 +0,0 @@
From 74ed8758a238f1747055b4c8fa78edc2d5e7aba9 Mon Sep 17 00:00:00 2001
From: Fabian Maurer <dark.shadow4@web.de>
Date: Sat, 8 Aug 2020 17:32:56 +0200
Subject: [PATCH] kernelbase: Implement sortkey language support
Signed-off-by: Fabian Maurer <dark.shadow4@web.de>
---
dlls/kernel32/tests/locale.c | 50 ++++++++++++++++++
dlls/kernelbase/locale.c | 99 +++++++++++++++++++++---------------
2 files changed, 109 insertions(+), 40 deletions(-)
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
index 0aaa87e38c1..84931318075 100644
--- a/dlls/kernel32/tests/locale.c
+++ b/dlls/kernel32/tests/locale.c
@@ -3210,6 +3210,56 @@ static const struct sorting_test_entry unicode_sorting_tests[] =
{ L"en-US", CSTR_LESS_THAN, CSTR_EQUAL, 0, L"\x0f75", L"\x0f71\x0f74" }, /* Expansion character always follow default character logic */
{ L"en-US", CSTR_LESS_THAN, CSTR_EQUAL, 0, L"\xfc5e", L"\x064c\x0651" }, /* Expansion character always follow default character logic */
{ L"en-US", CSTR_LESS_THAN, CSTR_EQUAL, 0, L"\xfb2b", L"\x05e9\x05c2" }, /* Expansion character always follow default character logic */
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x6df8", L"\x654b\x29e9" }, /* Japanese locale */
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x685d\x1239\x1b61", L"\x59b6\x6542\x2a62\x04a7" },
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x62f3\x43e9", L"\x5760" },
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x634c", L"\x2f0d\x5f1c\x7124" },
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x69e7\x0502", L"\x57cc" },
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x7589", L"\x67c5" },
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x5ede\x765c", L"\x7324" },
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x5c7f\x5961", L"\x7cbe" },
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x3162", L"\x6a84\x1549\x0b60" },
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x769e\x448e", L"\x4e6e" },
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x59a4", L"\x5faa\x607c" },
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x529b", L"\x733f" },
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x6ff8\x2a0a", L"\x7953\x6712" },
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x6dfb", L"\x6793" },
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x67ed", L"\x6aa2" },
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x4e61", L"\x6350\x6b08" },
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x5118", L"\x53b3\x75b4" },
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x6bbf", L"\x65a3" },
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x5690", L"\x5fa8" },
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x61e2", L"\x76e5" },
+ { L"ko-KR", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x8db6", L"\xd198" },
+ { L"ko-KR", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x8f72", L"\xd2b9" },
+ { L"ko-KR", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x91d8", L"\xd318" },
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x8db6", L"\xd198" },
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x8f72", L"\xd2b9" },
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x91d8", L"\xd318" },
+ { L"cs-CZ", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x0160", L"\x0219" },
+ { L"cs-CZ", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x059a", L"\x0308" },
+ { L"cs-CZ", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x013a", L"\x013f" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0160", L"\x0219" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x059a", L"\x0308" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x013a", L"\x013f" },
+ { L"vi-VN", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x1d8f", L"\x1ea8" },
+ { L"vi-VN", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0323", L"\xfe26" },
+ { L"vi-VN", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"R", L"\xff32" },
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x1d8f", L"\x1ea8" },
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x0323", L"\xfe26" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"R", L"\xff32" },
+ { L"zh-HK", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x83ae", L"\x71b9" },
+ { L"zh-HK", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x7e50", L"\xc683" },
+ { L"zh-HK", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x6c69", L"\x7f8a" },
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x83ae", L"\x71b9" },
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x7e50", L"\xc683" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x6c69", L"\x7f8a" },
+ { L"tr-TR", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x00dc", L"\x1ee9" },
+ { L"tr-TR", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x00fc", L"\x1ee6" },
+ { L"tr-TR", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0152", L"\x00d6" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x00dc", L"\x1ee9" },
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x00fc", L"\x1ee6" },
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x0152", L"\x00d6" },
};
static void test_unicode_sorting(void)
diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c
index e81ed0e0f2e..394912307a9 100644
--- a/dlls/kernelbase/locale.c
+++ b/dlls/kernelbase/locale.c
@@ -2530,9 +2530,27 @@ struct sortkey_data
int buffer_len;
};
-static void sortkey_get_char(struct character_info *info, WCHAR ch)
+static DWORD sortkey_get_exception(WCHAR ch, const struct sortguid *locale)
{
- DWORD value = sort.keys[ch];
+ if (locale && locale->except)
+ {
+ DWORD *table = sort.keys + locale->except;
+ DWORD hi = ch >> 8;
+ DWORD lo = ch & 0xff;
+ if (table[hi] == hi * 0x100)
+ return 0;
+ if (sort.keys[table[hi] + lo] == sort.keys[hi * 0x100 + lo])
+ return 0;
+ return sort.keys[table[hi] + lo];
+ }
+ return 0;
+}
+
+static void sortkey_get_char(struct character_info *info, WCHAR ch, const struct sortguid *locale)
+{
+ DWORD value = sortkey_get_exception(ch, locale);
+ if (!value)
+ value = sort.keys[ch];
info->weight_case = value >> 24;
info->weight_diacritic = (value >> 16) & 0xff;
info->script_member = (value >> 8) & 0xff;
@@ -2584,18 +2602,18 @@ static void sortkey_add_diacritic_weight(struct sortkey_data *data, BYTE value,
*last_weighted_pos = data->buffer_pos;
}
-static void sortkey_handle_expansion_main(struct sortkey_data *data, int flags, WCHAR c)
+static void sortkey_handle_expansion_main(struct sortkey_data *data, int flags, WCHAR c, const struct sortguid *locale)
{
struct character_info info;
const WCHAR *expansion = sortkey_get_expansion(c);
if (expansion)
{
/* Expansion characters always follow default character logic, ignoring the script_member value */
- sortkey_handle_expansion_main(data, flags, expansion[0]);
- sortkey_handle_expansion_main(data, flags, expansion[1]);
+ sortkey_handle_expansion_main(data, flags, expansion[0], locale);
+ sortkey_handle_expansion_main(data, flags, expansion[1], locale);
return;
}
- sortkey_get_char(&info, c);
+ sortkey_get_char(&info, c, locale);
if (info.script_member != SORTKEY_UNSORTABLE)
{
sortkey_add_weight(data, info.script_member);
@@ -2605,11 +2623,11 @@ static void sortkey_handle_expansion_main(struct sortkey_data *data, int flags,
}
}
-static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR c)
+static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR c, const struct sortguid *locale)
{
struct character_info info;
- sortkey_get_char(&info, c);
+ sortkey_get_char(&info, c, locale);
switch (info.script_member)
{
@@ -2617,7 +2635,7 @@ static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR
break;
case SORTKEY_EXPANSION:
- sortkey_handle_expansion_main(data, flags, c);
+ sortkey_handle_expansion_main(data, flags, c, locale);
break;
case SORTKEY_DIACRITIC:
@@ -2676,18 +2694,18 @@ static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR
}
}
-static void sortkey_handle_expansion_diacritic(struct sortkey_data *data, int flags, WCHAR c, int *last_weighted_pos)
+static void sortkey_handle_expansion_diacritic(struct sortkey_data *data, int flags, WCHAR c, int *last_weighted_pos, const struct sortguid *locale)
{
struct character_info info;
const WCHAR *expansion = sortkey_get_expansion(c);
if (expansion)
{
/* Expansion characters always follow default character logic, ignoring the script_member value */
- sortkey_handle_expansion_diacritic(data, flags, expansion[0], last_weighted_pos);
- sortkey_handle_expansion_diacritic(data, flags, expansion[1], last_weighted_pos);
+ sortkey_handle_expansion_diacritic(data, flags, expansion[0], last_weighted_pos, locale);
+ sortkey_handle_expansion_diacritic(data, flags, expansion[1], last_weighted_pos, locale);
return;
}
- sortkey_get_char(&info, c);
+ sortkey_get_char(&info, c, locale);
if (info.script_member != SORTKEY_UNSORTABLE)
{
if (!sortkey_is_PUA(info.script_member))
@@ -2695,12 +2713,12 @@ static void sortkey_handle_expansion_diacritic(struct sortkey_data *data, int fl
}
}
-static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags, WCHAR c, int *last_weighted_pos, int diacritic_start_pos)
+static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags, WCHAR c, int *last_weighted_pos, int diacritic_start_pos, const struct sortguid *locale)
{
struct character_info info;
int old_pos;
- sortkey_get_char(&info, c);
+ sortkey_get_char(&info, c, locale);
switch (info.script_member)
{
@@ -2708,7 +2726,7 @@ static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags,
break;
case SORTKEY_EXPANSION:
- sortkey_handle_expansion_diacritic(data, flags, c, last_weighted_pos);
+ sortkey_handle_expansion_diacritic(data, flags, c, last_weighted_pos, locale);
break;
case SORTKEY_DIACRITIC:
@@ -2764,29 +2782,29 @@ static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags,
}
}
-static void sortkey_handle_expansion_case(struct sortkey_data *data, int flags, WCHAR c)
+static void sortkey_handle_expansion_case(struct sortkey_data *data, int flags, WCHAR c, const struct sortguid *locale)
{
struct character_info info;
const WCHAR *expansion = sortkey_get_expansion(c);
if (expansion)
{
/* Expansion characters always follow default character logic, ignoring the script_member value */
- sortkey_handle_expansion_case(data, flags, expansion[0]);
- sortkey_handle_expansion_case(data, flags, expansion[1]);
+ sortkey_handle_expansion_case(data, flags, expansion[0], locale);
+ sortkey_handle_expansion_case(data, flags, expansion[1], locale);
return;
}
- sortkey_get_char(&info, c);
+ sortkey_get_char(&info, c, locale);
if (info.script_member != SORTKEY_UNSORTABLE)
{
sortkey_add_case_weight(data, flags, info.weight_case);
}
}
-static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR c)
+static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR c, const struct sortguid *locale)
{
struct character_info info;
- sortkey_get_char(&info, c);
+ sortkey_get_char(&info, c, locale);
switch (info.script_member)
{
@@ -2794,7 +2812,7 @@ static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR
break;
case SORTKEY_EXPANSION:
- sortkey_handle_expansion_case(data, flags, c);
+ sortkey_handle_expansion_case(data, flags, c, locale);
break;
case SORTKEY_DIACRITIC:
@@ -2836,12 +2854,12 @@ static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR
}
}
-static void sortkey_add_special_weights(struct sortkey_data *data, int flags, WCHAR c)
+static void sortkey_add_special_weights(struct sortkey_data *data, int flags, WCHAR c, const struct sortguid *locale)
{
struct character_info info;
BYTE weight_second;
- sortkey_get_char(&info, c);
+ sortkey_get_char(&info, c, locale);
if (info.script_member == SORTKEY_PUNCTUATION)
{
@@ -2854,11 +2872,11 @@ static void sortkey_add_special_weights(struct sortkey_data *data, int flags, WC
}
}
-static void sortkey_add_extra_weights_small(struct sortkey_data *data, int flags, WCHAR c)
+static void sortkey_add_extra_weights_small(struct sortkey_data *data, int flags, WCHAR c, const struct sortguid *locale)
{
struct character_info info;
- sortkey_get_char(&info, c);
+ sortkey_get_char(&info, c, locale);
if (info.script_member == SORTKEY_JAPANESE)
{
@@ -2876,11 +2894,11 @@ static void sortkey_add_extra_weights_small(struct sortkey_data *data, int flags
}
}
-static void sortkey_add_extra_weights_kana(struct sortkey_data *data, int flags, WCHAR c)
+static void sortkey_add_extra_weights_kana(struct sortkey_data *data, int flags, WCHAR c, const struct sortguid *locale)
{
struct character_info info;
- sortkey_get_char(&info, c);
+ sortkey_get_char(&info, c, locale);
if (info.script_member == SORTKEY_JAPANESE)
{
@@ -2897,11 +2915,11 @@ static void sortkey_add_extra_weights_kana(struct sortkey_data *data, int flags,
}
}
-static void sortkey_add_extra_weights_width(struct sortkey_data *data, int flags, WCHAR c)
+static void sortkey_add_extra_weights_width(struct sortkey_data *data, int flags, WCHAR c, const struct sortguid *locale)
{
struct character_info info;
- sortkey_get_char(&info, c);
+ sortkey_get_char(&info, c, locale);
if (info.script_member == SORTKEY_JAPANESE)
{
@@ -2918,13 +2936,14 @@ static void sortkey_add_extra_weights_width(struct sortkey_data *data, int flags
}
}
-static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, int str_len, BYTE *buffer, int buffer_len)
+static int sortkey_generate(int flags, const WCHAR *locale_name, const WCHAR *str, int str_len, BYTE *buffer, int buffer_len)
{
static const BYTE SORTKEY_SEPARATOR = 1;
static const BYTE SORTKEY_TERMINATOR = 0;
static const BYTE SORTKEY_EXTRA_SEPARATOR = 0xff;
int i;
struct sortkey_data data;
+ const struct sortguid *locale = get_language_sort(locale_name);
data.buffer = buffer;
data.buffer_pos = 0;
@@ -2935,7 +2954,7 @@ static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, in
/* Main weights */
for (i = 0; i < str_len; i++)
- sortkey_add_main_weights(&data, flags, str[i]);
+ sortkey_add_main_weights(&data, flags, str[i], locale);
sortkey_add_weight(&data, SORTKEY_SEPARATOR);
/* Diacritic weights */
@@ -2944,7 +2963,7 @@ static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, in
int diacritic_start_pos = data.buffer_pos;
int last_weighted_pos = data.buffer_pos;
for (i = 0; i < str_len; i++)
- sortkey_add_diacritic_weights(&data, flags, str[i], &last_weighted_pos, diacritic_start_pos);
+ sortkey_add_diacritic_weights(&data, flags, str[i], &last_weighted_pos, diacritic_start_pos, locale);
/* Remove all weights <= SORTKEY_MIN_WEIGHT from the end */
data.buffer_pos = last_weighted_pos;
}
@@ -2952,24 +2971,24 @@ static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, in
/* Case weights */
for (i = 0; i < str_len; i++)
- sortkey_add_case_weights(&data, flags, str[i]);
+ sortkey_add_case_weights(&data, flags, str[i], locale);
sortkey_add_weight(&data, SORTKEY_SEPARATOR);
/* Extra weights */
for (i = 0; i < str_len; i++)
- sortkey_add_extra_weights_small(&data, flags, str[i]);
+ sortkey_add_extra_weights_small(&data, flags, str[i], locale);
sortkey_add_weight(&data, SORTKEY_EXTRA_SEPARATOR);
for (i = 0; i < str_len; i++)
- sortkey_add_extra_weights_kana(&data, flags, str[i]);
+ sortkey_add_extra_weights_kana(&data, flags, str[i], locale);
sortkey_add_weight(&data, SORTKEY_EXTRA_SEPARATOR);
for (i = 0; i < str_len; i++)
- sortkey_add_extra_weights_width(&data, flags, str[i]);
+ sortkey_add_extra_weights_width(&data, flags, str[i], locale);
sortkey_add_weight(&data, SORTKEY_EXTRA_SEPARATOR);
sortkey_add_weight(&data, SORTKEY_SEPARATOR);
/* Special weights */
for (i = 0; i < str_len; i++)
- sortkey_add_special_weights(&data, flags, str[i]);
+ sortkey_add_special_weights(&data, flags, str[i], locale);
sortkey_add_weight(&data, SORTKEY_TERMINATOR);
if (data.buffer_pos <= buffer_len || !buffer)
@@ -5667,7 +5686,7 @@ INT WINAPI DECLSPEC_HOTPATCH LCMapStringEx( const WCHAR *locale, DWORD flags, co
TRACE( "(%s,0x%08lx,%s,%d,%p,%d)\n",
debugstr_w(locale), flags, debugstr_wn(src, srclen), srclen, dst, dstlen );
- if (!(ret = sortkey_generate(flags, L"", src, srclen, (BYTE *)dst, dstlen )))
+ if (!(ret = sortkey_generate(flags, locale, src, srclen, (BYTE *)dst, dstlen )))
SetLastError( ERROR_INSUFFICIENT_BUFFER );
return ret;
}
--
2.34.1

View File

@ -1,446 +0,0 @@
From 11619cf1483a96e329f640a0abcc571857a87a73 Mon Sep 17 00:00:00 2001
From: Fabian Maurer <dark.shadow4@web.de>
Date: Sun, 6 Dec 2020 20:57:16 +0100
Subject: [PATCH] kernelbase: Implement CompareString functions
---
dlls/kernel32/tests/locale.c | 33 +++--
dlls/kernelbase/locale.c | 261 ++++++++++++++++++-----------------
2 files changed, 149 insertions(+), 145 deletions(-)
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
index 08723c4b865..23054727ad7 100644
--- a/dlls/kernel32/tests/locale.c
+++ b/dlls/kernel32/tests/locale.c
@@ -1894,16 +1894,16 @@ static void test_CompareStringA(void)
"a\\0b vs a expected CSTR_EQUAL or CSTR_GREATER_THAN, got %d\n", ret);
ret = CompareStringA(lcid, 0, "\2", 2, "\1", 2);
- todo_wine ok(ret != CSTR_EQUAL, "\\2 vs \\1 expected unequal\n");
+ ok(ret != CSTR_EQUAL, "\\2 vs \\1 expected unequal\n");
ret = CompareStringA(lcid, NORM_IGNORECASE | LOCALE_USE_CP_ACP, "#", -1, ".", -1);
- todo_wine ok(ret == CSTR_LESS_THAN, "\"#\" vs \".\" expected CSTR_LESS_THAN, got %d\n", ret);
+ ok(ret == CSTR_LESS_THAN, "\"#\" vs \".\" expected CSTR_LESS_THAN, got %d\n", ret);
ret = CompareStringA(lcid, NORM_IGNORECASE, "_", -1, ".", -1);
- todo_wine ok(ret == CSTR_GREATER_THAN, "\"_\" vs \".\" expected CSTR_GREATER_THAN, got %d\n", ret);
+ ok(ret == CSTR_GREATER_THAN, "\"_\" vs \".\" expected CSTR_GREATER_THAN, got %d\n", ret);
ret = lstrcmpiA("#", ".");
- todo_wine ok(ret == -1, "\"#\" vs \".\" expected -1, got %d\n", ret);
+ ok(ret == -1, "\"#\" vs \".\" expected -1, got %d\n", ret);
lcid = MAKELCID(MAKELANGID(LANG_POLISH, SUBLANG_DEFAULT), SORT_DEFAULT);
@@ -1989,9 +1989,9 @@ static void test_CompareStringW(void)
ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
ret = CompareStringW(LOCALE_USER_DEFAULT, NORM_IGNORENONSPACE, ABC_EE, 3, A_ACUTE_BC, 4);
- todo_wine ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
+ ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
ret = CompareStringW(LOCALE_USER_DEFAULT, NORM_IGNORENONSPACE, ABC_EE, 4, A_ACUTE_BC_DECOMP, 5);
- todo_wine ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
+ ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
ret = CompareStringW(LOCALE_USER_DEFAULT, NORM_IGNORENONSPACE, A_ACUTE_BC, 4, A_ACUTE_BC_DECOMP, 5);
ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
@@ -2003,12 +2003,12 @@ static void test_CompareStringW(void)
ret = CompareStringW(LOCALE_USER_DEFAULT, 0, A_NULL_BC, 4, A_ACUTE_BC, 4);
ok(ret == CSTR_LESS_THAN, "expected CSTR_LESS_THAN, got %d\n", ret);
ret = CompareStringW(LOCALE_USER_DEFAULT, NORM_IGNORENONSPACE, A_NULL_BC, 4, A_ACUTE_BC, 4);
- todo_wine ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
+ ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
ret = CompareStringW(LOCALE_USER_DEFAULT, 0, A_NULL_BC, 4, A_ACUTE_BC_DECOMP, 5);
ok(ret == CSTR_LESS_THAN, "expected CSTR_LESS_THAN, got %d\n", ret);
ret = CompareStringW(LOCALE_USER_DEFAULT, NORM_IGNORENONSPACE, A_NULL_BC, 4, A_ACUTE_BC_DECOMP, 5);
- todo_wine ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
+ ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
}
struct comparestringex_test {
@@ -2045,7 +2045,7 @@ static const struct comparestringex_test comparestringex_tests[] = {
},
{ /* 5 */
"tr-TR", 0,
- {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, TRUE
+ {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, FALSE
},
/* with NORM_IGNORECASE */
{ /* 6 */
@@ -2070,7 +2070,7 @@ static const struct comparestringex_test comparestringex_tests[] = {
},
{ /* 11 */
"tr-TR", NORM_IGNORECASE,
- {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, TRUE
+ {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, FALSE
},
/* with NORM_LINGUISTIC_CASING */
{ /* 12 */
@@ -2095,7 +2095,7 @@ static const struct comparestringex_test comparestringex_tests[] = {
},
{ /* 17 */
"tr-TR", NORM_LINGUISTIC_CASING,
- {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, TRUE
+ {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, FALSE
},
/* with LINGUISTIC_IGNORECASE */
{ /* 18 */
@@ -2120,7 +2120,7 @@ static const struct comparestringex_test comparestringex_tests[] = {
},
{ /* 23 */
"tr-TR", LINGUISTIC_IGNORECASE,
- {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, TRUE
+ {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, FALSE
},
/* with NORM_LINGUISTIC_CASING | NORM_IGNORECASE */
{ /* 24 */
@@ -2145,7 +2145,7 @@ static const struct comparestringex_test comparestringex_tests[] = {
},
{ /* 29 */
"tr-TR", NORM_LINGUISTIC_CASING | NORM_IGNORECASE,
- {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, TRUE
+ {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, FALSE
},
/* with NORM_LINGUISTIC_CASING | LINGUISTIC_IGNORECASE */
{ /* 30 */
@@ -2170,7 +2170,7 @@ static const struct comparestringex_test comparestringex_tests[] = {
},
{ /* 35 */
"tr-TR", NORM_LINGUISTIC_CASING | LINGUISTIC_IGNORECASE,
- {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, CSTR_LESS_THAN, TRUE
+ {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, CSTR_LESS_THAN, FALSE
}
};
@@ -3540,6 +3540,9 @@ static const struct sorting_test_entry unicode_sorting_tests[] =
{ L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x00dc", L"\x1ee9" },
{ L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x00fc", L"\x1ee6" },
{ L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x0152", L"\x00d6" },
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xa042\x09bc", L"\xa042" }, /* Diacritic is added */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xa063\x302b", L"\xa063" }, /* Diacritic is added */
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xa07e\x0c56", L"\xa07e" }, /* Diacritic is added */
};
static void test_unicode_sorting(void)
@@ -6537,7 +6540,7 @@ static void test_FindNLSStringEx(void)
{ localeW, FIND_FROMSTART, comb_s_accent1W, ARRAY_SIZE(comb_s_accent1W)-1,
comb_s_accent2W, ARRAY_SIZE(comb_s_accent2W)-1, 0, 0, 6, 1, TRUE },
{ localeW, FIND_FROMSTART, comb_q_accent1W, ARRAY_SIZE(comb_q_accent1W)-1,
- comb_q_accent2W, ARRAY_SIZE(comb_q_accent2W)-1, 0, 0, 7, 1, FALSE },
+ comb_q_accent2W, ARRAY_SIZE(comb_q_accent2W)-1, 0, 0, 7, 0, FALSE },
{ 0 }
};
struct test_data *ptest;
diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c
index bc0e3b878c3..6ec5a0fad41 100644
--- a/dlls/kernelbase/locale.c
+++ b/dlls/kernelbase/locale.c
@@ -3156,126 +3156,6 @@ static int map_to_halfwidth( WCHAR c, WCHAR *dst, int dstlen )
return 1;
}
-
-/* 32-bit collation element table format:
- * unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit,
- * case weight - high 4 bit of low 8 bit.
- */
-
-enum weight { UNICODE_WEIGHT, DIACRITIC_WEIGHT, CASE_WEIGHT };
-
-static unsigned int get_weight( WCHAR ch, enum weight type )
-{
- unsigned int ret;
-
- ret = collation_table[collation_table[collation_table[ch >> 8] + ((ch >> 4) & 0x0f)] + (ch & 0xf)];
- if (ret == ~0u) return ch;
-
- switch (type)
- {
- case UNICODE_WEIGHT: return ret >> 16;
- case DIACRITIC_WEIGHT: return (ret >> 8) & 0xff;
- case CASE_WEIGHT: return (ret >> 4) & 0x0f;
- default: return 0;
- }
-}
-
-
-static void inc_str_pos( const WCHAR **str, int *len, unsigned int *dpos, unsigned int *dlen )
-{
- (*dpos)++;
- if (*dpos == *dlen)
- {
- *dpos = *dlen = 0;
- (*str)++;
- (*len)--;
- }
-}
-
-
-static int compare_weights(int flags, const WCHAR *str1, int len1,
- const WCHAR *str2, int len2, enum weight type )
-{
- unsigned int ce1, ce2, dpos1 = 0, dpos2 = 0, dlen1 = 0, dlen2 = 0;
- const WCHAR *dstr1 = NULL, *dstr2 = NULL;
-
- while (len1 > 0 && len2 > 0)
- {
- if (!dlen1 && !(dstr1 = get_decomposition( *str1, &dlen1 ))) dstr1 = str1;
- if (!dlen2 && !(dstr2 = get_decomposition( *str2, &dlen2 ))) dstr2 = str2;
-
- if (flags & NORM_IGNORESYMBOLS)
- {
- int skip = 0;
- /* FIXME: not tested */
- if (get_char_type( CT_CTYPE1, dstr1[dpos1] ) & (C1_PUNCT | C1_SPACE))
- {
- inc_str_pos( &str1, &len1, &dpos1, &dlen1 );
- skip = 1;
- }
- if (get_char_type( CT_CTYPE1, dstr2[dpos2] ) & (C1_PUNCT | C1_SPACE))
- {
- inc_str_pos( &str2, &len2, &dpos2, &dlen2 );
- skip = 1;
- }
- if (skip) continue;
- }
-
- /* hyphen and apostrophe are treated differently depending on
- * whether SORT_STRINGSORT specified or not
- */
- if (type == UNICODE_WEIGHT && !(flags & SORT_STRINGSORT))
- {
- if (dstr1[dpos1] == '-' || dstr1[dpos1] == '\'')
- {
- if (dstr2[dpos2] != '-' && dstr2[dpos2] != '\'')
- {
- inc_str_pos( &str1, &len1, &dpos1, &dlen1 );
- continue;
- }
- }
- else if (dstr2[dpos2] == '-' || dstr2[dpos2] == '\'')
- {
- inc_str_pos( &str2, &len2, &dpos2, &dlen2 );
- continue;
- }
- }
-
- ce1 = get_weight( dstr1[dpos1], type );
- if (!ce1)
- {
- inc_str_pos( &str1, &len1, &dpos1, &dlen1 );
- continue;
- }
- ce2 = get_weight( dstr2[dpos2], type );
- if (!ce2)
- {
- inc_str_pos( &str2, &len2, &dpos2, &dlen2 );
- continue;
- }
-
- if (ce1 - ce2) return ce1 - ce2;
-
- inc_str_pos( &str1, &len1, &dpos1, &dlen1 );
- inc_str_pos( &str2, &len2, &dpos2, &dlen2 );
- }
- while (len1)
- {
- if (!dlen1 && !(dstr1 = get_decomposition( *str1, &dlen1 ))) dstr1 = str1;
- ce1 = get_weight( dstr1[dpos1], type );
- if (ce1) break;
- inc_str_pos( &str1, &len1, &dpos1, &dlen1 );
- }
- while (len2)
- {
- if (!dlen2 && !(dstr2 = get_decomposition( *str2, &dlen2 ))) dstr2 = str2;
- ce2 = get_weight( dstr2[dpos2], type );
- if (ce2) break;
- inc_str_pos( &str2, &len2, &dpos2, &dlen2 );
- }
- return len1 - len2;
-}
-
enum sortkey_special_script
{
SORTKEY_UNSORTABLE = 0,
@@ -3313,6 +3193,7 @@ struct sortkey_data
BYTE *buffer;
int buffer_pos;
int buffer_len;
+ BOOL is_compare_string;
};
static DWORD sortkey_get_exception(WCHAR ch, const struct sortguid *locale)
@@ -3524,7 +3405,10 @@ static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags,
if (old_pos >= diacritic_start_pos)
{
if (old_pos < data->buffer_len)
+ {
data->buffer[old_pos] += info.weight_diacritic; /* Overflow can happen, that's okay */
+ *last_weighted_pos = data->buffer_pos;
+ }
}
else
sortkey_add_diacritic_weight(data, info.weight_diacritic, last_weighted_pos);
@@ -3733,6 +3617,7 @@ static int sortkey_generate(int flags, const WCHAR *locale_name, const WCHAR *st
data.buffer = buffer;
data.buffer_pos = 0;
data.buffer_len = buffer ? buffer_len : 0;
+ data.is_compare_string = FALSE;
if (str_len == -1)
str_len = wcslen(str);
@@ -3782,6 +3667,130 @@ static int sortkey_generate(int flags, const WCHAR *locale_name, const WCHAR *st
return 0;
}
+static int early_exit_sortkey_comparison(const struct sortkey_data* data1, const struct sortkey_data* data2, int start_index)
+{
+ int i;
+ int end_index = min(data1->buffer_pos, data2->buffer_pos);
+
+ for (i = start_index; i < end_index; i++)
+ {
+ BYTE weight1 = data1->buffer[i];
+ BYTE weight2 = data2->buffer[i];
+
+ if (weight1 > weight2) return CSTR_GREATER_THAN;
+ if (weight1 < weight2) return CSTR_LESS_THAN;
+ }
+
+ return CSTR_EQUAL;
+}
+
+static int sortkey_compare(int flags, const WCHAR *locale_name, const WCHAR *str1, int str1_len, const WCHAR *str2, int str2_len)
+{
+ int i1, i2;
+ int ret;
+ struct sortkey_data data1, data2;
+ const struct sortguid *locale = get_language_sort(locale_name);
+ int diacritic_start_pos1;
+ int last_weighted_pos1;
+ int diacritic_start_pos2;
+ int last_weighted_pos2;
+ int pos_weight_compare;
+
+ BYTE buffer1[10000];
+ BYTE buffer2[10000];
+
+ data1.buffer = buffer1;
+ data1.buffer_pos = 0;
+ data1.buffer_len = sizeof(buffer1);
+ data1.is_compare_string = TRUE;
+
+ data2.buffer = buffer2;
+ data2.buffer_pos = 0;
+ data2.buffer_len = sizeof(buffer2);
+ data2.is_compare_string = TRUE;
+
+ /* Main weights */
+ for (i1 = 0, i2 = 0; i1 < str1_len || i2 < str2_len; i1++, i2++)
+ {
+ int pos_weight_compare = min(data1.buffer_pos, data2.buffer_pos);
+ if (i1 < str1_len)
+ {
+ sortkey_add_main_weights(&data1, flags, str1[i1], locale);
+ }
+ if (i2 < str2_len)
+ {
+ sortkey_add_main_weights(&data2, flags, str2[i2], locale);
+ }
+
+ /* For clear differences we must return early without reading all characters. See tests. */
+ ret = early_exit_sortkey_comparison(&data1, &data2, pos_weight_compare);
+ if (ret != CSTR_EQUAL)
+ return ret;
+ }
+
+ if (data1.buffer_pos > data2.buffer_pos)
+ return CSTR_GREATER_THAN;
+ if (data1.buffer_pos < data2.buffer_pos)
+ return CSTR_LESS_THAN;
+
+ diacritic_start_pos1 = data1.buffer_pos;
+ last_weighted_pos1 = data1.buffer_pos;
+ diacritic_start_pos2 = data2.buffer_pos;
+ last_weighted_pos2 = data2.buffer_pos;
+ pos_weight_compare = min(data1.buffer_pos, data2.buffer_pos);
+ /* Diacritic weights */
+ if (!(flags & NORM_IGNORENONSPACE))
+ {
+ for (i1 = 0, i2 = 0; i1 < str1_len || i2 < str2_len; i1++, i2++)
+ {
+ if (i1 < str1_len)
+ {
+ sortkey_add_diacritic_weights(&data1, flags, str1[i1], &last_weighted_pos1, diacritic_start_pos1, locale);
+ }
+ if (i2 < str2_len)
+ {
+ sortkey_add_diacritic_weights(&data2, flags, str2[i2], &last_weighted_pos2, diacritic_start_pos2, locale);
+ }
+ }
+ data1.buffer_pos = last_weighted_pos1;
+ data2.buffer_pos = last_weighted_pos2;
+
+ ret = early_exit_sortkey_comparison(&data1, &data2, pos_weight_compare);
+ if (ret != CSTR_EQUAL)
+ return ret;
+
+ if (data1.buffer_pos > data2.buffer_pos)
+ return CSTR_GREATER_THAN;
+ if (data1.buffer_pos < data2.buffer_pos)
+ return CSTR_LESS_THAN;
+ }
+
+ /* Special weights */
+ for (i1 = 0, i2 = 0; i1 < str1_len || i2 < str2_len; i1++, i2++)
+ {
+ int pos_weight_compare = min(data1.buffer_pos, data2.buffer_pos);
+ if (i1 < str1_len)
+ {
+ sortkey_add_special_weights(&data1, flags, str1[i1], locale);
+ }
+ if (i2 < str2_len)
+ {
+ sortkey_add_special_weights(&data2, flags, str2[i2], locale);
+ }
+
+ ret = early_exit_sortkey_comparison(&data1, &data2, pos_weight_compare);
+ if (ret != CSTR_EQUAL)
+ return ret;
+ }
+
+ if (data1.buffer_pos > data2.buffer_pos)
+ return CSTR_GREATER_THAN;
+ if (data1.buffer_pos < data2.buffer_pos)
+ return CSTR_LESS_THAN;
+
+ return CSTR_EQUAL;
+}
+
static int compare_tzdate( const TIME_FIELDS *tf, const SYSTEMTIME *compare )
{
static const int month_lengths[12] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
@@ -4317,16 +4326,8 @@ INT WINAPI CompareStringEx( const WCHAR *locale, DWORD flags, const WCHAR *str1,
if (len1 < 0) len1 = lstrlenW(str1);
if (len2 < 0) len2 = lstrlenW(str2);
- ret = compare_weights( flags, str1, len1, str2, len2, UNICODE_WEIGHT );
- if (!ret)
- {
- if (!(flags & NORM_IGNORENONSPACE))
- ret = compare_weights( flags, str1, len1, str2, len2, DIACRITIC_WEIGHT );
- if (!ret && !(flags & NORM_IGNORECASE))
- ret = compare_weights( flags, str1, len1, str2, len2, CASE_WEIGHT );
- }
- if (!ret) return CSTR_EQUAL;
- return (ret < 0) ? CSTR_LESS_THAN : CSTR_GREATER_THAN;
+ ret = sortkey_compare(flags, locale, str1, len1, str2, len2);
+ return ret;
}
--
2.35.1

View File

@ -1,3 +0,0 @@
Fixes: [5163] Microsoft Office XP 2002 installer reports error 25003 (installation source corrupted), custom action 'CADpc' returns 1603
Fixes: [10767] Fix comparison of punctuation characters in lstrcmp
Fixes: [32490] Graphical issues in Inquisitor

View File

@ -51,7 +51,7 @@ usage()
# Get the upstream commit sha
upstream_commit()
{
echo "99ce6e87a3b22c5602d7bbedd43bb40627b63321"
echo "4312d209232c701b0b78d9f8b463917c989005c5"
}
# Show version information
@ -123,7 +123,6 @@ patch_enable_all ()
enable_kernel32_Processor_Group="$1"
enable_krnl386_exe16_GDT_LDT_Emulation="$1"
enable_krnl386_exe16_Invalid_Console_Handles="$1"
enable_libs_Unicode_Collation="$1"
enable_loader_KeyboardLayouts="$1"
enable_mmsystem_dll16_MIDIHDR_Refcount="$1"
enable_mountmgr_DosDevices="$1"
@ -394,9 +393,6 @@ patch_enable ()
krnl386.exe16-Invalid_Console_Handles)
enable_krnl386_exe16_Invalid_Console_Handles="$2"
;;
libs-Unicode_Collation)
enable_libs_Unicode_Collation="$2"
;;
loader-KeyboardLayouts)
enable_loader_KeyboardLayouts="$2"
;;
@ -2112,26 +2108,6 @@ if test "$enable_krnl386_exe16_Invalid_Console_Handles" -eq 1; then
patch_apply krnl386.exe16-Invalid_Console_Handles/0001-krnl386.exe16-Really-translate-all-invalid-console-h.patch
fi
# Patchset libs-Unicode_Collation
# |
# | This patchset fixes the following Wine bugs:
# | * [#5163] Microsoft Office XP 2002 installer reports error 25003 (installation source corrupted), custom action 'CADpc'
# | returns 1603
# | * [#10767] Fix comparison of punctuation characters in lstrcmp
# | * [#32490] Graphical issues in Inquisitor
# |
# | Modified files:
# | * dlls/kernel32/tests/locale.c, dlls/kernelbase/locale.c
# |
if test "$enable_libs_Unicode_Collation" -eq 1; then
patch_apply libs-Unicode_Collation/0001-kernelbase-Implement-sortkey-generation-on-official-.patch
patch_apply libs-Unicode_Collation/0002-kernelbase-Implement-sortkey-punctuation.patch
patch_apply libs-Unicode_Collation/0003-kernelbase-Implement-sortkey-for-Japanese-characters.patch
patch_apply libs-Unicode_Collation/0004-kernelbase-Implement-sortkey-expansion.patch
patch_apply libs-Unicode_Collation/0005-kernelbase-Implement-sortkey-language-support.patch
patch_apply libs-Unicode_Collation/0006-kernelbase-Implement-CompareString-functions.patch
fi
# Patchset loader-KeyboardLayouts
# |
# | This patchset fixes the following Wine bugs:

View File

@ -1 +1 @@
99ce6e87a3b22c5602d7bbedd43bb40627b63321
4312d209232c701b0b78d9f8b463917c989005c5