mirror of
https://gitlab.winehq.org/wine/wine-staging.git
synced 2024-11-21 16:46:54 -08:00
Rebase against 4312d209232c701b0b78d9f8b463917c989005c5.
libs-Unicode_Collation is not fully replaced upstream, but the remaining patch (0006) holds no value anymore.
This commit is contained in:
parent
8ee2551c93
commit
c6119e3d5c
@ -1,600 +0,0 @@
|
||||
From b052dd526d176c8b842f446279ee78542b184f08 Mon Sep 17 00:00:00 2001
|
||||
From: Fabian Maurer <dark.shadow4@web.de>
|
||||
Date: Fri, 10 Apr 2020 18:47:18 +0200
|
||||
Subject: [PATCH] kernelbase: Implement sortkey generation on official tables
|
||||
|
||||
Signed-off-by: Fabian Maurer <dark.shadow4@web.de>
|
||||
---
|
||||
dlls/kernel32/tests/locale.c | 137 ++++++++++++
|
||||
dlls/kernelbase/locale.c | 399 ++++++++++++++++++++++++-----------
|
||||
2 files changed, 413 insertions(+), 123 deletions(-)
|
||||
|
||||
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
|
||||
index 8dc5814162a..d6084e3f328 100644
|
||||
--- a/dlls/kernel32/tests/locale.c
|
||||
+++ b/dlls/kernel32/tests/locale.c
|
||||
@@ -2586,6 +2586,13 @@ static void test_lcmapstring_unicode(lcmapstring_wrapper func_ptr, const char *f
|
||||
lstrlenW(symbols_stripped) + 1, ret);
|
||||
ok(!lstrcmpW(buf, symbols_stripped), "%s string comparison mismatch\n", func_name);
|
||||
|
||||
+ /* test small buffer */
|
||||
+ lstrcpyW(buf, fooW);
|
||||
+ ret = func_ptr(LCMAP_SORTKEY, lower_case, -1, buf, 2);
|
||||
+ ok(ret == 0, "Expected a failure\n");
|
||||
+ ok(GetLastError() == ERROR_INSUFFICIENT_BUFFER,
|
||||
+ "%s unexpected error code %ld\n", func_name, GetLastError());
|
||||
+
|
||||
/* test srclen = 0 */
|
||||
SetLastError(0xdeadbeef);
|
||||
ret = func_ptr(0, upper_case, 0, buf, ARRAY_SIZE(buf));
|
||||
@@ -3222,6 +3229,135 @@ static void test_sorting(void)
|
||||
}
|
||||
}
|
||||
|
||||
+struct sorting_test_entry {
|
||||
+ const WCHAR *locale;
|
||||
+ int result_sortkey;
|
||||
+ int result_compare;
|
||||
+ DWORD flags;
|
||||
+ const WCHAR *first;
|
||||
+ const WCHAR *second;
|
||||
+ BOOL broken_on_xp;
|
||||
+};
|
||||
+
|
||||
+static const struct sorting_test_entry unicode_sorting_tests[] =
|
||||
+{
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0037", L"\x277c", TRUE }, /* Normal character */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x1eca", L"\x1ecb" }, /* Normal character */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x1d05", L"\x1d48" }, /* Normal character */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x19d7", L"\x096d" }, /* Normal character diacritics */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x00f5", L"\x1ecf" }, /* Normal character diacritics */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x2793", L"\x0d70", TRUE }, /* Normal character diacritics */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"A", L"a" }, /* Normal character case weights */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"z", L"Z" }, /* Normal character case weights */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xe5a6", L"\xe5a5\x0333", TRUE }, /* PUA character */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xe5d7", L"\xe5d6\x0330", TRUE }, /* PUA character */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\u276a", L"\u2768" }, /* Symbols add diacritic weight */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\u204d", L"\uff02" }, /* Symbols add case weight */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\ue6e3\u0a02", L"\ue6e3\u20dc", TRUE }, /* Default character, when there is main weight extra there must be no diacritic weight */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"a \u2060 b", L"a b" }, /* Unsortable characters */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"a \xfff0 b", L"a b" }, /* Invalid/undefined characters */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"a\x139F a", L"a a" }, /* Invalid/undefined characters */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"a\x139F a", L"a b" }, /* Invalid/undefined characters */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x00fc", L"\x016d" }, /* Default characters */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x3fcb\x7fd5", L"\x0006\x3032" }, /* Default characters */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x00fc\x30fd", L"\x00fa\x1833" }, /* Default characters */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x1B56\x0330", L"\x1096" }, /* Diacritic is added */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x1817\x0333", L"\x19d7" }, /* Diacritic is added */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x04de\x05ac", L"\x0499" }, /* Diacritic is added */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x01ba\x0654", L"\x01b8" }, /* Diacritic can overflow */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x06b7\x06eb", L"\x06b6" }, /* Diacritic can overflow */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x1420\x0333", L"\x141f" }, /* Diacritic can overflow */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN ,0, L"\x1b56\x0654", L"\x1b56\x0655" }, /* Diacritic can overflow */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x1b56\x0654\x0654", L"\x1b56\x0655" }, /* Diacritic can overflow */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x11bc", L"\x110b" }, /* Jamo case weight */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x11c1", L"\x1111" }, /* Jamo case weight */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x11af", L"\x1105" }, /* Jamo case weight */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x11c2", L"\x11f5" }, /* Jamo main weight */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x1108", L"\x1121" }, /* Jamo main weight */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x1116", L"\x11c7" }, /* Jamo main weight */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x11b1", L"\x11d1" }, /* Jamo main weight */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x4550\x73d2", L"\x3211\x23ad" }, /* CJK main weight 1 */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x3265", L"\x4079" }, /* CJK main weight 1 */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x4c19\x68d0\x52d0", L"\x316d" }, /* CJK main weight 1 */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x72dd", L"\x6b8a" }, /* CJK main weight 2 */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x6785\x3bff\x6f83", L"\x7550\x34c9\x71a7" }, /* CJK main weight 2 */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x5d61", L"\x3aef" }, /* CJK main weight 2 */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x207a", L"\xfe62" }, /* Symbols case weights */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xfe65", L"\xff1e" }, /* Symbols case weights */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x2502", L"\xffe8" }, /* Symbols case weights */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x21da", L"\x21dc" }, /* Symbols diacritic weights */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x29fb", L"\x2295" }, /* Symbols diacritic weights */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0092", L"\x009c" }, /* Symbols diacritic weights */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS, L"\x21da", L"\x21dc" }, /* NORM_IGNORESYMBOLS */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS, L"\x29fb", L"\x2295" }, /* NORM_IGNORESYMBOLS */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS, L"\x0092", L"\x009c" }, /* NORM_IGNORESYMBOLS */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_LESS_THAN, 0, L"\x3099", L"\x309a" }, /* Small diacritic weights at the end get ignored */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_LESS_THAN, 0, L"\x309b", L"\x05a2" }, /* Small diacritic weights at the end get ignored */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_LESS_THAN, 0, L"\xff9e", L"\x0e47" }, /* Small diacritic weights at the end get ignored */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"a b", L"\x0103 a" }, /* Main weights have priority over diacritic weights */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"a", L"\x0103" }, /* Main weights have priority over diacritic weights */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"e x", L"\x0113 v" }, /* Main weights have priority over diacritic weights */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"e", L"\x0113" }, /* Main weights have priority over diacritic weights */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"c s", L"\x0109 r" }, /* Main weights have priority over diacritic weights */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"c", L"\x0109" }, /* Main weights have priority over diacritic weights */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"a \x0103", L"A a" }, /* Diacritic weights have priority over case weights */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"a", L"A" }, /* Diacritic weights have priority over case weights */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"e \x0113", L"E e" }, /* Diacritic weights have priority over case weights */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"e", L"E" }, /* Diacritic weights have priority over case weights */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"c \x0109", L"C c" }, /* Diacritic weights have priority over case weights */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"c", L"C" }, /* Diacritic weights have priority over case weights */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, NORM_IGNORENONSPACE, L"\x1152", L"\x1153" }, /* Diacritic values for Jamo are not ignored */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, NORM_IGNORENONSPACE, L"\x1143", L"\x1145" }, /* Diacritic values for Jamo are not ignored */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, NORM_IGNORENONSPACE, L"\x1196", L"\x1174" }, /* Diacritic values for Jamo are not ignored */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x318e", L"\x382a" }, /* Jungseong < PUA */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xffcb", L"\x3d13" }, /* Jungseong < PUA */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xffcc", L"\x8632" }, /* Jungseong < PUA */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xd847", L"\x382a" }, /* Surrogate > PUA */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xd879", L"\x3d13" }, /* Surrogate > PUA */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xd850", L"\x8632" }, /* Surrogate > PUA */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"A\x0301\x0301", L"A\x0301\x00ad\x0301" }, /* Unsortable combined with diacritics */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"b\x07f2\x07f2", L"b\x07f2\x2064\x07f2" }, /* Unsortable combined with diacritics */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"X\x0337\x0337", L"X\x0337\xfffd\x0337" }, /* Unsortable combined with diacritics */
|
||||
+};
|
||||
+
|
||||
+static void test_unicode_sorting(void)
|
||||
+{
|
||||
+ int i;
|
||||
+ int ret1;
|
||||
+ int ret2;
|
||||
+ BYTE buffer[1000];
|
||||
+ if (!pLCMapStringEx)
|
||||
+ {
|
||||
+ win_skip("LCMapStringEx not available\n");
|
||||
+ return;
|
||||
+ }
|
||||
+ for (i = 0; i < ARRAY_SIZE(unicode_sorting_tests); i++)
|
||||
+ {
|
||||
+ BYTE buff1[1000];
|
||||
+ BYTE buff2[1000];
|
||||
+ int len1, len2;
|
||||
+ int result;
|
||||
+ const struct sorting_test_entry *entry = &unicode_sorting_tests[i];
|
||||
+
|
||||
+ len1 = pLCMapStringEx(entry->locale, LCMAP_SORTKEY | entry->flags, entry->first, -1, (WCHAR*)buff1, ARRAY_SIZE(buff1), NULL, NULL, 0);
|
||||
+ len2 = pLCMapStringEx(entry->locale, LCMAP_SORTKEY | entry->flags, entry->second, -1, (WCHAR*)buff2, ARRAY_SIZE(buff2), NULL, NULL, 0);
|
||||
+
|
||||
+ result = memcmp(buff1, buff2, min(len1, len2)) + 2;
|
||||
+
|
||||
+ ok (result == entry->result_sortkey || broken(entry->broken_on_xp), "Test %d (%s, %s) - Expected %d, got %d\n",
|
||||
+ i, wine_dbgstr_w(entry->first), wine_dbgstr_w(entry->second), entry->result_sortkey, result);
|
||||
+
|
||||
+ result = CompareStringEx(entry->locale, entry->flags, entry->first, -1, entry->second, -1, NULL, NULL, 0);
|
||||
+ if (strcmp(winetest_platform, "wine")) // Disable test on wine for now
|
||||
+ ok (result == entry->result_compare || broken(entry->broken_on_xp), "Test %d (%s, %s) - Expected %d, got %d\n",
|
||||
+ i, wine_dbgstr_w(entry->first), wine_dbgstr_w(entry->second), entry->result_compare, result);
|
||||
+ }
|
||||
+ /* Test diacritics when buffer is short */
|
||||
+ ret1 = pLCMapStringEx(L"en-US", LCMAP_SORTKEY, L"\x0e49\x0e49\x0e49\x0e49\x0e49", -1, (WCHAR*)buffer, 20, NULL, NULL, 0);
|
||||
+ ret2 = pLCMapStringEx(L"en-US", LCMAP_SORTKEY, L"\x0e49\x0e49\x0e49\x0e49\x0e49", -1, (WCHAR*)buffer, 0, NULL, NULL, 0);
|
||||
+ ok(ret1 == ret2, "Got ret1=%d, ret2=%d\n", ret1, ret2);
|
||||
+}
|
||||
+
|
||||
static void test_FoldStringA(void)
|
||||
{
|
||||
int ret, i, j;
|
||||
@@ -7649,6 +7785,7 @@ START_TEST(locale)
|
||||
test_locale_nls();
|
||||
test_geo_name();
|
||||
test_sorting();
|
||||
+ test_unicode_sorting();
|
||||
test_EnumCalendarInfoA();
|
||||
test_EnumCalendarInfoW();
|
||||
test_EnumCalendarInfoExA();
|
||||
diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c
|
||||
index d046cefd749..993ac707a55 100644
|
||||
--- a/dlls/kernelbase/locale.c
|
||||
+++ b/dlls/kernelbase/locale.c
|
||||
@@ -3048,127 +3048,6 @@ static int wcstombs_codepage( const CPTABLEINFO *info, DWORD flags, const WCHAR
|
||||
return wcstombs_sbcs( info, src, srclen, dst, dstlen );
|
||||
}
|
||||
|
||||
-
|
||||
-static int get_sortkey( DWORD flags, const WCHAR *src, int srclen, char *dst, int dstlen )
|
||||
-{
|
||||
- WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
|
||||
- int key_len[4];
|
||||
- char *key_ptr[4];
|
||||
- const WCHAR *src_save = src;
|
||||
- int srclen_save = srclen;
|
||||
-
|
||||
- key_len[0] = key_len[1] = key_len[2] = key_len[3] = 0;
|
||||
- for (; srclen; srclen--, src++)
|
||||
- {
|
||||
- unsigned int i, decomposed_len = 1;/*wine_decompose(*src, dummy, 4);*/
|
||||
- dummy[0] = *src;
|
||||
- if (decomposed_len)
|
||||
- {
|
||||
- for (i = 0; i < decomposed_len; i++)
|
||||
- {
|
||||
- WCHAR wch = dummy[i];
|
||||
- unsigned int ce;
|
||||
-
|
||||
- if ((flags & NORM_IGNORESYMBOLS) &&
|
||||
- (get_char_type( CT_CTYPE1, wch ) & (C1_PUNCT | C1_SPACE)))
|
||||
- continue;
|
||||
-
|
||||
- if (flags & NORM_IGNORECASE) wch = casemap( nls_info.LowerCaseTable, wch );
|
||||
-
|
||||
- ce = collation_table[collation_table[collation_table[wch >> 8] + ((wch >> 4) & 0x0f)] + (wch & 0xf)];
|
||||
- if (ce != (unsigned int)-1)
|
||||
- {
|
||||
- if (ce >> 16) key_len[0] += 2;
|
||||
- if ((ce >> 8) & 0xff) key_len[1]++;
|
||||
- if ((ce >> 4) & 0x0f) key_len[2]++;
|
||||
- if (ce & 1)
|
||||
- {
|
||||
- if (wch >> 8) key_len[3]++;
|
||||
- key_len[3]++;
|
||||
- }
|
||||
- }
|
||||
- else
|
||||
- {
|
||||
- key_len[0] += 2;
|
||||
- if (wch >> 8) key_len[0]++;
|
||||
- if (wch & 0xff) key_len[0]++;
|
||||
- }
|
||||
- }
|
||||
- }
|
||||
- }
|
||||
-
|
||||
- if (!dstlen) /* compute length */
|
||||
- /* 4 * '\1' + key length */
|
||||
- return key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4;
|
||||
-
|
||||
- if (dstlen < key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4 + 1)
|
||||
- return 0; /* overflow */
|
||||
-
|
||||
- src = src_save;
|
||||
- srclen = srclen_save;
|
||||
-
|
||||
- key_ptr[0] = dst;
|
||||
- key_ptr[1] = key_ptr[0] + key_len[0] + 1;
|
||||
- key_ptr[2] = key_ptr[1] + key_len[1] + 1;
|
||||
- key_ptr[3] = key_ptr[2] + key_len[2] + 1;
|
||||
-
|
||||
- for (; srclen; srclen--, src++)
|
||||
- {
|
||||
- unsigned int i, decomposed_len = 1;/*wine_decompose(*src, dummy, 4);*/
|
||||
- dummy[0] = *src;
|
||||
- if (decomposed_len)
|
||||
- {
|
||||
- for (i = 0; i < decomposed_len; i++)
|
||||
- {
|
||||
- WCHAR wch = dummy[i];
|
||||
- unsigned int ce;
|
||||
-
|
||||
- if ((flags & NORM_IGNORESYMBOLS) &&
|
||||
- (get_char_type( CT_CTYPE1, wch ) & (C1_PUNCT | C1_SPACE)))
|
||||
- continue;
|
||||
-
|
||||
- if (flags & NORM_IGNORECASE) wch = casemap( nls_info.LowerCaseTable, wch );
|
||||
-
|
||||
- ce = collation_table[collation_table[collation_table[wch >> 8] + ((wch >> 4) & 0x0f)] + (wch & 0xf)];
|
||||
- if (ce != (unsigned int)-1)
|
||||
- {
|
||||
- WCHAR key;
|
||||
- if ((key = ce >> 16))
|
||||
- {
|
||||
- *key_ptr[0]++ = key >> 8;
|
||||
- *key_ptr[0]++ = key & 0xff;
|
||||
- }
|
||||
- /* make key 1 start from 2 */
|
||||
- if ((key = (ce >> 8) & 0xff)) *key_ptr[1]++ = key + 1;
|
||||
- /* make key 2 start from 2 */
|
||||
- if ((key = (ce >> 4) & 0x0f)) *key_ptr[2]++ = key + 1;
|
||||
- /* key 3 is always a character code */
|
||||
- if (ce & 1)
|
||||
- {
|
||||
- if (wch >> 8) *key_ptr[3]++ = wch >> 8;
|
||||
- if (wch & 0xff) *key_ptr[3]++ = wch & 0xff;
|
||||
- }
|
||||
- }
|
||||
- else
|
||||
- {
|
||||
- *key_ptr[0]++ = 0xff;
|
||||
- *key_ptr[0]++ = 0xfe;
|
||||
- if (wch >> 8) *key_ptr[0]++ = wch >> 8;
|
||||
- if (wch & 0xff) *key_ptr[0]++ = wch & 0xff;
|
||||
- }
|
||||
- }
|
||||
- }
|
||||
- }
|
||||
-
|
||||
- *key_ptr[0] = 1;
|
||||
- *key_ptr[1] = 1;
|
||||
- *key_ptr[2] = 1;
|
||||
- *key_ptr[3]++ = 1;
|
||||
- *key_ptr[3] = 0;
|
||||
- return key_ptr[3] - dst;
|
||||
-}
|
||||
-
|
||||
-
|
||||
/* compose a full-width katakana. return consumed source characters. */
|
||||
static int map_to_fullwidth( const WCHAR *src, int srclen, WCHAR *dst )
|
||||
{
|
||||
@@ -3358,6 +3237,280 @@ static int compare_weights(int flags, const WCHAR *str1, int len1,
|
||||
return len1 - len2;
|
||||
}
|
||||
|
||||
+enum sortkey_special_script
|
||||
+{
|
||||
+ SORTKEY_UNSORTABLE = 0,
|
||||
+ SORTKEY_DIACRITIC = 1,
|
||||
+ SORTKEY_JAPANESE = 3,
|
||||
+ SORTKEY_JAMO = 4,
|
||||
+ SORTKEY_CJK = 5,
|
||||
+ SORTKEY_PUNCTUATION = 6,
|
||||
+ SORTKEY_SYMBOL_1 = 7,
|
||||
+ SORTKEY_SYMBOL_2 = 8,
|
||||
+ SORTKEY_SYMBOL_3 = 9,
|
||||
+ SORTKEY_SYMBOL_4 = 10,
|
||||
+ SORTKEY_SYMBOL_5 = 11,
|
||||
+ SORTKEY_SYMBOL_6 = 12,
|
||||
+};
|
||||
+
|
||||
+#define SORTKEY_MIN_WEIGHT 2
|
||||
+
|
||||
+struct character_info
|
||||
+{
|
||||
+ BYTE weight_primary;
|
||||
+ BYTE script_member;
|
||||
+ BYTE weight_diacritic;
|
||||
+ BYTE weight_case;
|
||||
+};
|
||||
+
|
||||
+struct sortkey_data
|
||||
+{
|
||||
+ BYTE *buffer;
|
||||
+ int buffer_pos;
|
||||
+ int buffer_len;
|
||||
+};
|
||||
+
|
||||
+static void sortkey_get_char(struct character_info *info, WCHAR ch)
|
||||
+{
|
||||
+ DWORD value = sort.keys[ch];
|
||||
+
|
||||
+ info->weight_case = value >> 24;
|
||||
+ info->weight_diacritic = (value >> 16) & 0xff;
|
||||
+ info->script_member = (value >> 8) & 0xff;
|
||||
+ info->weight_primary = value & 0xff;
|
||||
+}
|
||||
+
|
||||
+
|
||||
+static BOOL sortkey_is_PUA(BYTE script_member)
|
||||
+{
|
||||
+ return script_member >= 0xa9 && script_member <= 0xaf;
|
||||
+}
|
||||
+
|
||||
+static void sortkey_add_weight(struct sortkey_data *data, BYTE value)
|
||||
+{
|
||||
+ if (data->buffer_pos < data->buffer_len)
|
||||
+ data->buffer[data->buffer_pos] = value;
|
||||
+ data->buffer_pos++;
|
||||
+}
|
||||
+
|
||||
+static void sortkey_add_case_weight(struct sortkey_data *data, int flags, BYTE value)
|
||||
+{
|
||||
+ if (flags & NORM_IGNORECASE)
|
||||
+ value &= ~0x18;
|
||||
+ if (flags & NORM_IGNOREWIDTH)
|
||||
+ value &= ~0x01;
|
||||
+
|
||||
+ sortkey_add_weight(data, value);
|
||||
+}
|
||||
+
|
||||
+static void sortkey_add_diacritic_weight(struct sortkey_data *data, BYTE value, int *last_weighted_pos)
|
||||
+{
|
||||
+ sortkey_add_weight(data, value);
|
||||
+ if (value > SORTKEY_MIN_WEIGHT)
|
||||
+ *last_weighted_pos = data->buffer_pos;
|
||||
+}
|
||||
+
|
||||
+static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR c)
|
||||
+{
|
||||
+ struct character_info info;
|
||||
+
|
||||
+ sortkey_get_char(&info, c);
|
||||
+
|
||||
+ switch (info.script_member)
|
||||
+ {
|
||||
+ case SORTKEY_UNSORTABLE:
|
||||
+ case SORTKEY_DIACRITIC:
|
||||
+ break;
|
||||
+
|
||||
+ case SORTKEY_JAPANESE:
|
||||
+ /* TODO */
|
||||
+ break;
|
||||
+
|
||||
+ case SORTKEY_JAMO:
|
||||
+ sortkey_add_weight(data, info.weight_primary);
|
||||
+ sortkey_add_weight(data, info.weight_diacritic);
|
||||
+ break;
|
||||
+
|
||||
+ case SORTKEY_CJK:
|
||||
+ sortkey_add_weight(data, 253);
|
||||
+ sortkey_add_weight(data, info.weight_primary);
|
||||
+ sortkey_add_weight(data, info.weight_diacritic);
|
||||
+ break;
|
||||
+
|
||||
+ case SORTKEY_PUNCTUATION:
|
||||
+ /* TODO */
|
||||
+ break;
|
||||
+
|
||||
+ case SORTKEY_SYMBOL_1:
|
||||
+ case SORTKEY_SYMBOL_2:
|
||||
+ case SORTKEY_SYMBOL_3:
|
||||
+ case SORTKEY_SYMBOL_4:
|
||||
+ case SORTKEY_SYMBOL_5:
|
||||
+ case SORTKEY_SYMBOL_6:
|
||||
+ if (flags & NORM_IGNORESYMBOLS)
|
||||
+ break;
|
||||
+
|
||||
+ sortkey_add_weight(data, info.script_member);
|
||||
+ sortkey_add_weight(data, info.weight_primary);
|
||||
+ break;
|
||||
+
|
||||
+ default:
|
||||
+ sortkey_add_weight(data, info.script_member);
|
||||
+ sortkey_add_weight(data, info.weight_primary);
|
||||
+ if (sortkey_is_PUA(info.script_member)) /* PUA characters are handled differently */
|
||||
+ sortkey_add_weight(data, info.weight_diacritic);
|
||||
+ break;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags, WCHAR c, int *last_weighted_pos, int diacritic_start_pos)
|
||||
+{
|
||||
+ struct character_info info;
|
||||
+ int old_pos;
|
||||
+
|
||||
+ sortkey_get_char(&info, c);
|
||||
+
|
||||
+ switch (info.script_member)
|
||||
+ {
|
||||
+ case SORTKEY_UNSORTABLE:
|
||||
+ break;
|
||||
+
|
||||
+ case SORTKEY_DIACRITIC:
|
||||
+ old_pos = data->buffer_pos - 1;
|
||||
+ /*
|
||||
+ * Diacritic weights are added to the previous weight, if there is one,
|
||||
+ * rather than being concatenated after it. This may result in overflow,
|
||||
+ * which is not protected against. */
|
||||
+
|
||||
+ if (old_pos >= diacritic_start_pos)
|
||||
+ {
|
||||
+ if (old_pos < data->buffer_len)
|
||||
+ data->buffer[old_pos] += info.weight_diacritic; /* Overflow can happen, that's okay */
|
||||
+ }
|
||||
+ else
|
||||
+ sortkey_add_diacritic_weight(data, info.weight_diacritic, last_weighted_pos);
|
||||
+ break;
|
||||
+
|
||||
+ case SORTKEY_JAPANESE:
|
||||
+ /* TODO */
|
||||
+ break;
|
||||
+
|
||||
+ case SORTKEY_JAMO:
|
||||
+ case SORTKEY_CJK:
|
||||
+ sortkey_add_diacritic_weight(data, SORTKEY_MIN_WEIGHT, last_weighted_pos);
|
||||
+ break;
|
||||
+
|
||||
+ case SORTKEY_PUNCTUATION:
|
||||
+ /* TODO */
|
||||
+ break;
|
||||
+
|
||||
+ case SORTKEY_SYMBOL_1:
|
||||
+ case SORTKEY_SYMBOL_2:
|
||||
+ case SORTKEY_SYMBOL_3:
|
||||
+ case SORTKEY_SYMBOL_4:
|
||||
+ case SORTKEY_SYMBOL_5:
|
||||
+ case SORTKEY_SYMBOL_6:
|
||||
+ if (!(flags & NORM_IGNORESYMBOLS))
|
||||
+ sortkey_add_diacritic_weight(data, info.weight_diacritic, last_weighted_pos);
|
||||
+ break;
|
||||
+
|
||||
+ default:
|
||||
+ if (!sortkey_is_PUA(info.script_member)) /* PUA characters are handled differently */
|
||||
+ sortkey_add_diacritic_weight(data, info.weight_diacritic, last_weighted_pos);
|
||||
+ break;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR c)
|
||||
+{
|
||||
+ struct character_info info;
|
||||
+
|
||||
+ sortkey_get_char(&info, c);
|
||||
+
|
||||
+ switch (info.script_member)
|
||||
+ {
|
||||
+ case SORTKEY_UNSORTABLE:
|
||||
+ case SORTKEY_DIACRITIC:
|
||||
+ break;
|
||||
+
|
||||
+ case SORTKEY_JAPANESE:
|
||||
+ /* TODO */
|
||||
+ break;
|
||||
+
|
||||
+ case SORTKEY_CJK:
|
||||
+ sortkey_add_case_weight(data, flags, SORTKEY_MIN_WEIGHT);
|
||||
+ break;
|
||||
+
|
||||
+ case SORTKEY_PUNCTUATION:
|
||||
+ /* TODO */
|
||||
+ break;
|
||||
+
|
||||
+ case SORTKEY_SYMBOL_1:
|
||||
+ case SORTKEY_SYMBOL_2:
|
||||
+ case SORTKEY_SYMBOL_3:
|
||||
+ case SORTKEY_SYMBOL_4:
|
||||
+ case SORTKEY_SYMBOL_5:
|
||||
+ case SORTKEY_SYMBOL_6:
|
||||
+ if (!(flags & NORM_IGNORESYMBOLS))
|
||||
+ sortkey_add_case_weight(data, flags, info.weight_case);
|
||||
+ break;
|
||||
+
|
||||
+ case SORTKEY_JAMO:
|
||||
+ default:
|
||||
+ sortkey_add_case_weight(data, flags, info.weight_case);
|
||||
+ break;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, int str_len, BYTE *buffer, int buffer_len)
|
||||
+{
|
||||
+ static const BYTE SORTKEY_SEPARATOR = 1;
|
||||
+ static const BYTE SORTKEY_TERMINATOR = 0;
|
||||
+ int i;
|
||||
+ struct sortkey_data data;
|
||||
+
|
||||
+ data.buffer = buffer;
|
||||
+ data.buffer_pos = 0;
|
||||
+ data.buffer_len = buffer ? buffer_len : 0;
|
||||
+
|
||||
+ if (str_len == -1)
|
||||
+ str_len = wcslen(str);
|
||||
+
|
||||
+ /* Main weights */
|
||||
+ for (i = 0; i < str_len; i++)
|
||||
+ sortkey_add_main_weights(&data, flags, str[i]);
|
||||
+ sortkey_add_weight(&data, SORTKEY_SEPARATOR);
|
||||
+
|
||||
+ /* Diacritic weights */
|
||||
+ if (!(flags & NORM_IGNORENONSPACE))
|
||||
+ {
|
||||
+ int diacritic_start_pos = data.buffer_pos;
|
||||
+ int last_weighted_pos = data.buffer_pos;
|
||||
+ for (i = 0; i < str_len; i++)
|
||||
+ sortkey_add_diacritic_weights(&data, flags, str[i], &last_weighted_pos, diacritic_start_pos);
|
||||
+ /* Remove all weights <= SORTKEY_MIN_WEIGHT from the end */
|
||||
+ data.buffer_pos = last_weighted_pos;
|
||||
+ }
|
||||
+ sortkey_add_weight(&data, SORTKEY_SEPARATOR);
|
||||
+
|
||||
+ /* Case weights */
|
||||
+ for (i = 0; i < str_len; i++)
|
||||
+ sortkey_add_case_weights(&data, flags, str[i]);
|
||||
+ sortkey_add_weight(&data, SORTKEY_SEPARATOR);
|
||||
+
|
||||
+ /* Extra weights */
|
||||
+ /* TODO */
|
||||
+ sortkey_add_weight(&data, SORTKEY_SEPARATOR);
|
||||
+
|
||||
+ /* Special weights */
|
||||
+ /* TODO */
|
||||
+ sortkey_add_weight(&data, SORTKEY_TERMINATOR);
|
||||
+
|
||||
+ if (data.buffer_pos <= buffer_len || !buffer)
|
||||
+ return data.buffer_pos;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
|
||||
static int compare_tzdate( const TIME_FIELDS *tf, const SYSTEMTIME *compare )
|
||||
{
|
||||
@@ -5713,8 +5866,8 @@ INT WINAPI DECLSPEC_HOTPATCH LCMapStringEx( const WCHAR *locale, DWORD flags, co
|
||||
TRACE( "(%s,0x%08lx,%s,%d,%p,%d)\n",
|
||||
debugstr_w(locale), flags, debugstr_wn(src, srclen), srclen, dst, dstlen );
|
||||
|
||||
- if ((ret = get_sortkey( flags, src, srclen, (char *)dst, dstlen ))) ret++;
|
||||
- else SetLastError( ERROR_INSUFFICIENT_BUFFER );
|
||||
+ if (!(ret = sortkey_generate(flags, L"", src, srclen, (BYTE *)dst, dstlen )))
|
||||
+ SetLastError( ERROR_INSUFFICIENT_BUFFER );
|
||||
return ret;
|
||||
}
|
||||
|
||||
--
|
||||
2.35.1
|
||||
|
@ -1,128 +0,0 @@
|
||||
From dca517521550923c881c95659f2309756c84d597 Mon Sep 17 00:00:00 2001
|
||||
From: Fabian Maurer <dark.shadow4@web.de>
|
||||
Date: Sat, 8 Aug 2020 16:47:15 +0200
|
||||
Subject: [PATCH] kernelbase: Implement sortkey punctuation
|
||||
|
||||
Signed-off-by: Fabian Maurer <dark.shadow4@web.de>
|
||||
---
|
||||
dlls/kernel32/tests/locale.c | 29 +++++++++++++++++++++++++++++
|
||||
dlls/kernelbase/locale.c | 35 +++++++++++++++++++++++++++++++----
|
||||
2 files changed, 60 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
|
||||
index d875bf94f92..25c460f4175 100644
|
||||
--- a/dlls/kernel32/tests/locale.c
|
||||
+++ b/dlls/kernel32/tests/locale.c
|
||||
@@ -3210,6 +3210,35 @@ static const struct sorting_test_entry unicode_sorting_tests[] =
|
||||
{ L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"A\x0301\x0301", L"A\x0301\x00ad\x0301" }, /* Unsortable combined with diacritics */
|
||||
{ L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"b\x07f2\x07f2", L"b\x07f2\x2064\x07f2" }, /* Unsortable combined with diacritics */
|
||||
{ L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"X\x0337\x0337", L"X\x0337\xfffd\x0337" }, /* Unsortable combined with diacritics */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORECASE, L"c", L"C" },
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORECASE, L"e", L"E" },
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORECASE, L"A", L"a" },
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x001b", L"\x001c" }, /* Punctuation primary weight */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0005", L"\x0006" }, /* Punctuation primary weight */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0027", L"\xff07", TRUE }, /* Punctuation diacritic/case weight */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x07f4", L"\x07f5", TRUE }, /* Punctuation diacritic/case weight */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x207b", L"\x0008" }, /* Punctuation diacritic/case weight */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x207b", L"\x0008" }, /* Punctuation */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0004", L"\x0011" }, /* Punctuation */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xff07", L"\x07f4" }, /* Punctuation primary weight has priority */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xfe32", L"\x2014" }, /* Punctuation primary weight has priority */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x058a", L"\x2027" }, /* Punctuation primary weight has priority */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS, L"\x207b", L"\x0008" }, /* Punctuation */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS, L"\x0004", L"\x0011" }, /* Punctuation */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x207b", L"\x0008" }, /* Punctuation */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, SORT_STRINGSORT, L"\x0004", L"\x0011" }, /* Punctuation */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS | SORT_STRINGSORT, L"\x207b", L"\x0008" }, /* Punctuation */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS | SORT_STRINGSORT, L"\x0004", L"\x0011" }, /* Punctuation */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, SORT_STRINGSORT, L"\x001a", L"\x001b" }, /* Punctuation main weight */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x2027", L"\x2011" }, /* Punctuation main weight */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x3030", L"\x301c" }, /* Punctuation main weight */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x058a", L"\x2010" }, /* Punctuation diacritic weight */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x07F5", L"\x07F4" }, /* Punctuation diacritic weight */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xfe32", L"\x2013" }, /* Punctuation case weight */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xfe31", L"\xfe58" }, /* Punctuation case weight */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xff07", L"\x0027" }, /* Punctuation case weight */
|
||||
+
|
||||
+
|
||||
};
|
||||
|
||||
static void test_unicode_sorting(void)
|
||||
diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c
|
||||
index 3a29f3e8250..bce705ab484 100644
|
||||
--- a/dlls/kernelbase/locale.c
|
||||
+++ b/dlls/kernelbase/locale.c
|
||||
@@ -2556,7 +2556,11 @@ static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR
|
||||
break;
|
||||
|
||||
case SORTKEY_PUNCTUATION:
|
||||
- /* TODO */
|
||||
+ if ((flags & NORM_IGNORESYMBOLS) || !(flags & SORT_STRINGSORT))
|
||||
+ break;
|
||||
+
|
||||
+ sortkey_add_weight(data, info.script_member);
|
||||
+ sortkey_add_weight(data, info.weight_primary);
|
||||
break;
|
||||
|
||||
case SORTKEY_SYMBOL_1:
|
||||
@@ -2619,7 +2623,9 @@ static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags,
|
||||
break;
|
||||
|
||||
case SORTKEY_PUNCTUATION:
|
||||
- /* TODO */
|
||||
+ if ((flags & NORM_IGNORESYMBOLS) || !(flags & SORT_STRINGSORT))
|
||||
+ break;
|
||||
+ sortkey_add_diacritic_weight(data, info.weight_diacritic, last_weighted_pos);
|
||||
break;
|
||||
|
||||
case SORTKEY_SYMBOL_1:
|
||||
@@ -2660,7 +2666,9 @@ static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR
|
||||
break;
|
||||
|
||||
case SORTKEY_PUNCTUATION:
|
||||
- /* TODO */
|
||||
+ if ((flags & NORM_IGNORESYMBOLS) || !(flags & SORT_STRINGSORT))
|
||||
+ break;
|
||||
+ sortkey_add_case_weight(data, flags, info.weight_case);
|
||||
break;
|
||||
|
||||
case SORTKEY_SYMBOL_1:
|
||||
@@ -2680,6 +2688,24 @@ static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR
|
||||
}
|
||||
}
|
||||
|
||||
+static void sortkey_add_special_weights(struct sortkey_data *data, int flags, WCHAR c)
|
||||
+{
|
||||
+ struct character_info info;
|
||||
+ BYTE weight_second;
|
||||
+
|
||||
+ sortkey_get_char(&info, c);
|
||||
+
|
||||
+ if (info.script_member == SORTKEY_PUNCTUATION)
|
||||
+ {
|
||||
+ if ((flags & NORM_IGNORESYMBOLS) || (flags & SORT_STRINGSORT))
|
||||
+ return;
|
||||
+
|
||||
+ weight_second = (BYTE)(info.weight_diacritic * 8 + info.weight_case);
|
||||
+ sortkey_add_weight(data, info.weight_primary);
|
||||
+ sortkey_add_weight(data, weight_second);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, int str_len, BYTE *buffer, int buffer_len)
|
||||
{
|
||||
static const BYTE SORTKEY_SEPARATOR = 1;
|
||||
@@ -2721,7 +2747,8 @@ static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, in
|
||||
sortkey_add_weight(&data, SORTKEY_SEPARATOR);
|
||||
|
||||
/* Special weights */
|
||||
- /* TODO */
|
||||
+ for (i = 0; i < str_len; i++)
|
||||
+ sortkey_add_special_weights(&data, flags, str[i]);
|
||||
sortkey_add_weight(&data, SORTKEY_TERMINATOR);
|
||||
|
||||
if (data.buffer_pos <= buffer_len || !buffer)
|
||||
--
|
||||
2.29.2
|
||||
|
@ -1,273 +0,0 @@
|
||||
From 0f23a9db326dd6040b2d41fac99bd495f718d63d Mon Sep 17 00:00:00 2001
|
||||
From: Fabian Maurer <dark.shadow4@web.de>
|
||||
Date: Sat, 8 Aug 2020 16:49:02 +0200
|
||||
Subject: [PATCH] kernelbase: Implement sortkey for Japanese characters
|
||||
|
||||
Signed-off-by: Fabian Maurer <dark.shadow4@web.de>
|
||||
---
|
||||
dlls/kernel32/tests/locale.c | 94 ++++++++++++++++++++++++++++++-
|
||||
dlls/kernelbase/locale.c | 104 +++++++++++++++++++++++++++++++++--
|
||||
2 files changed, 192 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
|
||||
index 25c460f4175..43a244d2a6b 100644
|
||||
--- a/dlls/kernel32/tests/locale.c
|
||||
+++ b/dlls/kernel32/tests/locale.c
|
||||
@@ -3237,8 +3237,98 @@ static const struct sorting_test_entry unicode_sorting_tests[] =
|
||||
{ L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xfe32", L"\x2013" }, /* Punctuation case weight */
|
||||
{ L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xfe31", L"\xfe58" }, /* Punctuation case weight */
|
||||
{ L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xff07", L"\x0027" }, /* Punctuation case weight */
|
||||
-
|
||||
-
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS, L"\x207b", L"\x0008" }, /* Punctuation NORM_IGNORESYMBOLS */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS, L"\x0004", L"\x0011" }, /* Punctuation NORM_IGNORESYMBOLS */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS | SORT_STRINGSORT, L"\x207b", L"\x0008" }, /* Punctuation NORM_IGNORESYMBOLS SORT_STRINGSORT */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORESYMBOLS | SORT_STRINGSORT, L"\x0004", L"\x0011" }, /* Punctuation NORM_IGNORESYMBOLS SORT_STRINGSORT */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, SORT_STRINGSORT, L"\x001a", L"\x001b" }, /* Punctuation SORT_STRINGSORT main weight */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x2027", L"\x2011", }, /* Punctuation SORT_STRINGSORT main weight */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x3030", L"\x301c", }, /* Punctuation SORT_STRINGSORT main weight */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x058a", L"\x2010" }, /* Punctuation SORT_STRINGSORT diacritic weight */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\x07F5", L"\x07F4" }, /* Punctuation SORT_STRINGSORT diacritic weight */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xfe32", L"\x2013" }, /* Punctuation SORT_STRINGSORT case weight */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xfe31", L"\xfe58" }, /* Punctuation SORT_STRINGSORT case weight */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, SORT_STRINGSORT, L"\xff07", L"\x0027" }, /* Punctuation SORT_STRINGSORT case weight */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x04b0", L"\x32db" }, /* Japanese main weight */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x3093", L"\x1e62\x013f" }, /* Japanese main weight */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30d3", L"\x30d4" }, /* Japanese diacritic weight */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x307b", L"\x307c" }, /* Japanese diacritic weight */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30ea", L"\x32f7" }, /* Japanese diacritic weight */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x31fb", L"\x30e9" }, /* Japanese case weight small */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30db", L"\x31f9" }, /* Japanese case weight small */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xff6d", L"\xff95" }, /* Japanese case weight small */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORENONSPACE, L"\x31fb", L"\x30e9" }, /* Japanese case weight small */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORENONSPACE, L"\x30db", L"\x31f9" }, /* Japanese case weight small */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORENONSPACE, L"\xff6d", L"\xff95" }, /* Japanese case weight small */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30d5", L"\x3075" }, /* Japanese case weight kana */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x306a", L"\x30ca" }, /* Japanese case weight kana */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x305a", L"\x30ba" }, /* Japanese case weight kana */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREKANATYPE, L"\x30d5", L"\x3075" }, /* Japanese case weight kana */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREKANATYPE, L"\x306a", L"\x30ca" }, /* Japanese case weight kana */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREKANATYPE, L"\x305a", L"\x30ba" }, /* Japanese case weight kana */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30bf", L"\xff80" }, /* Japanese case weight width */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30ab", L"\xff76" }, /* Japanese case weight width */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30a2", L"\xff71" }, /* Japanese case weight width */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\x30bf", L"\xff80" }, /* Japanese case weight width */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\x30ab", L"\xff76" }, /* Japanese case weight width */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\x30a2", L"\xff71" }, /* Japanese case weight width */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORENONSPACE, L"\x31a2", L"\x3110" },
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORENONSPACE, L"\x1342", L"\x133a" },
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNORENONSPACE, L"\x16a4", L"\x16a5" },
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30b1\x30f6", L"\xff79\x30b1" }, /* Kana small data must have priority over width data */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30a6\x30a5", L"\xff73\x30a6" }, /* Kana small data must have priority over width data */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30a8\x30a7", L"\xff74\x30a8" }, /* Kana small data must have priority over width data */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30b1", L"\xff79" }, /* Kana small data must have priority over width data */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30a6", L"\xff73" }, /* Kana small data must have priority over width data */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30a8", L"\xff74" }, /* Kana small data must have priority over width data */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x3046\x30a9", L"\x30a6\x30aa" }, /* Kana small data must have priority over kana type data */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x304a\x3041", L"\x30aa\x3042" }, /* Kana small data must have priority over kana type data */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x3059\x30a7", L"\x30b9\x30a8" }, /* Kana small data must have priority over kana type data */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x3046", L"\x30a6" }, /* Kana small data must have priority over kana type data */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x304a", L"\x30aa" }, /* Kana small data must have priority over kana type data */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x3059", L"\x30b9" }, /* Kana small data must have priority over kana type data */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30a6\x30a8", L"\xff73\x3048" }, /* Kana type data must have priority over width data */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30ab\x30a3", L"\xff76\x3043" }, /* Kana type data must have priority over width data */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30b5\x30ac", L"\xff7b\x304c" }, /* Kana type data must have priority over width data */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30a6", L"\xff73" }, /* Kana type data must have priority over width data */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30ab", L"\xff76" }, /* Kana type data must have priority over width data */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30b5", L"\xff7b" }, /* Kana type data must have priority over width data */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x305a a", L"\x30ba A" }, /* Case weights have priority over extra weights */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30c1 b", L"\xff81 B" }, /* Case weights have priority over extra weights */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xff8b x", L"\x31f6 X" }, /* Case weights have priority over extra weights */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x305a", L"\x30ba" }, /* Case weights have priority over extra weights */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30c1", L"\xff81" }, /* Case weights have priority over extra weights */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xff8b", L"\x31f6" }, /* Case weights have priority over extra weights */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0027\x31ff", L"\x007f\xff9b" }, /* Extra weights have priority over special weights */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x07f5\x30f3", L"\x07f4\x3093" }, /* Extra weights have priority over special weights */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xfe63\x30e0", L"\xff0d\x3080" }, /* Extra weights have priority over special weights */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x0027", L"\x007f" }, /* Extra weights have priority over special weights */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x07f5", L"\x07f4" }, /* Extra weights have priority over special weights */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xfe63", L"\xff0d" }, /* Extra weights have priority over special weights */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\xff68", L"\x30a3" },
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\xff75", L"\x30aa" },
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\x30e2", L"\xff93" },
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xff68", L"\x30a3" },
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\xff75", L"\x30aa" },
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x30e2", L"\xff93" },
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREKANATYPE, L"\x30a8", L"\x3048" },
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREKANATYPE, L"\x30af", L"\x304f" },
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREKANATYPE, L"\x3067", L"\x30c7" },
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30a8", L"\x3048" },
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x30af", L"\x304f" },
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x3067", L"\x30c7" },
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\xffb7", L"\x3147" },
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\xffb6", L"\x3146" },
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, NORM_IGNOREWIDTH, L"\x3145", L"\xffb5" },
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, NORM_IGNORECASE, L"\xffb7", L"\x3147" },
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, NORM_IGNORECASE, L"\xffb6", L"\x3146" },
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, NORM_IGNORECASE, L"\x3145", L"\xffb5" },
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, NORM_IGNORECASE, L"\x2cff", L"\x30ba" }, /* Coptic < Japanese */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, NORM_IGNORECASE, L"\x2cdb", L"\x32de" }, /* Coptic < Japanese */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, NORM_IGNORECASE, L"\x2ce0", L"\x30c6" }, /* Coptic < Japanese */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, NORM_IGNORECASE, L"\x05d3", L"\x30ba" }, /* Hebrew > Japanese */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, NORM_IGNORECASE, L"\x05e3", L"\x32de" }, /* Hebrew > Japanese */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, NORM_IGNORECASE, L"\x05d7", L"\x30c6" }, /* Hebrew > Japanese */
|
||||
};
|
||||
|
||||
static void test_unicode_sorting(void)
|
||||
diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c
|
||||
index bce705ab484..feec402cb61 100644
|
||||
--- a/dlls/kernelbase/locale.c
|
||||
+++ b/dlls/kernelbase/locale.c
|
||||
@@ -2473,6 +2473,11 @@ enum sortkey_special_script
|
||||
|
||||
#define SORTKEY_MIN_WEIGHT 2
|
||||
|
||||
+const BYTE SORTKEY_FLAGS_EXTRA = 0xc4; /* Extra data added to the flags values */
|
||||
+const BYTE SORTKEY_FLAG_HIRAGANA = 0x20; /* if bit is set then hiragana, else katakana */
|
||||
+const BYTE SORTKEY_FLAG_LARGE = 0x02; /* if bit is set then normal kana, else small kana */
|
||||
+const BYTE SORTKEY_FLAG_FULLWIDTH = 0x01; /* if bit is set then full width, else half width */
|
||||
+
|
||||
struct character_info
|
||||
{
|
||||
BYTE weight_primary;
|
||||
@@ -2541,7 +2546,15 @@ static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR
|
||||
break;
|
||||
|
||||
case SORTKEY_JAPANESE:
|
||||
- /* TODO */
|
||||
+ if (info.weight_primary <= 1)
|
||||
+ {
|
||||
+ /* TODO Kana iteration/repeat characters not implemented yet */
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ sortkey_add_weight(data, 34);
|
||||
+ sortkey_add_weight(data, info.weight_primary);
|
||||
+ }
|
||||
break;
|
||||
|
||||
case SORTKEY_JAMO:
|
||||
@@ -2614,7 +2627,12 @@ static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags,
|
||||
break;
|
||||
|
||||
case SORTKEY_JAPANESE:
|
||||
- /* TODO */
|
||||
+ if (info.weight_primary <= 1)
|
||||
+ {
|
||||
+ /* TODO Kana iteration/repeat characters not implemented yet */
|
||||
+ }
|
||||
+ else
|
||||
+ sortkey_add_diacritic_weight(data, info.weight_diacritic, last_weighted_pos);
|
||||
break;
|
||||
|
||||
case SORTKEY_JAMO:
|
||||
@@ -2658,7 +2676,12 @@ static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR
|
||||
break;
|
||||
|
||||
case SORTKEY_JAPANESE:
|
||||
- /* TODO */
|
||||
+ if (info.weight_primary <= 1)
|
||||
+ {
|
||||
+ /* TODO Kana iteration/repeat characters not implemented yet */
|
||||
+ }
|
||||
+ else
|
||||
+ sortkey_add_case_weight(data, flags, SORTKEY_MIN_WEIGHT);
|
||||
break;
|
||||
|
||||
case SORTKEY_CJK:
|
||||
@@ -2706,10 +2729,75 @@ static void sortkey_add_special_weights(struct sortkey_data *data, int flags, WC
|
||||
}
|
||||
}
|
||||
|
||||
+static void sortkey_add_extra_weights_small(struct sortkey_data *data, int flags, WCHAR c)
|
||||
+{
|
||||
+ struct character_info info;
|
||||
+
|
||||
+ sortkey_get_char(&info, c);
|
||||
+
|
||||
+ if (info.script_member == SORTKEY_JAPANESE)
|
||||
+ {
|
||||
+ if (info.weight_primary <= 1)
|
||||
+ {
|
||||
+ /* TODO Kana iteration/repeat characters not implemented yet */
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ if (!(flags & NORM_IGNORENONSPACE))
|
||||
+ {
|
||||
+ sortkey_add_weight(data, (info.weight_case & SORTKEY_FLAG_LARGE) | SORTKEY_FLAGS_EXTRA);
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void sortkey_add_extra_weights_kana(struct sortkey_data *data, int flags, WCHAR c)
|
||||
+{
|
||||
+ struct character_info info;
|
||||
+
|
||||
+ sortkey_get_char(&info, c);
|
||||
+
|
||||
+ if (info.script_member == SORTKEY_JAPANESE)
|
||||
+ {
|
||||
+ if (info.weight_primary <= 1)
|
||||
+ {
|
||||
+ /* TODO Kana iteration/repeat characters not implemented yet */
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ if (flags & NORM_IGNOREKANATYPE)
|
||||
+ info.weight_case = 0;
|
||||
+ sortkey_add_weight(data, (info.weight_case & SORTKEY_FLAG_HIRAGANA) | SORTKEY_FLAGS_EXTRA);
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void sortkey_add_extra_weights_width(struct sortkey_data *data, int flags, WCHAR c)
|
||||
+{
|
||||
+ struct character_info info;
|
||||
+
|
||||
+ sortkey_get_char(&info, c);
|
||||
+
|
||||
+ if (info.script_member == SORTKEY_JAPANESE)
|
||||
+ {
|
||||
+ if (info.weight_primary <= 1)
|
||||
+ {
|
||||
+ /* TODO Kana iteration/repeat characters not implemented yet */
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ if (flags & NORM_IGNOREWIDTH)
|
||||
+ info.weight_case = 0;
|
||||
+ sortkey_add_weight(data, (info.weight_case & SORTKEY_FLAG_FULLWIDTH) | SORTKEY_FLAGS_EXTRA);
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, int str_len, BYTE *buffer, int buffer_len)
|
||||
{
|
||||
static const BYTE SORTKEY_SEPARATOR = 1;
|
||||
static const BYTE SORTKEY_TERMINATOR = 0;
|
||||
+ static const BYTE SORTKEY_EXTRA_SEPARATOR = 0xff;
|
||||
int i;
|
||||
struct sortkey_data data;
|
||||
|
||||
@@ -2743,7 +2831,15 @@ static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, in
|
||||
sortkey_add_weight(&data, SORTKEY_SEPARATOR);
|
||||
|
||||
/* Extra weights */
|
||||
- /* TODO */
|
||||
+ for (i = 0; i < str_len; i++)
|
||||
+ sortkey_add_extra_weights_small(&data, flags, str[i]);
|
||||
+ sortkey_add_weight(&data, SORTKEY_EXTRA_SEPARATOR);
|
||||
+ for (i = 0; i < str_len; i++)
|
||||
+ sortkey_add_extra_weights_kana(&data, flags, str[i]);
|
||||
+ sortkey_add_weight(&data, SORTKEY_EXTRA_SEPARATOR);
|
||||
+ for (i = 0; i < str_len; i++)
|
||||
+ sortkey_add_extra_weights_width(&data, flags, str[i]);
|
||||
+ sortkey_add_weight(&data, SORTKEY_EXTRA_SEPARATOR);
|
||||
sortkey_add_weight(&data, SORTKEY_SEPARATOR);
|
||||
|
||||
/* Special weights */
|
||||
--
|
||||
2.29.2
|
||||
|
@ -1,188 +0,0 @@
|
||||
From 9ccd944af35dc418a09a17ab70619b37e598ea43 Mon Sep 17 00:00:00 2001
|
||||
From: Fabian Maurer <dark.shadow4@web.de>
|
||||
Date: Sat, 8 Aug 2020 16:49:45 +0200
|
||||
Subject: [PATCH] kernelbase: Implement sortkey expansion
|
||||
|
||||
Signed-off-by: Fabian Maurer <dark.shadow4@web.de>
|
||||
---
|
||||
dlls/kernel32/tests/locale.c | 6 +++
|
||||
dlls/kernelbase/locale.c | 91 +++++++++++++++++++++++++++++++++++-
|
||||
2 files changed, 96 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
|
||||
index 43a244d2a6b..e8adb32bbbd 100644
|
||||
--- a/dlls/kernel32/tests/locale.c
|
||||
+++ b/dlls/kernel32/tests/locale.c
|
||||
@@ -3329,6 +3329,12 @@ static const struct sorting_test_entry unicode_sorting_tests[] =
|
||||
{ L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, NORM_IGNORECASE, L"\x05d3", L"\x30ba" }, /* Hebrew > Japanese */
|
||||
{ L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, NORM_IGNORECASE, L"\x05e3", L"\x32de" }, /* Hebrew > Japanese */
|
||||
{ L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, NORM_IGNORECASE, L"\x05d7", L"\x30c6" }, /* Hebrew > Japanese */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"\x00c6", L"\x0041\x0045" }, /* Expansion */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"\x0f5c", L"\x0f5b\x0fb7" }, /* Expansion */
|
||||
+ { L"en-US", CSTR_EQUAL, CSTR_EQUAL, 0, L"\x05f0", L"\x05d5\x05d5" }, /* Expansion */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_EQUAL, 0, L"\x0f75", L"\x0f71\x0f74" }, /* Expansion character always follow default character logic */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_EQUAL, 0, L"\xfc5e", L"\x064c\x0651" }, /* Expansion character always follow default character logic */
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_EQUAL, 0, L"\xfb2b", L"\x05e9\x05c2" }, /* Expansion character always follow default character logic */
|
||||
};
|
||||
|
||||
static void test_unicode_sorting(void)
|
||||
diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c
|
||||
index feec402cb61..da358d74934 100644
|
||||
--- a/dlls/kernelbase/locale.c
|
||||
+++ b/dlls/kernelbase/locale.c
|
||||
@@ -2459,6 +2459,7 @@ enum sortkey_special_script
|
||||
{
|
||||
SORTKEY_UNSORTABLE = 0,
|
||||
SORTKEY_DIACRITIC = 1,
|
||||
+ SORTKEY_EXPANSION = 2,
|
||||
SORTKEY_JAPANESE = 3,
|
||||
SORTKEY_JAMO = 4,
|
||||
SORTKEY_CJK = 5,
|
||||
@@ -2496,13 +2497,27 @@ struct sortkey_data
|
||||
static void sortkey_get_char(struct character_info *info, WCHAR ch)
|
||||
{
|
||||
DWORD value = sort.keys[ch];
|
||||
-
|
||||
info->weight_case = value >> 24;
|
||||
info->weight_diacritic = (value >> 16) & 0xff;
|
||||
info->script_member = (value >> 8) & 0xff;
|
||||
info->weight_primary = value & 0xff;
|
||||
}
|
||||
|
||||
+static const WCHAR* sortkey_get_expansion(WCHAR ch)
|
||||
+{
|
||||
+ DWORD pos_info = sort.keys[ch];
|
||||
+ unsigned int pos = pos_info >> 16;
|
||||
+ const DWORD *ptr;
|
||||
+ unsigned int count_expansion;
|
||||
+ if ((WORD)pos_info != 0x200) /* Check for expansion magic number */
|
||||
+ return NULL;
|
||||
+ ptr = (const DWORD *)(sort.guids + sort.guid_count);
|
||||
+ count_expansion = *ptr++;
|
||||
+ if (pos >= count_expansion)
|
||||
+ return NULL;
|
||||
+ return (const WCHAR *)(ptr + pos);
|
||||
+}
|
||||
+
|
||||
|
||||
static BOOL sortkey_is_PUA(BYTE script_member)
|
||||
{
|
||||
@@ -2533,6 +2548,27 @@ static void sortkey_add_diacritic_weight(struct sortkey_data *data, BYTE value,
|
||||
*last_weighted_pos = data->buffer_pos;
|
||||
}
|
||||
|
||||
+static void sortkey_handle_expansion_main(struct sortkey_data *data, int flags, WCHAR c)
|
||||
+{
|
||||
+ struct character_info info;
|
||||
+ const WCHAR *expansion = sortkey_get_expansion(c);
|
||||
+ if (expansion)
|
||||
+ {
|
||||
+ /* Expansion characters always follow default character logic, ignoring the script_member value */
|
||||
+ sortkey_handle_expansion_main(data, flags, expansion[0]);
|
||||
+ sortkey_handle_expansion_main(data, flags, expansion[1]);
|
||||
+ return;
|
||||
+ }
|
||||
+ sortkey_get_char(&info, c);
|
||||
+ if (info.script_member != SORTKEY_UNSORTABLE)
|
||||
+ {
|
||||
+ sortkey_add_weight(data, info.script_member);
|
||||
+ sortkey_add_weight(data, info.weight_primary);
|
||||
+ if (sortkey_is_PUA(info.script_member))
|
||||
+ sortkey_add_weight(data, info.weight_diacritic);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR c)
|
||||
{
|
||||
struct character_info info;
|
||||
@@ -2542,6 +2578,12 @@ static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR
|
||||
switch (info.script_member)
|
||||
{
|
||||
case SORTKEY_UNSORTABLE:
|
||||
+ break;
|
||||
+
|
||||
+ case SORTKEY_EXPANSION:
|
||||
+ sortkey_handle_expansion_main(data, flags, c);
|
||||
+ break;
|
||||
+
|
||||
case SORTKEY_DIACRITIC:
|
||||
break;
|
||||
|
||||
@@ -2598,6 +2640,25 @@ static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR
|
||||
}
|
||||
}
|
||||
|
||||
+static void sortkey_handle_expansion_diacritic(struct sortkey_data *data, int flags, WCHAR c, int *last_weighted_pos)
|
||||
+{
|
||||
+ struct character_info info;
|
||||
+ const WCHAR *expansion = sortkey_get_expansion(c);
|
||||
+ if (expansion)
|
||||
+ {
|
||||
+ /* Expansion characters always follow default character logic, ignoring the script_member value */
|
||||
+ sortkey_handle_expansion_diacritic(data, flags, expansion[0], last_weighted_pos);
|
||||
+ sortkey_handle_expansion_diacritic(data, flags, expansion[1], last_weighted_pos);
|
||||
+ return;
|
||||
+ }
|
||||
+ sortkey_get_char(&info, c);
|
||||
+ if (info.script_member != SORTKEY_UNSORTABLE)
|
||||
+ {
|
||||
+ if (!sortkey_is_PUA(info.script_member))
|
||||
+ sortkey_add_diacritic_weight(data, info.weight_diacritic, last_weighted_pos);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags, WCHAR c, int *last_weighted_pos, int diacritic_start_pos)
|
||||
{
|
||||
struct character_info info;
|
||||
@@ -2610,6 +2671,10 @@ static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags,
|
||||
case SORTKEY_UNSORTABLE:
|
||||
break;
|
||||
|
||||
+ case SORTKEY_EXPANSION:
|
||||
+ sortkey_handle_expansion_diacritic(data, flags, c, last_weighted_pos);
|
||||
+ break;
|
||||
+
|
||||
case SORTKEY_DIACRITIC:
|
||||
old_pos = data->buffer_pos - 1;
|
||||
/*
|
||||
@@ -2663,6 +2728,24 @@ static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags,
|
||||
}
|
||||
}
|
||||
|
||||
+static void sortkey_handle_expansion_case(struct sortkey_data *data, int flags, WCHAR c)
|
||||
+{
|
||||
+ struct character_info info;
|
||||
+ const WCHAR *expansion = sortkey_get_expansion(c);
|
||||
+ if (expansion)
|
||||
+ {
|
||||
+ /* Expansion characters always follow default character logic, ignoring the script_member value */
|
||||
+ sortkey_handle_expansion_case(data, flags, expansion[0]);
|
||||
+ sortkey_handle_expansion_case(data, flags, expansion[1]);
|
||||
+ return;
|
||||
+ }
|
||||
+ sortkey_get_char(&info, c);
|
||||
+ if (info.script_member != SORTKEY_UNSORTABLE)
|
||||
+ {
|
||||
+ sortkey_add_case_weight(data, flags, info.weight_case);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR c)
|
||||
{
|
||||
struct character_info info;
|
||||
@@ -2672,6 +2755,12 @@ static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR
|
||||
switch (info.script_member)
|
||||
{
|
||||
case SORTKEY_UNSORTABLE:
|
||||
+ break;
|
||||
+
|
||||
+ case SORTKEY_EXPANSION:
|
||||
+ sortkey_handle_expansion_case(data, flags, c);
|
||||
+ break;
|
||||
+
|
||||
case SORTKEY_DIACRITIC:
|
||||
break;
|
||||
|
||||
--
|
||||
2.29.2
|
||||
|
@ -1,377 +0,0 @@
|
||||
From 74ed8758a238f1747055b4c8fa78edc2d5e7aba9 Mon Sep 17 00:00:00 2001
|
||||
From: Fabian Maurer <dark.shadow4@web.de>
|
||||
Date: Sat, 8 Aug 2020 17:32:56 +0200
|
||||
Subject: [PATCH] kernelbase: Implement sortkey language support
|
||||
|
||||
Signed-off-by: Fabian Maurer <dark.shadow4@web.de>
|
||||
---
|
||||
dlls/kernel32/tests/locale.c | 50 ++++++++++++++++++
|
||||
dlls/kernelbase/locale.c | 99 +++++++++++++++++++++---------------
|
||||
2 files changed, 109 insertions(+), 40 deletions(-)
|
||||
|
||||
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
|
||||
index 0aaa87e38c1..84931318075 100644
|
||||
--- a/dlls/kernel32/tests/locale.c
|
||||
+++ b/dlls/kernel32/tests/locale.c
|
||||
@@ -3210,6 +3210,56 @@ static const struct sorting_test_entry unicode_sorting_tests[] =
|
||||
{ L"en-US", CSTR_LESS_THAN, CSTR_EQUAL, 0, L"\x0f75", L"\x0f71\x0f74" }, /* Expansion character always follow default character logic */
|
||||
{ L"en-US", CSTR_LESS_THAN, CSTR_EQUAL, 0, L"\xfc5e", L"\x064c\x0651" }, /* Expansion character always follow default character logic */
|
||||
{ L"en-US", CSTR_LESS_THAN, CSTR_EQUAL, 0, L"\xfb2b", L"\x05e9\x05c2" }, /* Expansion character always follow default character logic */
|
||||
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x6df8", L"\x654b\x29e9" }, /* Japanese locale */
|
||||
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x685d\x1239\x1b61", L"\x59b6\x6542\x2a62\x04a7" },
|
||||
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x62f3\x43e9", L"\x5760" },
|
||||
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x634c", L"\x2f0d\x5f1c\x7124" },
|
||||
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x69e7\x0502", L"\x57cc" },
|
||||
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x7589", L"\x67c5" },
|
||||
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x5ede\x765c", L"\x7324" },
|
||||
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x5c7f\x5961", L"\x7cbe" },
|
||||
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x3162", L"\x6a84\x1549\x0b60" },
|
||||
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x769e\x448e", L"\x4e6e" },
|
||||
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x59a4", L"\x5faa\x607c" },
|
||||
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x529b", L"\x733f" },
|
||||
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x6ff8\x2a0a", L"\x7953\x6712" },
|
||||
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x6dfb", L"\x6793" },
|
||||
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x67ed", L"\x6aa2" },
|
||||
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x4e61", L"\x6350\x6b08" },
|
||||
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x5118", L"\x53b3\x75b4" },
|
||||
+ { L"ja-JP", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x6bbf", L"\x65a3" },
|
||||
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x5690", L"\x5fa8" },
|
||||
+ { L"ja-JP", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x61e2", L"\x76e5" },
|
||||
+ { L"ko-KR", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x8db6", L"\xd198" },
|
||||
+ { L"ko-KR", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x8f72", L"\xd2b9" },
|
||||
+ { L"ko-KR", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x91d8", L"\xd318" },
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x8db6", L"\xd198" },
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x8f72", L"\xd2b9" },
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x91d8", L"\xd318" },
|
||||
+ { L"cs-CZ", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x0160", L"\x0219" },
|
||||
+ { L"cs-CZ", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x059a", L"\x0308" },
|
||||
+ { L"cs-CZ", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x013a", L"\x013f" },
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0160", L"\x0219" },
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x059a", L"\x0308" },
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x013a", L"\x013f" },
|
||||
+ { L"vi-VN", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x1d8f", L"\x1ea8" },
|
||||
+ { L"vi-VN", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0323", L"\xfe26" },
|
||||
+ { L"vi-VN", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"R", L"\xff32" },
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x1d8f", L"\x1ea8" },
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x0323", L"\xfe26" },
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"R", L"\xff32" },
|
||||
+ { L"zh-HK", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x83ae", L"\x71b9" },
|
||||
+ { L"zh-HK", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x7e50", L"\xc683" },
|
||||
+ { L"zh-HK", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x6c69", L"\x7f8a" },
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x83ae", L"\x71b9" },
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x7e50", L"\xc683" },
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x6c69", L"\x7f8a" },
|
||||
+ { L"tr-TR", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x00dc", L"\x1ee9" },
|
||||
+ { L"tr-TR", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x00fc", L"\x1ee6" },
|
||||
+ { L"tr-TR", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x0152", L"\x00d6" },
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x00dc", L"\x1ee9" },
|
||||
+ { L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x00fc", L"\x1ee6" },
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x0152", L"\x00d6" },
|
||||
};
|
||||
|
||||
static void test_unicode_sorting(void)
|
||||
diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c
|
||||
index e81ed0e0f2e..394912307a9 100644
|
||||
--- a/dlls/kernelbase/locale.c
|
||||
+++ b/dlls/kernelbase/locale.c
|
||||
@@ -2530,9 +2530,27 @@ struct sortkey_data
|
||||
int buffer_len;
|
||||
};
|
||||
|
||||
-static void sortkey_get_char(struct character_info *info, WCHAR ch)
|
||||
+static DWORD sortkey_get_exception(WCHAR ch, const struct sortguid *locale)
|
||||
{
|
||||
- DWORD value = sort.keys[ch];
|
||||
+ if (locale && locale->except)
|
||||
+ {
|
||||
+ DWORD *table = sort.keys + locale->except;
|
||||
+ DWORD hi = ch >> 8;
|
||||
+ DWORD lo = ch & 0xff;
|
||||
+ if (table[hi] == hi * 0x100)
|
||||
+ return 0;
|
||||
+ if (sort.keys[table[hi] + lo] == sort.keys[hi * 0x100 + lo])
|
||||
+ return 0;
|
||||
+ return sort.keys[table[hi] + lo];
|
||||
+ }
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static void sortkey_get_char(struct character_info *info, WCHAR ch, const struct sortguid *locale)
|
||||
+{
|
||||
+ DWORD value = sortkey_get_exception(ch, locale);
|
||||
+ if (!value)
|
||||
+ value = sort.keys[ch];
|
||||
info->weight_case = value >> 24;
|
||||
info->weight_diacritic = (value >> 16) & 0xff;
|
||||
info->script_member = (value >> 8) & 0xff;
|
||||
@@ -2584,18 +2602,18 @@ static void sortkey_add_diacritic_weight(struct sortkey_data *data, BYTE value,
|
||||
*last_weighted_pos = data->buffer_pos;
|
||||
}
|
||||
|
||||
-static void sortkey_handle_expansion_main(struct sortkey_data *data, int flags, WCHAR c)
|
||||
+static void sortkey_handle_expansion_main(struct sortkey_data *data, int flags, WCHAR c, const struct sortguid *locale)
|
||||
{
|
||||
struct character_info info;
|
||||
const WCHAR *expansion = sortkey_get_expansion(c);
|
||||
if (expansion)
|
||||
{
|
||||
/* Expansion characters always follow default character logic, ignoring the script_member value */
|
||||
- sortkey_handle_expansion_main(data, flags, expansion[0]);
|
||||
- sortkey_handle_expansion_main(data, flags, expansion[1]);
|
||||
+ sortkey_handle_expansion_main(data, flags, expansion[0], locale);
|
||||
+ sortkey_handle_expansion_main(data, flags, expansion[1], locale);
|
||||
return;
|
||||
}
|
||||
- sortkey_get_char(&info, c);
|
||||
+ sortkey_get_char(&info, c, locale);
|
||||
if (info.script_member != SORTKEY_UNSORTABLE)
|
||||
{
|
||||
sortkey_add_weight(data, info.script_member);
|
||||
@@ -2605,11 +2623,11 @@ static void sortkey_handle_expansion_main(struct sortkey_data *data, int flags,
|
||||
}
|
||||
}
|
||||
|
||||
-static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR c)
|
||||
+static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR c, const struct sortguid *locale)
|
||||
{
|
||||
struct character_info info;
|
||||
|
||||
- sortkey_get_char(&info, c);
|
||||
+ sortkey_get_char(&info, c, locale);
|
||||
|
||||
switch (info.script_member)
|
||||
{
|
||||
@@ -2617,7 +2635,7 @@ static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR
|
||||
break;
|
||||
|
||||
case SORTKEY_EXPANSION:
|
||||
- sortkey_handle_expansion_main(data, flags, c);
|
||||
+ sortkey_handle_expansion_main(data, flags, c, locale);
|
||||
break;
|
||||
|
||||
case SORTKEY_DIACRITIC:
|
||||
@@ -2676,18 +2694,18 @@ static void sortkey_add_main_weights(struct sortkey_data *data, int flags, WCHAR
|
||||
}
|
||||
}
|
||||
|
||||
-static void sortkey_handle_expansion_diacritic(struct sortkey_data *data, int flags, WCHAR c, int *last_weighted_pos)
|
||||
+static void sortkey_handle_expansion_diacritic(struct sortkey_data *data, int flags, WCHAR c, int *last_weighted_pos, const struct sortguid *locale)
|
||||
{
|
||||
struct character_info info;
|
||||
const WCHAR *expansion = sortkey_get_expansion(c);
|
||||
if (expansion)
|
||||
{
|
||||
/* Expansion characters always follow default character logic, ignoring the script_member value */
|
||||
- sortkey_handle_expansion_diacritic(data, flags, expansion[0], last_weighted_pos);
|
||||
- sortkey_handle_expansion_diacritic(data, flags, expansion[1], last_weighted_pos);
|
||||
+ sortkey_handle_expansion_diacritic(data, flags, expansion[0], last_weighted_pos, locale);
|
||||
+ sortkey_handle_expansion_diacritic(data, flags, expansion[1], last_weighted_pos, locale);
|
||||
return;
|
||||
}
|
||||
- sortkey_get_char(&info, c);
|
||||
+ sortkey_get_char(&info, c, locale);
|
||||
if (info.script_member != SORTKEY_UNSORTABLE)
|
||||
{
|
||||
if (!sortkey_is_PUA(info.script_member))
|
||||
@@ -2695,12 +2713,12 @@ static void sortkey_handle_expansion_diacritic(struct sortkey_data *data, int fl
|
||||
}
|
||||
}
|
||||
|
||||
-static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags, WCHAR c, int *last_weighted_pos, int diacritic_start_pos)
|
||||
+static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags, WCHAR c, int *last_weighted_pos, int diacritic_start_pos, const struct sortguid *locale)
|
||||
{
|
||||
struct character_info info;
|
||||
int old_pos;
|
||||
|
||||
- sortkey_get_char(&info, c);
|
||||
+ sortkey_get_char(&info, c, locale);
|
||||
|
||||
switch (info.script_member)
|
||||
{
|
||||
@@ -2708,7 +2726,7 @@ static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags,
|
||||
break;
|
||||
|
||||
case SORTKEY_EXPANSION:
|
||||
- sortkey_handle_expansion_diacritic(data, flags, c, last_weighted_pos);
|
||||
+ sortkey_handle_expansion_diacritic(data, flags, c, last_weighted_pos, locale);
|
||||
break;
|
||||
|
||||
case SORTKEY_DIACRITIC:
|
||||
@@ -2764,29 +2782,29 @@ static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags,
|
||||
}
|
||||
}
|
||||
|
||||
-static void sortkey_handle_expansion_case(struct sortkey_data *data, int flags, WCHAR c)
|
||||
+static void sortkey_handle_expansion_case(struct sortkey_data *data, int flags, WCHAR c, const struct sortguid *locale)
|
||||
{
|
||||
struct character_info info;
|
||||
const WCHAR *expansion = sortkey_get_expansion(c);
|
||||
if (expansion)
|
||||
{
|
||||
/* Expansion characters always follow default character logic, ignoring the script_member value */
|
||||
- sortkey_handle_expansion_case(data, flags, expansion[0]);
|
||||
- sortkey_handle_expansion_case(data, flags, expansion[1]);
|
||||
+ sortkey_handle_expansion_case(data, flags, expansion[0], locale);
|
||||
+ sortkey_handle_expansion_case(data, flags, expansion[1], locale);
|
||||
return;
|
||||
}
|
||||
- sortkey_get_char(&info, c);
|
||||
+ sortkey_get_char(&info, c, locale);
|
||||
if (info.script_member != SORTKEY_UNSORTABLE)
|
||||
{
|
||||
sortkey_add_case_weight(data, flags, info.weight_case);
|
||||
}
|
||||
}
|
||||
|
||||
-static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR c)
|
||||
+static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR c, const struct sortguid *locale)
|
||||
{
|
||||
struct character_info info;
|
||||
|
||||
- sortkey_get_char(&info, c);
|
||||
+ sortkey_get_char(&info, c, locale);
|
||||
|
||||
switch (info.script_member)
|
||||
{
|
||||
@@ -2794,7 +2812,7 @@ static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR
|
||||
break;
|
||||
|
||||
case SORTKEY_EXPANSION:
|
||||
- sortkey_handle_expansion_case(data, flags, c);
|
||||
+ sortkey_handle_expansion_case(data, flags, c, locale);
|
||||
break;
|
||||
|
||||
case SORTKEY_DIACRITIC:
|
||||
@@ -2836,12 +2854,12 @@ static void sortkey_add_case_weights(struct sortkey_data *data, int flags, WCHAR
|
||||
}
|
||||
}
|
||||
|
||||
-static void sortkey_add_special_weights(struct sortkey_data *data, int flags, WCHAR c)
|
||||
+static void sortkey_add_special_weights(struct sortkey_data *data, int flags, WCHAR c, const struct sortguid *locale)
|
||||
{
|
||||
struct character_info info;
|
||||
BYTE weight_second;
|
||||
|
||||
- sortkey_get_char(&info, c);
|
||||
+ sortkey_get_char(&info, c, locale);
|
||||
|
||||
if (info.script_member == SORTKEY_PUNCTUATION)
|
||||
{
|
||||
@@ -2854,11 +2872,11 @@ static void sortkey_add_special_weights(struct sortkey_data *data, int flags, WC
|
||||
}
|
||||
}
|
||||
|
||||
-static void sortkey_add_extra_weights_small(struct sortkey_data *data, int flags, WCHAR c)
|
||||
+static void sortkey_add_extra_weights_small(struct sortkey_data *data, int flags, WCHAR c, const struct sortguid *locale)
|
||||
{
|
||||
struct character_info info;
|
||||
|
||||
- sortkey_get_char(&info, c);
|
||||
+ sortkey_get_char(&info, c, locale);
|
||||
|
||||
if (info.script_member == SORTKEY_JAPANESE)
|
||||
{
|
||||
@@ -2876,11 +2894,11 @@ static void sortkey_add_extra_weights_small(struct sortkey_data *data, int flags
|
||||
}
|
||||
}
|
||||
|
||||
-static void sortkey_add_extra_weights_kana(struct sortkey_data *data, int flags, WCHAR c)
|
||||
+static void sortkey_add_extra_weights_kana(struct sortkey_data *data, int flags, WCHAR c, const struct sortguid *locale)
|
||||
{
|
||||
struct character_info info;
|
||||
|
||||
- sortkey_get_char(&info, c);
|
||||
+ sortkey_get_char(&info, c, locale);
|
||||
|
||||
if (info.script_member == SORTKEY_JAPANESE)
|
||||
{
|
||||
@@ -2897,11 +2915,11 @@ static void sortkey_add_extra_weights_kana(struct sortkey_data *data, int flags,
|
||||
}
|
||||
}
|
||||
|
||||
-static void sortkey_add_extra_weights_width(struct sortkey_data *data, int flags, WCHAR c)
|
||||
+static void sortkey_add_extra_weights_width(struct sortkey_data *data, int flags, WCHAR c, const struct sortguid *locale)
|
||||
{
|
||||
struct character_info info;
|
||||
|
||||
- sortkey_get_char(&info, c);
|
||||
+ sortkey_get_char(&info, c, locale);
|
||||
|
||||
if (info.script_member == SORTKEY_JAPANESE)
|
||||
{
|
||||
@@ -2918,13 +2936,14 @@ static void sortkey_add_extra_weights_width(struct sortkey_data *data, int flags
|
||||
}
|
||||
}
|
||||
|
||||
-static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, int str_len, BYTE *buffer, int buffer_len)
|
||||
+static int sortkey_generate(int flags, const WCHAR *locale_name, const WCHAR *str, int str_len, BYTE *buffer, int buffer_len)
|
||||
{
|
||||
static const BYTE SORTKEY_SEPARATOR = 1;
|
||||
static const BYTE SORTKEY_TERMINATOR = 0;
|
||||
static const BYTE SORTKEY_EXTRA_SEPARATOR = 0xff;
|
||||
int i;
|
||||
struct sortkey_data data;
|
||||
+ const struct sortguid *locale = get_language_sort(locale_name);
|
||||
|
||||
data.buffer = buffer;
|
||||
data.buffer_pos = 0;
|
||||
@@ -2935,7 +2954,7 @@ static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, in
|
||||
|
||||
/* Main weights */
|
||||
for (i = 0; i < str_len; i++)
|
||||
- sortkey_add_main_weights(&data, flags, str[i]);
|
||||
+ sortkey_add_main_weights(&data, flags, str[i], locale);
|
||||
sortkey_add_weight(&data, SORTKEY_SEPARATOR);
|
||||
|
||||
/* Diacritic weights */
|
||||
@@ -2944,7 +2963,7 @@ static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, in
|
||||
int diacritic_start_pos = data.buffer_pos;
|
||||
int last_weighted_pos = data.buffer_pos;
|
||||
for (i = 0; i < str_len; i++)
|
||||
- sortkey_add_diacritic_weights(&data, flags, str[i], &last_weighted_pos, diacritic_start_pos);
|
||||
+ sortkey_add_diacritic_weights(&data, flags, str[i], &last_weighted_pos, diacritic_start_pos, locale);
|
||||
/* Remove all weights <= SORTKEY_MIN_WEIGHT from the end */
|
||||
data.buffer_pos = last_weighted_pos;
|
||||
}
|
||||
@@ -2952,24 +2971,24 @@ static int sortkey_generate(int flags, const WCHAR *locale, const WCHAR *str, in
|
||||
|
||||
/* Case weights */
|
||||
for (i = 0; i < str_len; i++)
|
||||
- sortkey_add_case_weights(&data, flags, str[i]);
|
||||
+ sortkey_add_case_weights(&data, flags, str[i], locale);
|
||||
sortkey_add_weight(&data, SORTKEY_SEPARATOR);
|
||||
|
||||
/* Extra weights */
|
||||
for (i = 0; i < str_len; i++)
|
||||
- sortkey_add_extra_weights_small(&data, flags, str[i]);
|
||||
+ sortkey_add_extra_weights_small(&data, flags, str[i], locale);
|
||||
sortkey_add_weight(&data, SORTKEY_EXTRA_SEPARATOR);
|
||||
for (i = 0; i < str_len; i++)
|
||||
- sortkey_add_extra_weights_kana(&data, flags, str[i]);
|
||||
+ sortkey_add_extra_weights_kana(&data, flags, str[i], locale);
|
||||
sortkey_add_weight(&data, SORTKEY_EXTRA_SEPARATOR);
|
||||
for (i = 0; i < str_len; i++)
|
||||
- sortkey_add_extra_weights_width(&data, flags, str[i]);
|
||||
+ sortkey_add_extra_weights_width(&data, flags, str[i], locale);
|
||||
sortkey_add_weight(&data, SORTKEY_EXTRA_SEPARATOR);
|
||||
sortkey_add_weight(&data, SORTKEY_SEPARATOR);
|
||||
|
||||
/* Special weights */
|
||||
for (i = 0; i < str_len; i++)
|
||||
- sortkey_add_special_weights(&data, flags, str[i]);
|
||||
+ sortkey_add_special_weights(&data, flags, str[i], locale);
|
||||
sortkey_add_weight(&data, SORTKEY_TERMINATOR);
|
||||
|
||||
if (data.buffer_pos <= buffer_len || !buffer)
|
||||
@@ -5667,7 +5686,7 @@ INT WINAPI DECLSPEC_HOTPATCH LCMapStringEx( const WCHAR *locale, DWORD flags, co
|
||||
TRACE( "(%s,0x%08lx,%s,%d,%p,%d)\n",
|
||||
debugstr_w(locale), flags, debugstr_wn(src, srclen), srclen, dst, dstlen );
|
||||
|
||||
- if (!(ret = sortkey_generate(flags, L"", src, srclen, (BYTE *)dst, dstlen )))
|
||||
+ if (!(ret = sortkey_generate(flags, locale, src, srclen, (BYTE *)dst, dstlen )))
|
||||
SetLastError( ERROR_INSUFFICIENT_BUFFER );
|
||||
return ret;
|
||||
}
|
||||
--
|
||||
2.34.1
|
||||
|
@ -1,446 +0,0 @@
|
||||
From 11619cf1483a96e329f640a0abcc571857a87a73 Mon Sep 17 00:00:00 2001
|
||||
From: Fabian Maurer <dark.shadow4@web.de>
|
||||
Date: Sun, 6 Dec 2020 20:57:16 +0100
|
||||
Subject: [PATCH] kernelbase: Implement CompareString functions
|
||||
|
||||
---
|
||||
dlls/kernel32/tests/locale.c | 33 +++--
|
||||
dlls/kernelbase/locale.c | 261 ++++++++++++++++++-----------------
|
||||
2 files changed, 149 insertions(+), 145 deletions(-)
|
||||
|
||||
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
|
||||
index 08723c4b865..23054727ad7 100644
|
||||
--- a/dlls/kernel32/tests/locale.c
|
||||
+++ b/dlls/kernel32/tests/locale.c
|
||||
@@ -1894,16 +1894,16 @@ static void test_CompareStringA(void)
|
||||
"a\\0b vs a expected CSTR_EQUAL or CSTR_GREATER_THAN, got %d\n", ret);
|
||||
|
||||
ret = CompareStringA(lcid, 0, "\2", 2, "\1", 2);
|
||||
- todo_wine ok(ret != CSTR_EQUAL, "\\2 vs \\1 expected unequal\n");
|
||||
+ ok(ret != CSTR_EQUAL, "\\2 vs \\1 expected unequal\n");
|
||||
|
||||
ret = CompareStringA(lcid, NORM_IGNORECASE | LOCALE_USE_CP_ACP, "#", -1, ".", -1);
|
||||
- todo_wine ok(ret == CSTR_LESS_THAN, "\"#\" vs \".\" expected CSTR_LESS_THAN, got %d\n", ret);
|
||||
+ ok(ret == CSTR_LESS_THAN, "\"#\" vs \".\" expected CSTR_LESS_THAN, got %d\n", ret);
|
||||
|
||||
ret = CompareStringA(lcid, NORM_IGNORECASE, "_", -1, ".", -1);
|
||||
- todo_wine ok(ret == CSTR_GREATER_THAN, "\"_\" vs \".\" expected CSTR_GREATER_THAN, got %d\n", ret);
|
||||
+ ok(ret == CSTR_GREATER_THAN, "\"_\" vs \".\" expected CSTR_GREATER_THAN, got %d\n", ret);
|
||||
|
||||
ret = lstrcmpiA("#", ".");
|
||||
- todo_wine ok(ret == -1, "\"#\" vs \".\" expected -1, got %d\n", ret);
|
||||
+ ok(ret == -1, "\"#\" vs \".\" expected -1, got %d\n", ret);
|
||||
|
||||
lcid = MAKELCID(MAKELANGID(LANG_POLISH, SUBLANG_DEFAULT), SORT_DEFAULT);
|
||||
|
||||
@@ -1989,9 +1989,9 @@ static void test_CompareStringW(void)
|
||||
ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
|
||||
|
||||
ret = CompareStringW(LOCALE_USER_DEFAULT, NORM_IGNORENONSPACE, ABC_EE, 3, A_ACUTE_BC, 4);
|
||||
- todo_wine ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
|
||||
+ ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
|
||||
ret = CompareStringW(LOCALE_USER_DEFAULT, NORM_IGNORENONSPACE, ABC_EE, 4, A_ACUTE_BC_DECOMP, 5);
|
||||
- todo_wine ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
|
||||
+ ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
|
||||
ret = CompareStringW(LOCALE_USER_DEFAULT, NORM_IGNORENONSPACE, A_ACUTE_BC, 4, A_ACUTE_BC_DECOMP, 5);
|
||||
ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
|
||||
|
||||
@@ -2003,12 +2003,12 @@ static void test_CompareStringW(void)
|
||||
ret = CompareStringW(LOCALE_USER_DEFAULT, 0, A_NULL_BC, 4, A_ACUTE_BC, 4);
|
||||
ok(ret == CSTR_LESS_THAN, "expected CSTR_LESS_THAN, got %d\n", ret);
|
||||
ret = CompareStringW(LOCALE_USER_DEFAULT, NORM_IGNORENONSPACE, A_NULL_BC, 4, A_ACUTE_BC, 4);
|
||||
- todo_wine ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
|
||||
+ ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
|
||||
|
||||
ret = CompareStringW(LOCALE_USER_DEFAULT, 0, A_NULL_BC, 4, A_ACUTE_BC_DECOMP, 5);
|
||||
ok(ret == CSTR_LESS_THAN, "expected CSTR_LESS_THAN, got %d\n", ret);
|
||||
ret = CompareStringW(LOCALE_USER_DEFAULT, NORM_IGNORENONSPACE, A_NULL_BC, 4, A_ACUTE_BC_DECOMP, 5);
|
||||
- todo_wine ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
|
||||
+ ok(ret == CSTR_EQUAL, "expected CSTR_EQUAL, got %d\n", ret);
|
||||
}
|
||||
|
||||
struct comparestringex_test {
|
||||
@@ -2045,7 +2045,7 @@ static const struct comparestringex_test comparestringex_tests[] = {
|
||||
},
|
||||
{ /* 5 */
|
||||
"tr-TR", 0,
|
||||
- {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, TRUE
|
||||
+ {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, FALSE
|
||||
},
|
||||
/* with NORM_IGNORECASE */
|
||||
{ /* 6 */
|
||||
@@ -2070,7 +2070,7 @@ static const struct comparestringex_test comparestringex_tests[] = {
|
||||
},
|
||||
{ /* 11 */
|
||||
"tr-TR", NORM_IGNORECASE,
|
||||
- {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, TRUE
|
||||
+ {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, FALSE
|
||||
},
|
||||
/* with NORM_LINGUISTIC_CASING */
|
||||
{ /* 12 */
|
||||
@@ -2095,7 +2095,7 @@ static const struct comparestringex_test comparestringex_tests[] = {
|
||||
},
|
||||
{ /* 17 */
|
||||
"tr-TR", NORM_LINGUISTIC_CASING,
|
||||
- {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, TRUE
|
||||
+ {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, FALSE
|
||||
},
|
||||
/* with LINGUISTIC_IGNORECASE */
|
||||
{ /* 18 */
|
||||
@@ -2120,7 +2120,7 @@ static const struct comparestringex_test comparestringex_tests[] = {
|
||||
},
|
||||
{ /* 23 */
|
||||
"tr-TR", LINGUISTIC_IGNORECASE,
|
||||
- {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, TRUE
|
||||
+ {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, FALSE
|
||||
},
|
||||
/* with NORM_LINGUISTIC_CASING | NORM_IGNORECASE */
|
||||
{ /* 24 */
|
||||
@@ -2145,7 +2145,7 @@ static const struct comparestringex_test comparestringex_tests[] = {
|
||||
},
|
||||
{ /* 29 */
|
||||
"tr-TR", NORM_LINGUISTIC_CASING | NORM_IGNORECASE,
|
||||
- {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, TRUE
|
||||
+ {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, -1, FALSE
|
||||
},
|
||||
/* with NORM_LINGUISTIC_CASING | LINGUISTIC_IGNORECASE */
|
||||
{ /* 30 */
|
||||
@@ -2170,7 +2170,7 @@ static const struct comparestringex_test comparestringex_tests[] = {
|
||||
},
|
||||
{ /* 35 */
|
||||
"tr-TR", NORM_LINGUISTIC_CASING | LINGUISTIC_IGNORECASE,
|
||||
- {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, CSTR_LESS_THAN, TRUE
|
||||
+ {0x130,0}, {0x131,0}, CSTR_GREATER_THAN, CSTR_LESS_THAN, FALSE
|
||||
}
|
||||
};
|
||||
|
||||
@@ -3540,6 +3540,9 @@ static const struct sorting_test_entry unicode_sorting_tests[] =
|
||||
{ L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x00dc", L"\x1ee9" },
|
||||
{ L"en-US", CSTR_LESS_THAN, CSTR_LESS_THAN, 0, L"\x00fc", L"\x1ee6" },
|
||||
{ L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\x0152", L"\x00d6" },
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xa042\x09bc", L"\xa042" }, /* Diacritic is added */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xa063\x302b", L"\xa063" }, /* Diacritic is added */
|
||||
+ { L"en-US", CSTR_GREATER_THAN, CSTR_GREATER_THAN, 0, L"\xa07e\x0c56", L"\xa07e" }, /* Diacritic is added */
|
||||
};
|
||||
|
||||
static void test_unicode_sorting(void)
|
||||
@@ -6537,7 +6540,7 @@ static void test_FindNLSStringEx(void)
|
||||
{ localeW, FIND_FROMSTART, comb_s_accent1W, ARRAY_SIZE(comb_s_accent1W)-1,
|
||||
comb_s_accent2W, ARRAY_SIZE(comb_s_accent2W)-1, 0, 0, 6, 1, TRUE },
|
||||
{ localeW, FIND_FROMSTART, comb_q_accent1W, ARRAY_SIZE(comb_q_accent1W)-1,
|
||||
- comb_q_accent2W, ARRAY_SIZE(comb_q_accent2W)-1, 0, 0, 7, 1, FALSE },
|
||||
+ comb_q_accent2W, ARRAY_SIZE(comb_q_accent2W)-1, 0, 0, 7, 0, FALSE },
|
||||
{ 0 }
|
||||
};
|
||||
struct test_data *ptest;
|
||||
diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c
|
||||
index bc0e3b878c3..6ec5a0fad41 100644
|
||||
--- a/dlls/kernelbase/locale.c
|
||||
+++ b/dlls/kernelbase/locale.c
|
||||
@@ -3156,126 +3156,6 @@ static int map_to_halfwidth( WCHAR c, WCHAR *dst, int dstlen )
|
||||
return 1;
|
||||
}
|
||||
|
||||
-
|
||||
-/* 32-bit collation element table format:
|
||||
- * unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit,
|
||||
- * case weight - high 4 bit of low 8 bit.
|
||||
- */
|
||||
-
|
||||
-enum weight { UNICODE_WEIGHT, DIACRITIC_WEIGHT, CASE_WEIGHT };
|
||||
-
|
||||
-static unsigned int get_weight( WCHAR ch, enum weight type )
|
||||
-{
|
||||
- unsigned int ret;
|
||||
-
|
||||
- ret = collation_table[collation_table[collation_table[ch >> 8] + ((ch >> 4) & 0x0f)] + (ch & 0xf)];
|
||||
- if (ret == ~0u) return ch;
|
||||
-
|
||||
- switch (type)
|
||||
- {
|
||||
- case UNICODE_WEIGHT: return ret >> 16;
|
||||
- case DIACRITIC_WEIGHT: return (ret >> 8) & 0xff;
|
||||
- case CASE_WEIGHT: return (ret >> 4) & 0x0f;
|
||||
- default: return 0;
|
||||
- }
|
||||
-}
|
||||
-
|
||||
-
|
||||
-static void inc_str_pos( const WCHAR **str, int *len, unsigned int *dpos, unsigned int *dlen )
|
||||
-{
|
||||
- (*dpos)++;
|
||||
- if (*dpos == *dlen)
|
||||
- {
|
||||
- *dpos = *dlen = 0;
|
||||
- (*str)++;
|
||||
- (*len)--;
|
||||
- }
|
||||
-}
|
||||
-
|
||||
-
|
||||
-static int compare_weights(int flags, const WCHAR *str1, int len1,
|
||||
- const WCHAR *str2, int len2, enum weight type )
|
||||
-{
|
||||
- unsigned int ce1, ce2, dpos1 = 0, dpos2 = 0, dlen1 = 0, dlen2 = 0;
|
||||
- const WCHAR *dstr1 = NULL, *dstr2 = NULL;
|
||||
-
|
||||
- while (len1 > 0 && len2 > 0)
|
||||
- {
|
||||
- if (!dlen1 && !(dstr1 = get_decomposition( *str1, &dlen1 ))) dstr1 = str1;
|
||||
- if (!dlen2 && !(dstr2 = get_decomposition( *str2, &dlen2 ))) dstr2 = str2;
|
||||
-
|
||||
- if (flags & NORM_IGNORESYMBOLS)
|
||||
- {
|
||||
- int skip = 0;
|
||||
- /* FIXME: not tested */
|
||||
- if (get_char_type( CT_CTYPE1, dstr1[dpos1] ) & (C1_PUNCT | C1_SPACE))
|
||||
- {
|
||||
- inc_str_pos( &str1, &len1, &dpos1, &dlen1 );
|
||||
- skip = 1;
|
||||
- }
|
||||
- if (get_char_type( CT_CTYPE1, dstr2[dpos2] ) & (C1_PUNCT | C1_SPACE))
|
||||
- {
|
||||
- inc_str_pos( &str2, &len2, &dpos2, &dlen2 );
|
||||
- skip = 1;
|
||||
- }
|
||||
- if (skip) continue;
|
||||
- }
|
||||
-
|
||||
- /* hyphen and apostrophe are treated differently depending on
|
||||
- * whether SORT_STRINGSORT specified or not
|
||||
- */
|
||||
- if (type == UNICODE_WEIGHT && !(flags & SORT_STRINGSORT))
|
||||
- {
|
||||
- if (dstr1[dpos1] == '-' || dstr1[dpos1] == '\'')
|
||||
- {
|
||||
- if (dstr2[dpos2] != '-' && dstr2[dpos2] != '\'')
|
||||
- {
|
||||
- inc_str_pos( &str1, &len1, &dpos1, &dlen1 );
|
||||
- continue;
|
||||
- }
|
||||
- }
|
||||
- else if (dstr2[dpos2] == '-' || dstr2[dpos2] == '\'')
|
||||
- {
|
||||
- inc_str_pos( &str2, &len2, &dpos2, &dlen2 );
|
||||
- continue;
|
||||
- }
|
||||
- }
|
||||
-
|
||||
- ce1 = get_weight( dstr1[dpos1], type );
|
||||
- if (!ce1)
|
||||
- {
|
||||
- inc_str_pos( &str1, &len1, &dpos1, &dlen1 );
|
||||
- continue;
|
||||
- }
|
||||
- ce2 = get_weight( dstr2[dpos2], type );
|
||||
- if (!ce2)
|
||||
- {
|
||||
- inc_str_pos( &str2, &len2, &dpos2, &dlen2 );
|
||||
- continue;
|
||||
- }
|
||||
-
|
||||
- if (ce1 - ce2) return ce1 - ce2;
|
||||
-
|
||||
- inc_str_pos( &str1, &len1, &dpos1, &dlen1 );
|
||||
- inc_str_pos( &str2, &len2, &dpos2, &dlen2 );
|
||||
- }
|
||||
- while (len1)
|
||||
- {
|
||||
- if (!dlen1 && !(dstr1 = get_decomposition( *str1, &dlen1 ))) dstr1 = str1;
|
||||
- ce1 = get_weight( dstr1[dpos1], type );
|
||||
- if (ce1) break;
|
||||
- inc_str_pos( &str1, &len1, &dpos1, &dlen1 );
|
||||
- }
|
||||
- while (len2)
|
||||
- {
|
||||
- if (!dlen2 && !(dstr2 = get_decomposition( *str2, &dlen2 ))) dstr2 = str2;
|
||||
- ce2 = get_weight( dstr2[dpos2], type );
|
||||
- if (ce2) break;
|
||||
- inc_str_pos( &str2, &len2, &dpos2, &dlen2 );
|
||||
- }
|
||||
- return len1 - len2;
|
||||
-}
|
||||
-
|
||||
enum sortkey_special_script
|
||||
{
|
||||
SORTKEY_UNSORTABLE = 0,
|
||||
@@ -3313,6 +3193,7 @@ struct sortkey_data
|
||||
BYTE *buffer;
|
||||
int buffer_pos;
|
||||
int buffer_len;
|
||||
+ BOOL is_compare_string;
|
||||
};
|
||||
|
||||
static DWORD sortkey_get_exception(WCHAR ch, const struct sortguid *locale)
|
||||
@@ -3524,7 +3405,10 @@ static void sortkey_add_diacritic_weights(struct sortkey_data *data, int flags,
|
||||
if (old_pos >= diacritic_start_pos)
|
||||
{
|
||||
if (old_pos < data->buffer_len)
|
||||
+ {
|
||||
data->buffer[old_pos] += info.weight_diacritic; /* Overflow can happen, that's okay */
|
||||
+ *last_weighted_pos = data->buffer_pos;
|
||||
+ }
|
||||
}
|
||||
else
|
||||
sortkey_add_diacritic_weight(data, info.weight_diacritic, last_weighted_pos);
|
||||
@@ -3733,6 +3617,7 @@ static int sortkey_generate(int flags, const WCHAR *locale_name, const WCHAR *st
|
||||
data.buffer = buffer;
|
||||
data.buffer_pos = 0;
|
||||
data.buffer_len = buffer ? buffer_len : 0;
|
||||
+ data.is_compare_string = FALSE;
|
||||
|
||||
if (str_len == -1)
|
||||
str_len = wcslen(str);
|
||||
@@ -3782,6 +3667,130 @@ static int sortkey_generate(int flags, const WCHAR *locale_name, const WCHAR *st
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int early_exit_sortkey_comparison(const struct sortkey_data* data1, const struct sortkey_data* data2, int start_index)
|
||||
+{
|
||||
+ int i;
|
||||
+ int end_index = min(data1->buffer_pos, data2->buffer_pos);
|
||||
+
|
||||
+ for (i = start_index; i < end_index; i++)
|
||||
+ {
|
||||
+ BYTE weight1 = data1->buffer[i];
|
||||
+ BYTE weight2 = data2->buffer[i];
|
||||
+
|
||||
+ if (weight1 > weight2) return CSTR_GREATER_THAN;
|
||||
+ if (weight1 < weight2) return CSTR_LESS_THAN;
|
||||
+ }
|
||||
+
|
||||
+ return CSTR_EQUAL;
|
||||
+}
|
||||
+
|
||||
+static int sortkey_compare(int flags, const WCHAR *locale_name, const WCHAR *str1, int str1_len, const WCHAR *str2, int str2_len)
|
||||
+{
|
||||
+ int i1, i2;
|
||||
+ int ret;
|
||||
+ struct sortkey_data data1, data2;
|
||||
+ const struct sortguid *locale = get_language_sort(locale_name);
|
||||
+ int diacritic_start_pos1;
|
||||
+ int last_weighted_pos1;
|
||||
+ int diacritic_start_pos2;
|
||||
+ int last_weighted_pos2;
|
||||
+ int pos_weight_compare;
|
||||
+
|
||||
+ BYTE buffer1[10000];
|
||||
+ BYTE buffer2[10000];
|
||||
+
|
||||
+ data1.buffer = buffer1;
|
||||
+ data1.buffer_pos = 0;
|
||||
+ data1.buffer_len = sizeof(buffer1);
|
||||
+ data1.is_compare_string = TRUE;
|
||||
+
|
||||
+ data2.buffer = buffer2;
|
||||
+ data2.buffer_pos = 0;
|
||||
+ data2.buffer_len = sizeof(buffer2);
|
||||
+ data2.is_compare_string = TRUE;
|
||||
+
|
||||
+ /* Main weights */
|
||||
+ for (i1 = 0, i2 = 0; i1 < str1_len || i2 < str2_len; i1++, i2++)
|
||||
+ {
|
||||
+ int pos_weight_compare = min(data1.buffer_pos, data2.buffer_pos);
|
||||
+ if (i1 < str1_len)
|
||||
+ {
|
||||
+ sortkey_add_main_weights(&data1, flags, str1[i1], locale);
|
||||
+ }
|
||||
+ if (i2 < str2_len)
|
||||
+ {
|
||||
+ sortkey_add_main_weights(&data2, flags, str2[i2], locale);
|
||||
+ }
|
||||
+
|
||||
+ /* For clear differences we must return early without reading all characters. See tests. */
|
||||
+ ret = early_exit_sortkey_comparison(&data1, &data2, pos_weight_compare);
|
||||
+ if (ret != CSTR_EQUAL)
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ if (data1.buffer_pos > data2.buffer_pos)
|
||||
+ return CSTR_GREATER_THAN;
|
||||
+ if (data1.buffer_pos < data2.buffer_pos)
|
||||
+ return CSTR_LESS_THAN;
|
||||
+
|
||||
+ diacritic_start_pos1 = data1.buffer_pos;
|
||||
+ last_weighted_pos1 = data1.buffer_pos;
|
||||
+ diacritic_start_pos2 = data2.buffer_pos;
|
||||
+ last_weighted_pos2 = data2.buffer_pos;
|
||||
+ pos_weight_compare = min(data1.buffer_pos, data2.buffer_pos);
|
||||
+ /* Diacritic weights */
|
||||
+ if (!(flags & NORM_IGNORENONSPACE))
|
||||
+ {
|
||||
+ for (i1 = 0, i2 = 0; i1 < str1_len || i2 < str2_len; i1++, i2++)
|
||||
+ {
|
||||
+ if (i1 < str1_len)
|
||||
+ {
|
||||
+ sortkey_add_diacritic_weights(&data1, flags, str1[i1], &last_weighted_pos1, diacritic_start_pos1, locale);
|
||||
+ }
|
||||
+ if (i2 < str2_len)
|
||||
+ {
|
||||
+ sortkey_add_diacritic_weights(&data2, flags, str2[i2], &last_weighted_pos2, diacritic_start_pos2, locale);
|
||||
+ }
|
||||
+ }
|
||||
+ data1.buffer_pos = last_weighted_pos1;
|
||||
+ data2.buffer_pos = last_weighted_pos2;
|
||||
+
|
||||
+ ret = early_exit_sortkey_comparison(&data1, &data2, pos_weight_compare);
|
||||
+ if (ret != CSTR_EQUAL)
|
||||
+ return ret;
|
||||
+
|
||||
+ if (data1.buffer_pos > data2.buffer_pos)
|
||||
+ return CSTR_GREATER_THAN;
|
||||
+ if (data1.buffer_pos < data2.buffer_pos)
|
||||
+ return CSTR_LESS_THAN;
|
||||
+ }
|
||||
+
|
||||
+ /* Special weights */
|
||||
+ for (i1 = 0, i2 = 0; i1 < str1_len || i2 < str2_len; i1++, i2++)
|
||||
+ {
|
||||
+ int pos_weight_compare = min(data1.buffer_pos, data2.buffer_pos);
|
||||
+ if (i1 < str1_len)
|
||||
+ {
|
||||
+ sortkey_add_special_weights(&data1, flags, str1[i1], locale);
|
||||
+ }
|
||||
+ if (i2 < str2_len)
|
||||
+ {
|
||||
+ sortkey_add_special_weights(&data2, flags, str2[i2], locale);
|
||||
+ }
|
||||
+
|
||||
+ ret = early_exit_sortkey_comparison(&data1, &data2, pos_weight_compare);
|
||||
+ if (ret != CSTR_EQUAL)
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ if (data1.buffer_pos > data2.buffer_pos)
|
||||
+ return CSTR_GREATER_THAN;
|
||||
+ if (data1.buffer_pos < data2.buffer_pos)
|
||||
+ return CSTR_LESS_THAN;
|
||||
+
|
||||
+ return CSTR_EQUAL;
|
||||
+}
|
||||
+
|
||||
static int compare_tzdate( const TIME_FIELDS *tf, const SYSTEMTIME *compare )
|
||||
{
|
||||
static const int month_lengths[12] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
|
||||
@@ -4317,16 +4326,8 @@ INT WINAPI CompareStringEx( const WCHAR *locale, DWORD flags, const WCHAR *str1,
|
||||
if (len1 < 0) len1 = lstrlenW(str1);
|
||||
if (len2 < 0) len2 = lstrlenW(str2);
|
||||
|
||||
- ret = compare_weights( flags, str1, len1, str2, len2, UNICODE_WEIGHT );
|
||||
- if (!ret)
|
||||
- {
|
||||
- if (!(flags & NORM_IGNORENONSPACE))
|
||||
- ret = compare_weights( flags, str1, len1, str2, len2, DIACRITIC_WEIGHT );
|
||||
- if (!ret && !(flags & NORM_IGNORECASE))
|
||||
- ret = compare_weights( flags, str1, len1, str2, len2, CASE_WEIGHT );
|
||||
- }
|
||||
- if (!ret) return CSTR_EQUAL;
|
||||
- return (ret < 0) ? CSTR_LESS_THAN : CSTR_GREATER_THAN;
|
||||
+ ret = sortkey_compare(flags, locale, str1, len1, str2, len2);
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
|
||||
--
|
||||
2.35.1
|
||||
|
@ -1,3 +0,0 @@
|
||||
Fixes: [5163] Microsoft Office XP 2002 installer reports error 25003 (installation source corrupted), custom action 'CADpc' returns 1603
|
||||
Fixes: [10767] Fix comparison of punctuation characters in lstrcmp
|
||||
Fixes: [32490] Graphical issues in Inquisitor
|
@ -51,7 +51,7 @@ usage()
|
||||
# Get the upstream commit sha
|
||||
upstream_commit()
|
||||
{
|
||||
echo "99ce6e87a3b22c5602d7bbedd43bb40627b63321"
|
||||
echo "4312d209232c701b0b78d9f8b463917c989005c5"
|
||||
}
|
||||
|
||||
# Show version information
|
||||
@ -123,7 +123,6 @@ patch_enable_all ()
|
||||
enable_kernel32_Processor_Group="$1"
|
||||
enable_krnl386_exe16_GDT_LDT_Emulation="$1"
|
||||
enable_krnl386_exe16_Invalid_Console_Handles="$1"
|
||||
enable_libs_Unicode_Collation="$1"
|
||||
enable_loader_KeyboardLayouts="$1"
|
||||
enable_mmsystem_dll16_MIDIHDR_Refcount="$1"
|
||||
enable_mountmgr_DosDevices="$1"
|
||||
@ -394,9 +393,6 @@ patch_enable ()
|
||||
krnl386.exe16-Invalid_Console_Handles)
|
||||
enable_krnl386_exe16_Invalid_Console_Handles="$2"
|
||||
;;
|
||||
libs-Unicode_Collation)
|
||||
enable_libs_Unicode_Collation="$2"
|
||||
;;
|
||||
loader-KeyboardLayouts)
|
||||
enable_loader_KeyboardLayouts="$2"
|
||||
;;
|
||||
@ -2112,26 +2108,6 @@ if test "$enable_krnl386_exe16_Invalid_Console_Handles" -eq 1; then
|
||||
patch_apply krnl386.exe16-Invalid_Console_Handles/0001-krnl386.exe16-Really-translate-all-invalid-console-h.patch
|
||||
fi
|
||||
|
||||
# Patchset libs-Unicode_Collation
|
||||
# |
|
||||
# | This patchset fixes the following Wine bugs:
|
||||
# | * [#5163] Microsoft Office XP 2002 installer reports error 25003 (installation source corrupted), custom action 'CADpc'
|
||||
# | returns 1603
|
||||
# | * [#10767] Fix comparison of punctuation characters in lstrcmp
|
||||
# | * [#32490] Graphical issues in Inquisitor
|
||||
# |
|
||||
# | Modified files:
|
||||
# | * dlls/kernel32/tests/locale.c, dlls/kernelbase/locale.c
|
||||
# |
|
||||
if test "$enable_libs_Unicode_Collation" -eq 1; then
|
||||
patch_apply libs-Unicode_Collation/0001-kernelbase-Implement-sortkey-generation-on-official-.patch
|
||||
patch_apply libs-Unicode_Collation/0002-kernelbase-Implement-sortkey-punctuation.patch
|
||||
patch_apply libs-Unicode_Collation/0003-kernelbase-Implement-sortkey-for-Japanese-characters.patch
|
||||
patch_apply libs-Unicode_Collation/0004-kernelbase-Implement-sortkey-expansion.patch
|
||||
patch_apply libs-Unicode_Collation/0005-kernelbase-Implement-sortkey-language-support.patch
|
||||
patch_apply libs-Unicode_Collation/0006-kernelbase-Implement-CompareString-functions.patch
|
||||
fi
|
||||
|
||||
# Patchset loader-KeyboardLayouts
|
||||
# |
|
||||
# | This patchset fixes the following Wine bugs:
|
||||
|
@ -1 +1 @@
|
||||
99ce6e87a3b22c5602d7bbedd43bb40627b63321
|
||||
4312d209232c701b0b78d9f8b463917c989005c5
|
||||
|
Loading…
Reference in New Issue
Block a user