6bdd276d05
Former-commit-id: fd56571888259555122d8a0f58c68838229cea2b
110 lines
4.3 KiB
Diff
110 lines
4.3 KiB
Diff
661f8c0b920f5da Mon Sep 17 00:00:00 2001
|
|
From: Kristian Rietveld <kris@lanedo.com>
|
|
Date: Tue, 19 Mar 2013 11:23:49 +0100
|
|
Subject: [PATCH 2/2] Detect and handle characters encoded in two UTF16 code
|
|
points
|
|
|
|
Another important change: gi->index should point at the current
|
|
character, not the current index into the string. Before this change,
|
|
the current character equaled the current index into the string.
|
|
---
|
|
modules/basic/basic-coretext.c | 55 ++++++++++++++++++++++++++++-----------
|
|
1 files changed, 39 insertions(+), 16 deletions(-)
|
|
|
|
diff --git a/modules/basic/basic-coretext.c b/modules/basic/basic-coretext.c
|
|
index 33ce479..06b648e 100644
|
|
--- a/modules/basic/basic-coretext.c
|
|
+++ b/modules/basic/basic-coretext.c
|
|
@@ -166,7 +166,42 @@ run_iterator_run_is_non_monotonic (struct RunIterator *iter)
|
|
static gunichar
|
|
run_iterator_get_character (struct RunIterator *iter)
|
|
{
|
|
- return CFStringGetCharacterAtIndex (iter->cstr, iter->current_indices[iter->ct_i]);
|
|
+ int lower, upper;
|
|
+
|
|
+ lower = iter->current_indices[iter->ct_i];
|
|
+ if (iter->ct_i + 1 < CTRunGetGlyphCount (iter->current_run))
|
|
+ upper = iter->current_indices[iter->ct_i + 1];
|
|
+ else
|
|
+ {
|
|
+ CFRange range = CTRunGetStringRange (iter->current_run);
|
|
+ upper = range.location + range.length;
|
|
+ }
|
|
+
|
|
+ if (upper - lower == 1)
|
|
+ return CFStringGetCharacterAtIndex (iter->cstr, lower);
|
|
+ if (upper - lower == 2)
|
|
+ {
|
|
+ /* Character is encoded in two UTF16 code points. */
|
|
+ gunichar *ch;
|
|
+ gunichar retval;
|
|
+ gunichar2 orig[2];
|
|
+
|
|
+ orig[0] = CFStringGetCharacterAtIndex (iter->cstr, lower);
|
|
+ orig[1] = CFStringGetCharacterAtIndex (iter->cstr, lower + 1);
|
|
+
|
|
+ ch = g_utf16_to_ucs4 (orig, 2, NULL, NULL, NULL);
|
|
+ retval = *ch;
|
|
+ g_free (ch);
|
|
+
|
|
+ return retval;
|
|
+ }
|
|
+
|
|
+ /* This should not be reached, because other cases cannot occur. Instead
|
|
+ * of crashing, return the first character which will likely be displayed
|
|
+ * as unknown glyph.
|
|
+ */
|
|
+
|
|
+ return CFStringGetCharacterAtIndex (iter->cstr, lower);
|
|
}
|
|
|
|
static CGGlyph
|
|
@@ -175,12 +210,6 @@ run_iterator_get_cgglyph (struct RunIterator *iter)
|
|
return iter->current_cgglyphs[iter->ct_i];
|
|
}
|
|
|
|
-static CFIndex
|
|
-run_iterator_get_index (struct RunIterator *iter)
|
|
-{
|
|
- return iter->current_indices[iter->ct_i];
|
|
-}
|
|
-
|
|
static gboolean
|
|
run_iterator_create (struct RunIterator *iter,
|
|
const char *text,
|
|
@@ -336,7 +365,7 @@ create_core_text_glyph_list (const char *text,
|
|
struct GlyphInfo *gi;
|
|
|
|
gi = g_slice_new (struct GlyphInfo);
|
|
- gi->index = run_iterator_get_index (&riter);
|
|
+ gi->index = riter.total_ct_i;
|
|
gi->cgglyph = run_iterator_get_cgglyph (&riter);
|
|
gi->wc = run_iterator_get_character (&riter);
|
|
|
|
@@ -376,9 +405,8 @@ basic_engine_shape (PangoEngineShape *engine,
|
|
* glyph sequence generated by the CoreText typesetter:
|
|
* # E.g. zero-width spaces do not end up in the CoreText glyph sequence. We have
|
|
* to manually account for the gap in the character indices.
|
|
- * # Sometimes, CoreText generates two glyph for the same character index. We
|
|
- * currently handle this "properly" as in we do not crash or corrupt memory,
|
|
- * but that's about it.
|
|
+ * # Sometimes, CoreText generates two glyphs for the same character index. These
|
|
+ * are properly composed into a single 32-bit gunichar.
|
|
* # Due to mismatches in size, the CoreText glyph sequence can either be longer or
|
|
* shorter than the PangoGlyphString. Note that the size of the PangoGlyphString
|
|
* should match the number of characters in "text".
|
|
@@ -390,11 +418,6 @@ basic_engine_shape (PangoEngineShape *engine,
|
|
* increasing/decreasing.
|
|
*
|
|
* FIXME items for future fixing:
|
|
- * # CoreText strings are UTF16, and the indices *often* refer to characters,
|
|
- * but not *always*. Notable exception is when a character is encoded using
|
|
- * two UTF16 code points. This are two characters in a CFString. At this point
|
|
- * advancing a single character in the CFString and advancing a single character
|
|
- * using g_utf8_next_char in the const char string goes out of sync.
|
|
* # We currently don't bother about LTR, Pango core appears to fix this up for us.
|
|
* (Even when we cared warnings were generated that strings were in the wrong
|
|
* order, this should be investigated).
|
|
--
|
|
1.7.4.4
|