linux-packaging-mono/external/bockbuild/packages/patches/pango-coretext-astral-plane-2.patch
Xamarin Public Jenkins (auto-signing) 6bdd276d05 Imported Upstream version 5.0.0.42
Former-commit-id: fd56571888259555122d8a0f58c68838229cea2b
2017-04-10 11:41:01 +00:00

110 lines
4.3 KiB
Diff

661f8c0b920f5da Mon Sep 17 00:00:00 2001
From: Kristian Rietveld <kris@lanedo.com>
Date: Tue, 19 Mar 2013 11:23:49 +0100
Subject: [PATCH 2/2] Detect and handle characters encoded in two UTF16 code
points
Another important change: gi->index should point at the current
character, not at the current index into the string. Before this change,
the current character was assumed to equal the current index into the string.
---
modules/basic/basic-coretext.c | 55 ++++++++++++++++++++++++++++-----------
1 files changed, 39 insertions(+), 16 deletions(-)
diff --git a/modules/basic/basic-coretext.c b/modules/basic/basic-coretext.c
index 33ce479..06b648e 100644
--- a/modules/basic/basic-coretext.c
+++ b/modules/basic/basic-coretext.c
@@ -166,7 +166,42 @@ run_iterator_run_is_non_monotonic (struct RunIterator *iter)
static gunichar
run_iterator_get_character (struct RunIterator *iter)
{
- return CFStringGetCharacterAtIndex (iter->cstr, iter->current_indices[iter->ct_i]);
+ int lower, upper;
+
+ lower = iter->current_indices[iter->ct_i];
+ if (iter->ct_i + 1 < CTRunGetGlyphCount (iter->current_run))
+ upper = iter->current_indices[iter->ct_i + 1];
+ else
+ {
+ CFRange range = CTRunGetStringRange (iter->current_run);
+ upper = range.location + range.length;
+ }
+ /* upper - lower is the number of CFString (UTF-16) indices this glyph covers. */
+ if (upper - lower == 1)
+ return CFStringGetCharacterAtIndex (iter->cstr, lower);
+ if (upper - lower == 2)
+ {
+ /* Two UTF-16 code units: decode them together (e.g. a surrogate pair). */
+ gunichar *ch;
+ gunichar retval;
+ gunichar2 orig[2];
+
+ orig[0] = CFStringGetCharacterAtIndex (iter->cstr, lower);
+ orig[1] = CFStringGetCharacterAtIndex (iter->cstr, lower + 1);
+
+ ch = g_utf16_to_ucs4 (orig, 2, NULL, NULL, NULL);
+ retval = ch != NULL ? *ch : orig[0]; /* NULL means invalid UTF-16; use first unit */
+ g_free (ch);
+
+ return retval;
+ }
+
+ /* This should not be reached, because other cases cannot occur. Instead
+ * of crashing, return the first character which will likely be displayed
+ * as unknown glyph.
+ */
+
+ return CFStringGetCharacterAtIndex (iter->cstr, lower);
}
static CGGlyph
@@ -175,12 +210,6 @@ run_iterator_get_cgglyph (struct RunIterator *iter)
return iter->current_cgglyphs[iter->ct_i];
}
-static CFIndex
-run_iterator_get_index (struct RunIterator *iter)
-{
- return iter->current_indices[iter->ct_i];
-}
-
static gboolean
run_iterator_create (struct RunIterator *iter,
const char *text,
@@ -336,7 +365,7 @@ create_core_text_glyph_list (const char *text,
struct GlyphInfo *gi;
gi = g_slice_new (struct GlyphInfo);
- gi->index = run_iterator_get_index (&riter);
+ gi->index = riter.total_ct_i;
gi->cgglyph = run_iterator_get_cgglyph (&riter);
gi->wc = run_iterator_get_character (&riter);
@@ -376,9 +405,8 @@ basic_engine_shape (PangoEngineShape *engine,
* glyph sequence generated by the CoreText typesetter:
* # E.g. zero-width spaces do not end up in the CoreText glyph sequence. We have
* to manually account for the gap in the character indices.
- * # Sometimes, CoreText generates two glyph for the same character index. We
- * currently handle this "properly" as in we do not crash or corrupt memory,
- * but that's about it.
+ * # Sometimes, CoreText generates two glyphs for the same character index. These
+ * are properly composed into a single 32-bit gunichar.
* # Due to mismatches in size, the CoreText glyph sequence can either be longer or
* shorter than the PangoGlyphString. Note that the size of the PangoGlyphString
* should match the number of characters in "text".
@@ -390,11 +418,6 @@ basic_engine_shape (PangoEngineShape *engine,
* increasing/decreasing.
*
* FIXME items for future fixing:
- * # CoreText strings are UTF16, and the indices *often* refer to characters,
- * but not *always*. Notable exception is when a character is encoded using
- * two UTF16 code points. This are two characters in a CFString. At this point
- * advancing a single character in the CFString and advancing a single character
- * using g_utf8_next_char in the const char string goes out of sync.
* # We currently don't bother about LTR, Pango core appears to fix this up for us.
* (Even when we cared warnings were generated that strings were in the wrong
* order, this should be investigated).
--
1.7.4.4