mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
Bug 828782 - 0001. Add inflate function for replacing invalid byte while decoding UTF-8 character. r=terrence
This commit is contained in:
parent
e4f8bd3055
commit
b5c62bbe77
@ -100,6 +100,7 @@ static JSBool
|
||||
str_encodeURI_Component(JSContext *cx, unsigned argc, Value *vp);
|
||||
|
||||
static const uint32_t INVALID_UTF8 = UINT32_MAX;
|
||||
static const uint32_t REPLACE_UTF8 = 0xFFFD;
|
||||
|
||||
static uint32_t
|
||||
Utf8ToOneUcs4Char(const uint8_t *utf8Buffer, int utf8Length);
|
||||
@ -3932,6 +3933,97 @@ js::InflateStringToBuffer(JSContext *maybecx, const char *src, size_t srclen,
|
||||
return JS_TRUE;
|
||||
}
|
||||
|
||||
bool
|
||||
js::InflateUTF8StringToBufferReplaceInvalid(JSContext *cx, const char *src,
|
||||
size_t srclen, jschar *dst,
|
||||
size_t *dstlenp)
|
||||
{
|
||||
mozilla::Maybe<AutoSuppressGC> suppress;
|
||||
if (cx)
|
||||
suppress.construct(cx);
|
||||
|
||||
size_t dstlen, origDstlen, offset, j, n;
|
||||
uint32_t v;
|
||||
|
||||
dstlen = dst ? *dstlenp : (size_t) -1;
|
||||
origDstlen = dstlen;
|
||||
offset = 0;
|
||||
|
||||
while (srclen) {
|
||||
v = (uint8_t) *src;
|
||||
n = 1;
|
||||
if (v & 0x80) {
|
||||
while (v & (0x80 >> n))
|
||||
n++;
|
||||
if (n > srclen || n == 1 || n > 4) {
|
||||
/* Incorrect length for decoding. */
|
||||
v = REPLACE_UTF8;
|
||||
n = 1;
|
||||
goto appendCharacter;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for invalid second byte.
|
||||
*
|
||||
* @From Unicode Standard v6.2, Table 3-7 Well-Formed UTF-8 Byte Sequences.
|
||||
*/
|
||||
if ((v == 0xE0 && ((uint8_t)src[1] & 0xE0) != 0xA0) || // E0 A0~BF
|
||||
(v == 0xED && ((uint8_t)src[1] & 0xE0) != 0x80) || // ED 80~9F
|
||||
(v == 0xF0 && ((uint8_t)src[1] & 0xF0) == 0x80) || // F0 90~BF
|
||||
(v == 0xF4 && ((uint8_t)src[1] & 0xF0) != 0x80)) // F4 80~8F
|
||||
{
|
||||
v = REPLACE_UTF8;
|
||||
n = 1;
|
||||
goto appendCharacter;
|
||||
}
|
||||
|
||||
for (j = 1; j < n; j++) {
|
||||
if ((src[j] & 0xC0) != 0x80) {
|
||||
/* Invalid sub-sequence. */
|
||||
v = REPLACE_UTF8;
|
||||
n = j;
|
||||
goto appendCharacter;
|
||||
}
|
||||
}
|
||||
|
||||
v = Utf8ToOneUcs4Char((uint8_t *)src, n);
|
||||
if (v >= 0x10000) {
|
||||
v -= 0x10000;
|
||||
if (v > 0xFFFFF || dstlen < 2) {
|
||||
/* Incorrect code point. */
|
||||
v = REPLACE_UTF8;
|
||||
n = 1;
|
||||
goto appendCharacter;
|
||||
}
|
||||
if (dst) {
|
||||
*dst++ = (jschar)((v >> 10) + 0xD800);
|
||||
v = (jschar)((v & 0x3FF) + 0xDC00);
|
||||
}
|
||||
dstlen--;
|
||||
}
|
||||
}
|
||||
appendCharacter:
|
||||
if (!dstlen)
|
||||
goto bufferTooSmall;
|
||||
if (dst)
|
||||
*dst++ = (jschar) v;
|
||||
dstlen--;
|
||||
offset += n;
|
||||
src += n;
|
||||
srclen -= n;
|
||||
}
|
||||
*dstlenp = (origDstlen - dstlen);
|
||||
return true;
|
||||
|
||||
bufferTooSmall:
|
||||
*dstlenp = (origDstlen - dstlen);
|
||||
if (cx) {
|
||||
JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
|
||||
JSMSG_BUFFER_TOO_SMALL);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
js::InflateUTF8StringToBuffer(JSContext *cx, const char *src, size_t srclen,
|
||||
jschar *dst, size_t *dstlenp)
|
||||
|
@ -247,6 +247,16 @@ extern bool
|
||||
InflateUTF8StringToBuffer(JSContext *cx, const char *bytes, size_t length,
|
||||
jschar *chars, size_t *charsLength);
|
||||
|
||||
/*
|
||||
* The same as InflateUTF8StringToBuffer(), except that any malformed UTF-8
|
||||
* characters will be replaced by \uFFFD. No exception will be thrown for
|
||||
* malformed UTF-8 input.
|
||||
*/
|
||||
extern bool
|
||||
InflateUTF8StringToBufferReplaceInvalid(JSContext *cx, const char *bytes,
|
||||
size_t length, jschar *chars,
|
||||
size_t *charsLength);
|
||||
|
||||
/*
|
||||
* Deflate JS chars to bytes into a buffer. 'bytes' must be large enough for
|
||||
* 'length chars. The buffer is NOT null-terminated. The destination length
|
||||
|
Loading…
Reference in New Issue
Block a user