Resynchronize the Shift_JIS converter after unrecognized sequences. Bug 690225, r=emk

This commit is contained in:
Simon Montagu 2011-10-19 07:58:41 +02:00
parent 5629c0bfc8
commit b888e2c269
3 changed files with 15 additions and 4 deletions

View File

@@ -3,7 +3,7 @@
 */
 const inText = "\xfd\xfe\xff\x81\x20\x81\x3f\x86\x3c";
-const expectedText = "\uf8f1\uf8f2\uf8f3\u30fb\u30fb\u30fb";
+const expectedText = "\uf8f1\uf8f2\uf8f3\ufffd \ufffd?\ufffd<";
 const charset = "Shift_JIS";
 load('CharsetConversionTests.js');

View File

@@ -59,6 +59,7 @@ static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CI
 #define JIS0212_INDEX gJIS0212Index
 #define SJIS_UNMAPPED 0x30fb
+#define UNICODE_REPLACEMENT_CHARACTER 0xfffd
 NS_IMETHODIMP nsShiftJISToUnicode::Convert(
 const char * aSrc, PRInt32 * aSrcLen,
@@ -169,10 +170,17 @@ NS_IMETHODIMP nsShiftJISToUnicode::Convert(
 case 1: // Index to table
 {
 PRUint8 off = sbIdx[*src];
+// Error handling: in the case where the second octet is not in the
+// valid ranges 0x40-0x7E 0x80-0xFC, unconsume the invalid octet and
+// interpret it as the ASCII value. In the case where the second
+// octet is in the valid range but there is no mapping for the
+// 2-octet sequence, do not unconsume.
 if(0xFF == off) {
+src--;
 if (mErrBehavior == kOnError_Signal)
 goto error_invalidchar;
-*dest++ = SJIS_UNMAPPED;
+*dest++ = UNICODE_REPLACEMENT_CHARACTER;
 } else {
 PRUnichar ch = gJapaneseMap[mData+off];
 if(ch == 0xfffd) {
@@ -191,11 +199,14 @@ NS_IMETHODIMP nsShiftJISToUnicode::Convert(
 case 2: // EUDC
 {
 PRUint8 off = sbIdx[*src];
+// Error handling as in case 1
 if(0xFF == off) {
+src--;
 if (mErrBehavior == kOnError_Signal)
 goto error_invalidchar;
-*dest++ = SJIS_UNMAPPED;
+*dest++ = UNICODE_REPLACEMENT_CHARACTER;
 } else {
 *dest++ = mData + off;
 }

View File

@@ -6,6 +6,6 @@
 <meta HTTP-equiv="content-type" content="text/html; charset=shift_jis">
 </head>
 <body>
-<p>&#xf8f1;&#xf8f2;&#xf8f3;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;</p>
+<p>&#xf8f1;&#xf8f2;&#xf8f3;&#xfffd; &#xfffd;!&#xfffd;"&#xfffd;#&#xfffd;$&#xfffd;%&#xfffd;&amp;&#xfffd;'&#xfffd;(&#xfffd;)&#xfffd;*&#xfffd;+&#xfffd;,&#xfffd;-&#xfffd;.&#xfffd;/&#xfffd;0&#xfffd;1&#xfffd;2&#xfffd;3&#xfffd;4&#xfffd;5&#xfffd;6&#xfffd;7&#xfffd;8&#xfffd;9&#xfffd;:&#xfffd;;&#xfffd;&lt;&#xfffd;=&#xfffd;&gt;&#xfffd;?&#x30fb;</p>
 </body>
 </html>