mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
Fix an error in the UTF-8 state machine table, and clean up the table generally. Bug 811363, r=jfkthame.
This commit is contained in:
parent
67c7db920b
commit
3c93ca46a2
@ -403,79 +403,65 @@ const SMModel SJISSMModel = {
|
||||
|
||||
|
||||
static const uint32_t UTF8_cls [ 256 / 8 ] = {
|
||||
//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
|
||||
PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07 //allow 0x00 as a legal value
|
||||
PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f
|
||||
PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17
|
||||
PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f
|
||||
PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27
|
||||
PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f
|
||||
PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37
|
||||
PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f
|
||||
PCK4BITS(1,1,1,1,1,1,1,1), // 40 - 47
|
||||
PCK4BITS(1,1,1,1,1,1,1,1), // 48 - 4f
|
||||
PCK4BITS(1,1,1,1,1,1,1,1), // 50 - 57
|
||||
PCK4BITS(1,1,1,1,1,1,1,1), // 58 - 5f
|
||||
PCK4BITS(1,1,1,1,1,1,1,1), // 60 - 67
|
||||
PCK4BITS(1,1,1,1,1,1,1,1), // 68 - 6f
|
||||
PCK4BITS(1,1,1,1,1,1,1,1), // 70 - 77
|
||||
PCK4BITS(1,1,1,1,1,1,1,1), // 78 - 7f
|
||||
PCK4BITS(2,2,2,2,3,3,3,3), // 80 - 87
|
||||
PCK4BITS(4,4,4,4,4,4,4,4), // 88 - 8f
|
||||
PCK4BITS(4,4,4,4,4,4,4,4), // 90 - 97
|
||||
PCK4BITS(4,4,4,4,4,4,4,4), // 98 - 9f
|
||||
PCK4BITS(5,5,5,5,5,5,5,5), // a0 - a7
|
||||
PCK4BITS(5,5,5,5,5,5,5,5), // a8 - af
|
||||
PCK4BITS(5,5,5,5,5,5,5,5), // b0 - b7
|
||||
PCK4BITS(5,5,5,5,5,5,5,5), // b8 - bf
|
||||
PCK4BITS(0,0,6,6,6,6,6,6), // c0 - c7
|
||||
PCK4BITS(6,6,6,6,6,6,6,6), // c8 - cf
|
||||
PCK4BITS(6,6,6,6,6,6,6,6), // d0 - d7
|
||||
PCK4BITS(6,6,6,6,6,6,6,6), // d8 - df
|
||||
PCK4BITS(7,8,8,8,8,8,8,8), // e0 - e7
|
||||
PCK4BITS(8,8,8,8,8,9,8,8), // e8 - ef
|
||||
PCK4BITS(10,11,11,11,11,11,11,11), // f0 - f7
|
||||
PCK4BITS(12,13,13,13,14,15,0,0) // f8 - ff
|
||||
PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 00 - 07
|
||||
PCK4BITS( 1, 1, 1, 1, 1, 1, 0, 0), // 08 - 0f
|
||||
PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 10 - 17
|
||||
PCK4BITS( 1, 1, 1, 0, 1, 1, 1, 1), // 18 - 1f
|
||||
PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 20 - 27
|
||||
PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 28 - 2f
|
||||
PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 30 - 37
|
||||
PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 38 - 3f
|
||||
PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 40 - 47
|
||||
PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 48 - 4f
|
||||
PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 50 - 57
|
||||
PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 58 - 5f
|
||||
PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 60 - 67
|
||||
PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 68 - 6f
|
||||
PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 70 - 77
|
||||
PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 78 - 7f
|
||||
PCK4BITS( 2, 2, 2, 2, 2, 2, 2, 2), // 80 - 87
|
||||
PCK4BITS( 2, 2, 2, 2, 2, 2, 2, 2), // 88 - 8f
|
||||
PCK4BITS( 3, 3, 3, 3, 3, 3, 3, 3), // 90 - 97
|
||||
PCK4BITS( 3, 3, 3, 3, 3, 3, 3, 3), // 98 - 9f
|
||||
PCK4BITS( 4, 4, 4, 4, 4, 4, 4, 4), // a0 - a7
|
||||
PCK4BITS( 4, 4, 4, 4, 4, 4, 4, 4), // a8 - af
|
||||
PCK4BITS( 4, 4, 4, 4, 4, 4, 4, 4), // b0 - b7
|
||||
PCK4BITS( 4, 4, 4, 4, 4, 4, 4, 4), // b8 - bf
|
||||
PCK4BITS( 0, 0, 5, 5, 5, 5, 5, 5), // c0 - c7
|
||||
PCK4BITS( 5, 5, 5, 5, 5, 5, 5, 5), // c8 - cf
|
||||
PCK4BITS( 5, 5, 5, 5, 5, 5, 5, 5), // d0 - d7
|
||||
PCK4BITS( 5, 5, 5, 5, 5, 5, 5, 5), // d8 - df
|
||||
PCK4BITS( 6, 7, 7, 7, 7, 7, 7, 7), // e0 - e7
|
||||
PCK4BITS( 7, 7, 7, 7, 7, 8, 7, 7), // e8 - ef
|
||||
PCK4BITS( 9,10,10,10,11, 0, 0, 0), // f0 - f7
|
||||
PCK4BITS( 0, 0, 0, 0, 0, 0, 0, 0) // f8 - ff
|
||||
};
|
||||
|
||||
|
||||
static const uint32_t UTF8_st [ 26] = {
|
||||
PCK4BITS(eError,eStart,eError,eError,eError,eError, 12, 10),//00-07
|
||||
PCK4BITS( 9, 11, 8, 7, 6, 5, 4, 3),//08-0f
|
||||
PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//10-17
|
||||
PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//18-1f
|
||||
PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe),//20-27
|
||||
PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe),//28-2f
|
||||
PCK4BITS(eError,eError, 5, 5, 5, 5,eError,eError),//30-37
|
||||
PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//38-3f
|
||||
PCK4BITS(eError,eError,eError, 5, 5, 5,eError,eError),//40-47
|
||||
PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//48-4f
|
||||
PCK4BITS(eError,eError, 7, 7, 7, 7,eError,eError),//50-57
|
||||
PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//58-5f
|
||||
PCK4BITS(eError,eError,eError,eError, 7, 7,eError,eError),//60-67
|
||||
PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//68-6f
|
||||
PCK4BITS(eError,eError, 9, 9, 9, 9,eError,eError),//70-77
|
||||
PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//78-7f
|
||||
PCK4BITS(eError,eError,eError,eError,eError, 9,eError,eError),//80-87
|
||||
PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//88-8f
|
||||
PCK4BITS(eError,eError, 12, 12, 12, 12,eError,eError),//90-97
|
||||
PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//98-9f
|
||||
PCK4BITS(eError,eError,eError,eError,eError, 12,eError,eError),//a0-a7
|
||||
PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//a8-af
|
||||
PCK4BITS(eError,eError, 12, 12, 12,eError,eError,eError),//b0-b7
|
||||
PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//b8-bf
|
||||
PCK4BITS(eError,eError,eStart,eStart,eStart,eStart,eError,eError),//c0-c7
|
||||
PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError) //c8-cf
|
||||
static const uint32_t UTF8_st [ 15] = {
|
||||
PCK4BITS(eError,eStart,eError,eError,eError, 3, 4, 5), // 00 - 07
|
||||
PCK4BITS( 6, 7, 8, 9,eError,eError,eError,eError), // 08 - 0f
|
||||
PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError), // 10 - 17
|
||||
PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe), // 18 - 1f
|
||||
PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart), // 20 - 27
|
||||
PCK4BITS(eStart,eError,eError,eError,eError,eError,eError,eError), // 28 - 2f
|
||||
PCK4BITS(eError,eError,eError,eError, 3,eError,eError,eError), // 30 - 37
|
||||
PCK4BITS(eError,eError,eError,eError,eError,eError, 3, 3), // 38 - 3f
|
||||
PCK4BITS( 3,eError,eError,eError,eError,eError,eError,eError), // 40 - 47
|
||||
PCK4BITS(eError,eError, 3, 3,eError,eError,eError,eError), // 48 - 4f
|
||||
PCK4BITS(eError,eError,eError,eError,eError,eError, 5, 5), // 50 - 57
|
||||
PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError), // 58 - 5f
|
||||
PCK4BITS(eError,eError, 5, 5, 5,eError,eError,eError), // 60 - 67
|
||||
PCK4BITS(eError,eError,eError,eError,eError,eError, 5,eError), // 68 - 6f
|
||||
PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError) // 70 - 77
|
||||
};
|
||||
|
||||
/*
 * Sequence length in bytes, indexed by the byte class of the first byte
 * (12 classes, see UTF8_cls).  Classes that cannot start a sequence
 * (class 0 and the trail-byte classes 2-4) have length 0.
 */
static const uint32_t UTF8CharLenTable[] = {0, 1, 0, 0, 0, 2, 3, 3, 3, 4, 4, 4};
|
||||
|
||||
const SMModel UTF8SMModel = {
|
||||
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_cls },
|
||||
16,
|
||||
12,
|
||||
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_st },
|
||||
CHAR_LEN_TABLE(UTF8CharLenTable),
|
||||
"UTF-8",
|
||||
};
|
||||
|
||||
|
@ -12,132 +12,105 @@ my(@utf8_cls);
|
||||
my(@utf8_st);
|
||||
my($utf8_ver);
|
||||
|
||||
#
|
||||
#
|
||||
# UTF8 encode the UCS4 into 1 to 6 bytes
|
||||
#
|
||||
#
|
||||
#
|
||||
# UTF8 encode the UCS4 into 1 to 4 bytes
|
||||
#
|
||||
# 1 byte 00 00 00 00 00 00 00 7f
|
||||
# 2 bytes 00 00 00 80 00 00 07 ff
|
||||
# 3 bytes 00 00 08 00 00 00 ff ff
|
||||
# 4 bytes 00 01 00 00 00 1f ff ff
|
||||
# 5 bytes 00 20 00 00 03 ff ff ff
|
||||
# 6 bytes 04 00 00 00 7f ff ff ff
|
||||
#
|
||||
# Howerver, since Surrogate area should not be encoded into UTF8 as
|
||||
# 4 bytes 00 01 00 00 00 10 ff ff
|
||||
#
|
||||
# However, since Surrogate area should not be encoded into UTF8 as
|
||||
# a Surrogate pair, we can remove the surrogate area from UTF8
|
||||
#
|
||||
#
|
||||
# 1 byte 00 00 00 00 00 00 00 7f
|
||||
# 2 bytes 00 00 00 80 00 00 07 ff
|
||||
# 3 bytes 00 00 08 00 00 00 d7 ff
|
||||
# 00 00 e0 00 00 00 ff ff
|
||||
# 4 bytes 00 01 00 00 00 1f ff ff
|
||||
# 5 bytes 00 20 00 00 03 ff ff ff
|
||||
# 6 bytes 04 00 00 00 7f ff ff ff
|
||||
#
|
||||
# Now we break them into 6 bits group for 2-6 bytes UTF8
|
||||
#
|
||||
# 4 bytes 00 01 00 00 00 10 ff ff
|
||||
#
|
||||
# Now we break them into 6-bit groups for 2- to 4-byte UTF-8
|
||||
#
|
||||
# 1 byte 00 7f
|
||||
# 2 bytes 02 00 1f 3f
|
||||
# 3 bytes 00 20 00 0d 1f 3f
|
||||
# 0e 00 00 0f 3f 3f
|
||||
# 4 bytes 00 20 00 00 07 3f 3f 3f
|
||||
# 5 bytes 00 08 00 00 00 03 3f 3f 3f 3f
|
||||
# 6 bytes 00 04 00 00 00 00 01 3f 3f 3f 3f 3f
|
||||
#
|
||||
# 4 bytes 00 10 00 00 04 0f 3f 3f
|
||||
#
|
||||
# Break down more
|
||||
#
|
||||
#
|
||||
# 1 byte 00 7f
|
||||
# 2 bytes 02 00 1f 3f
|
||||
# 3 bytes 00 20 00 00 3f 3f
|
||||
# 01 00 00 0c 3f 3f
|
||||
# 0d 00 00 0d 1f 3f
|
||||
# 0e 00 00 0f 3f 3f
|
||||
# 4 bytes 00 20 00 00 00 3f 3f 3f
|
||||
# 01 00 00 00 07 3f 3f 3f
|
||||
# 5 bytes 00 08 00 00 00 00 3f 3f 3f 3f
|
||||
# 01 00 00 00 00 03 3f 3f 3f 3f
|
||||
# 6 bytes 00 04 00 00 00 00 00 3f 3f 3f 3f 3f
|
||||
# 01 00 00 00 00 00 01 3f 3f 3f 3f 3f
|
||||
#
|
||||
# Now, add
|
||||
# 4 bytes 00 10 00 00 00 3f 3f 3f
|
||||
# 01 00 00 00 03 3f 3f 3f
|
||||
# 04 00 00 00 04 0f 3f 3f
|
||||
#
|
||||
# Now, add
|
||||
# c0 to the lead byte of 2 bytes UTF8
|
||||
# e0 to the lead byte of 3 bytes UTF8
|
||||
# f0 to the lead byte of 4 bytes UTF8
|
||||
# f8 to the lead byte of 5 bytes UTF8
|
||||
# fc to the lead byte of 6 bytes UTF8
|
||||
# 80 to the trail bytes of 2 - 6 bytes UTF8
|
||||
#
|
||||
# 80 to the trail bytes
|
||||
#
|
||||
# 1 byte 00 7f
|
||||
# 2 bytes c2 80 df bf
|
||||
# 3 bytes e0 a0 80 e0 bf bf
|
||||
# e1 80 80 ec bf bf
|
||||
# ed 80 80 ed 9f bf
|
||||
# ee 80 80 ef bf bf
|
||||
# 4 bytes f0 a0 80 80 f0 bf bf bf
|
||||
# f1 80 80 80 f7 bf bf bf
|
||||
# 5 bytes f8 88 80 80 80 f8 bf bf bf bf
|
||||
# f9 80 80 80 80 fb bf bf bf bf
|
||||
# 6 bytes fc 84 80 80 80 80 fc bf bf bf bf bf
|
||||
# fd 80 80 80 80 80 fd bf bf bf bf bf
|
||||
#
|
||||
#
|
||||
# 4 bytes f0 90 80 80 f0 bf bf bf
|
||||
# f1 80 80 80 f3 bf bf bf
|
||||
# f4 80 80 80 f4 8f bf bf
|
||||
#
|
||||
#
|
||||
# Now we can construct our state diagram
|
||||
#
|
||||
# 0:0x00,0x0e,0x0f,0x1b->Error
|
||||
#
|
||||
# 0:0x0e,0x0f,0x1b->Error
|
||||
# 0:[0-0x7f]->0
|
||||
# 0:fd->3
|
||||
# 0:fc->4
|
||||
# 0:[f9-fb]->5
|
||||
# 0:f8->6
|
||||
# 0:[f1-f7]->7
|
||||
# 0:f0->8
|
||||
# 0:[e1-ecee-ef]->9
|
||||
# 0:e0->10
|
||||
# 0:ed->11
|
||||
# 0:[c2-df]->12
|
||||
# 0:[c2-df]->3
|
||||
# 0:e0->4
|
||||
# 0:[e1-ec, ee-ef]->5
|
||||
# 0:ed->6
|
||||
# 0:f0->7
|
||||
# 0:[f1-f3]->8
|
||||
# 0:f4->9
|
||||
# 0:*->Error
|
||||
# 3:[80-bf]->5
|
||||
# 3:[80-bf]->0
|
||||
# 3:*->Error
|
||||
# 4:[84-bf]->5
|
||||
# 4:[a0-bf]->3
|
||||
# 4:*->Error
|
||||
# 5:[80-bf]->7
|
||||
# 5:[80-bf]->3
|
||||
# 5:*->Error
|
||||
# 6:[88-bf]->7
|
||||
# 6:[80-9f]->3
|
||||
# 6:*->Error
|
||||
# 7:[80-bf]->9
|
||||
# 7:[90-bf]->5
|
||||
# 7:*->Error
|
||||
# 8:[a0-bf]->9
|
||||
# 8:[80-bf]->5
|
||||
# 8:*->Error
|
||||
# 9:[80-bf]->12
|
||||
# 9:[80-8f]->5
|
||||
# 9:*->Error
|
||||
# 10:[a0-bf]->12
|
||||
# 10:*->Error
|
||||
# 11:[80-9f]->12
|
||||
# 11:*->Error
|
||||
# 12:[80-bf]->0
|
||||
# 12:*->Error
|
||||
#
|
||||
#
|
||||
# Now we classify the byte values into classes
|
||||
#
|
||||
#
|
||||
# 00,0e,0f,1b:k0
|
||||
# 01-0d,10-1a,1c-7f:k1
|
||||
# 80-83:k2
|
||||
# 84-87:k3
|
||||
# 88-9f:k4
|
||||
# a0-bf:k5
|
||||
# 80-8f:k2
|
||||
# 90-9f:k3
|
||||
# a0-bf:k4
|
||||
# c0-c1:k0
|
||||
# c2-df:k6
|
||||
# e0:k7
|
||||
# e1-ec:k8
|
||||
# ed:k9
|
||||
# ee-ef:k8
|
||||
# f0:k10
|
||||
# f1-f7:k11
|
||||
# f8:k12
|
||||
# f9-fb:k13
|
||||
# fc:k14
|
||||
# fd:k15
|
||||
# fe-ff:k0
|
||||
# c2-df:k5
|
||||
# e0:k6
|
||||
# e1-ec:k7
|
||||
# ed:k8
|
||||
# ee-ef:k7
|
||||
# f0:k9
|
||||
# f1-f3:k10
|
||||
# f4:k11
|
||||
# f5-ff:k0
|
||||
#
|
||||
# Now, let's put them into array form
|
||||
|
||||
@ -148,84 +121,68 @@ my($utf8_ver);
|
||||
[ 0x01 , 0x0d , 1 ],
|
||||
[ 0x10 , 0x1a , 1 ],
|
||||
[ 0x1c , 0x7f , 1 ],
|
||||
[ 0x80 , 0x83 , 2 ],
|
||||
[ 0x84 , 0x87 , 3 ],
|
||||
[ 0x88 , 0x9f , 4 ],
|
||||
[ 0xa0 , 0xbf , 5 ],
|
||||
[ 0x80 , 0x8f , 2 ],
|
||||
[ 0x90 , 0x9f , 3 ],
|
||||
[ 0xa0 , 0xbf , 4 ],
|
||||
[ 0xc0 , 0xc1 , 0 ],
|
||||
[ 0xc2 , 0xdf , 6 ],
|
||||
[ 0xe0 , 0xe0 , 7 ],
|
||||
[ 0xe1 , 0xec , 8 ],
|
||||
[ 0xed , 0xed , 9 ],
|
||||
[ 0xee , 0xef , 8 ],
|
||||
[ 0xf0 , 0xf0 , 10 ],
|
||||
[ 0xf1 , 0xf7 , 11 ],
|
||||
[ 0xf8 , 0xf8 , 12 ],
|
||||
[ 0xf9 , 0xfb , 13 ],
|
||||
[ 0xfc , 0xfc , 14 ],
|
||||
[ 0xfd , 0xfd , 15 ],
|
||||
[ 0xfe , 0xff , 0 ],
|
||||
[ 0xc2 , 0xdf , 5 ],
|
||||
[ 0xe0 , 0xe0 , 6 ],
|
||||
[ 0xe1 , 0xec , 7 ],
|
||||
[ 0xed , 0xed , 8 ],
|
||||
[ 0xee , 0xef , 7 ],
|
||||
[ 0xf0 , 0xf0 , 9 ],
|
||||
[ 0xf1 , 0xf3 , 10 ],
|
||||
[ 0xf4 , 0xf4 , 11 ],
|
||||
[ 0xf5 , 0xff , 0 ],
|
||||
);
|
||||
#
|
||||
# Now, we write the state diagram in class
|
||||
#
|
||||
#
|
||||
# Now we rewrite the state diagram in terms of classes
|
||||
#
|
||||
# 0:k0->Error
|
||||
# 0:k1->0
|
||||
# 0:k15->3
|
||||
# 0:k14->4
|
||||
# 0:k13->5
|
||||
# 0:k12->6
|
||||
# 0:k11->7
|
||||
# 0:k5->3
|
||||
# 0:k6->4
|
||||
# 0:k7->5
|
||||
# 0:k8->6
|
||||
# 0:k9->7
|
||||
# 0:k10->8
|
||||
# 0:k8->9
|
||||
# 0:k7->10
|
||||
# 0:k9->11
|
||||
# 0:k6->12
|
||||
# 0:k11->9
|
||||
# 0:*->Error
|
||||
# 3:k2,k3,k4,k5->5
|
||||
# 3:k2,k3,k4->0
|
||||
# 3:*->Error
|
||||
# 4:k3,k4,k5->5
|
||||
# 4:k4->3
|
||||
# 4:*->Error
|
||||
# 5:k2,k3,k4,k5->7
|
||||
# 5:k2,k3,k4->3
|
||||
# 5:*->Error
|
||||
# 6:k4,k5->7
|
||||
# 6:k2,k3->3
|
||||
# 6:*->Error
|
||||
# 7:k2,k3,k4,k5->9
|
||||
# 7:k3,k4->5
|
||||
# 7:*->Error
|
||||
# 8:k5->9
|
||||
# 8:k2,k3,k4->5
|
||||
# 8:*->Error
|
||||
# 9:k2,k3,k4,k5->12
|
||||
# 9:k2->5
|
||||
# 9:*->Error
|
||||
# 10:k5->12
|
||||
# 10:*->Error
|
||||
# 11:k2,k3,k4->12
|
||||
# 11:*->Error
|
||||
# 12:k2,k3,k4,k5->0
|
||||
# 12:*->Error
|
||||
#
|
||||
#
|
||||
# Now, let's put them into array
|
||||
#
|
||||
#
|
||||
package genverifier;
|
||||
# Transition table: one row per state, one column per byte class (12
# classes).  Values are next states: 0 = Start, 1 = Error, 2 = ItsMe,
# 3-9 = waiting for trail bytes as laid out in the state diagram above.
#
# State 7 follows lead byte f0, whose only legal second bytes are 90-bf
# (classes 3 and 4).  Class 2 (80-8f) must be an error there, otherwise
# overlong four-byte forms are accepted and f0 a0..bf is rejected.
@utf8_st = (
#  0   1   2   3   4   5   6   7   8   9  10  11
   1,  0,  1,  1,  1,  3,  4,  5,  6,  7,  8,  9,  # state 0 Start
   1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  # state 1 Error
   2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  # state 2 ItsMe
   1,  1,  0,  0,  0,  1,  1,  1,  1,  1,  1,  1,  # state 3
   1,  1,  1,  1,  3,  1,  1,  1,  1,  1,  1,  1,  # state 4
   1,  1,  3,  3,  3,  1,  1,  1,  1,  1,  1,  1,  # state 5
   1,  1,  3,  3,  1,  1,  1,  1,  1,  1,  1,  1,  # state 6
   1,  1,  1,  5,  5,  1,  1,  1,  1,  1,  1,  1,  # state 7
   1,  1,  5,  5,  5,  1,  1,  1,  1,  1,  1,  1,  # state 8
   1,  1,  5,  1,  1,  1,  1,  1,  1,  1,  1,  1,  # state 9
);
|
||||
|
||||
|
||||
|
||||
# Generate the verifier source for UTF-8 and write it to stdout.  The
# class count is 12, matching the 12-column rows of @utf8_st.
$utf8_ver = genverifier::GenVerifier("UTF8", "UTF-8", \@utf8_cls, 12, \@utf8_st);
print $utf8_ver;
|
||||
|
||||
|
||||
|
@ -54,13 +54,13 @@ sub GenStatePkg {
|
||||
# Build the one-line C initializer for a packed table descriptor.
#   $name - table name prefix (e.g. "UTF8")
#   $bits - entry width used in the enum names (e.g. 4 -> eIdxSft4bits)
#   $tbl  - table name suffix appended to $name
# Returns the initializer text; nothing is printed here.
sub GenPkg {
  my($name, $bits, $tbl) = @_;
  my($ret);
  $ret = " {" .
         "eIdxSft"  . $bits . "bits, " .
         "eSftMsk"  . $bits . "bits, " .
         "eBitSft"  . $bits . "bits, " .
         "eUnitMsk" . $bits . "bits, " .
         $name . $tbl .
         " }";
  return $ret;
};
|
||||
##--------------------------------------------------------------
|
||||
@ -75,7 +75,7 @@ sub Gen4BitsClass {
|
||||
$ret .= "PCK4BITS(";
|
||||
for($j = $i; $j < $i + 8; $j++) {
|
||||
$cls = &GetClass($j,$clstbl);
|
||||
$ret .= sprintf("%d", $cls) ;
|
||||
$ret .= sprintf("%2d", $cls) ;
|
||||
if($j != ($i+7)) {
|
||||
$ret .= ",";
|
||||
}
|
||||
@ -85,7 +85,7 @@ sub Gen4BitsClass {
|
||||
} else {
|
||||
$ret .= "),";
|
||||
}
|
||||
$ret .= sprintf(" // %02x - %02x \n", $i, ($i+7));
|
||||
$ret .= sprintf(" // %02x - %02x\n", $i, ($i+7));
|
||||
}
|
||||
$ret .= "};\n";
|
||||
return $ret;
|
||||
@ -101,16 +101,18 @@ sub GenVerifier {
|
||||
$ret .= "\n\n";
|
||||
$ret .= Gen4BitsState($name, $st);
|
||||
$ret .= "\n\n";
|
||||
$ret .= "static nsVerifier ns" . $name . "Verifier = {\n";
|
||||
$ret .= ' "' . $charset . '",' . "\n";
|
||||
$ret .= "const SMModel " . $name . "SMModel = {\n";
|
||||
$ret .= GenClassPkg($name, 4);
|
||||
$ret .= ",\n";
|
||||
$ret .= " " . $numcls;
|
||||
$ret .= " " . $numcls;
|
||||
$ret .= ",\n";
|
||||
$ret .= GenStatePkg($name, 4);
|
||||
$ret .= "\n};\n";
|
||||
$ret .= ",\n";
|
||||
$ret .= " " . "CHAR_LEN_TABLE(" . $name . "CharLenTable),\n";
|
||||
$ret .= ' "' . $charset . '",' . "\n";
|
||||
$ret .= "};\n";
|
||||
return $ret;
|
||||
|
||||
|
||||
};
|
||||
##--------------------------------------------------------------
|
||||
sub Gen4BitsState {
|
||||
@ -141,7 +143,7 @@ sub Gen4BitsState {
|
||||
} else {
|
||||
$ret .= "),";
|
||||
}
|
||||
$ret .= sprintf("//%02x-%02x \n", $i, ($i+7));
|
||||
$ret .= sprintf(" // %02x - %02x\n", $i, ($i+7));
|
||||
}
|
||||
$ret .= "};\n";
|
||||
return $ret;
|
||||
@ -150,7 +152,7 @@ sub Gen4BitsState {
|
||||
|
||||
sub GenNote {
|
||||
my($ret) = << "END_NOTE";
|
||||
/*
|
||||
/*
|
||||
* DO NOT EDIT THIS DOCUMENT MANUALLY !!!
|
||||
* THIS FILE IS AUTOMATICALLY GENERATED BY THE TOOLS UNDER
|
||||
* mozilla/intl/chardet/tools/
|
||||
|
Loading…
Reference in New Issue
Block a user