diff --git a/intl/unicharutil/tools/genUnicodePropertyData.pl b/intl/unicharutil/tools/genUnicodePropertyData.pl index d94d9bd7b7d..0e01babe9f5 100644 --- a/intl/unicharutil/tools/genUnicodePropertyData.pl +++ b/intl/unicharutil/tools/genUnicodePropertyData.pl @@ -26,11 +26,18 @@ # though this may change if we find a need for additional properties. # # The Unicode data files listed above should be together in one directory. +# # We also require the file # http://www.unicode.org/Public/security/latest/xidmodifications.txt # This file should be in a sub-directory "security" immediately below the # directory containing the other Unicode data files. # +# We also require the latest data file for UTR50, currently revision-12: +# http://www.unicode.org/Public/vertical/revision-12/VerticalOrientation-12.txt +# This file should be in a sub-directory "vertical" immediately below the +# directory containing the other Unicode data files. +# +# # (2) Run this tool using a command line of the form # # perl genUnicodePropertyData.pl \ @@ -301,6 +308,13 @@ my %bidicategoryCode = ( "BN" => "18" # Boundary Neutral ); +my %verticalOrientationCode = ( + 'U' => 0, # U - Upright, the same orientation as in the code charts + 'R' => 1, # R - Rotated 90 degrees clockwise compared to the code charts + 'Tu' => 2, # Tu - Transformed typographically, with fallback to Upright + 'Tr' => 3 # Tr - Transformed typographically, with fallback to Rotated +); + # initialize default properties my @script; my @category; @@ -314,6 +328,7 @@ my @numericvalue; my @hanVariant; my @bidicategory; my @fullWidth; +my @verticalOrientation; for (my $i = 0; $i < 0x110000; ++$i) { $script[$i] = $scriptCode{"UNKNOWN"}; $category[$i] = $catCode{"UNASSIGNED"}; @@ -324,6 +339,7 @@ for (my $i = 0; $i < 0x110000; ++$i) { $hanVariant[$i] = 0; $bidicategory[$i] = $bidicategoryCode{"L"}; $fullWidth[$i] = 0; + $verticalOrientation[$i] = 1; # default for unlisted codepoints is 'R' } # blocks where the default for bidi category is not L @@ -628,6 +644,31 @@ while () { } close FH; +# read VerticalOrientation-12.txt +open FH, "< $ARGV[1]/vertical/VerticalOrientation-12.txt" or die "can't open UTR50 data file VerticalOrientation-12.txt\n"; +push @versionInfo, ""; +while () { + chomp; + push @versionInfo, $_; + last if /Date:/; +} +while () { + chomp; + s/#.*//; + if (m/([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))*\s*;\s*([^ ]+)/) { + my $vo = $3; + warn "unknown Vertical_Orientation code $vo" + unless exists $verticalOrientationCode{$vo}; + $vo = $verticalOrientationCode{$vo}; + my $start = hex "0x$1"; + my $end = (defined $2) ? hex "0x$2" : $start; + for (my $i = $start; $i <= $end; ++$i) { + $verticalOrientation[$i] = $vo; + } + } +} +close FH; + my $timestamp = gmtime(); open DATA_TABLES, "> nsUnicodePropertyData.cpp" or die "unable to open nsUnicodePropertyData.cpp for output"; @@ -706,18 +747,35 @@ sub sprintCharProps1 my $usv = shift; return sprintf("{%d,%d,%d}, ", $mirror[$usv], $hangul[$usv], $combining[$usv]); } -&genTables("CharProp1", "struct nsCharProps1 {\n unsigned char mMirrorOffsetIndex:5;\n unsigned char mHangulType:3;\n unsigned char mCombiningClass:8;\n};", - "nsCharProps1", 11, 5, \&sprintCharProps1, 1, 2, 1); +my $type = q/ +struct nsCharProps1 { + unsigned char mMirrorOffsetIndex:5; + unsigned char mHangulType:3; + unsigned char mCombiningClass:8; +}; +/; +&genTables("CharProp1", $type, "nsCharProps1", 11, 5, \&sprintCharProps1, 1, 2, 1); sub sprintCharProps2 { my $usv = shift; - return sprintf("{%d,%d,%d,%d,%d,%d},", + return sprintf("{%d,%d,%d,%d,%d,%d,%d},", $script[$usv], $eaw[$usv], $category[$usv], - $bidicategory[$usv], $xidmod[$usv], $numericvalue[$usv]); + $bidicategory[$usv], $xidmod[$usv], $numericvalue[$usv], + $verticalOrientation[$usv]); } -&genTables("CharProp2", "struct nsCharProps2 {\n unsigned char mScriptCode:8;\n unsigned char mEAW:3;\n unsigned char mCategory:5;\n unsigned char mBidiCategory:5;\n unsigned char mXidmod:4;\n signed char mNumericValue:5;\n unsigned char mHanVariant:2;\n};", - "nsCharProps2", 11, 5, \&sprintCharProps2, 16, 4, 1); +$type = q/ +struct nsCharProps2 { + unsigned char mScriptCode:8; + unsigned char mEAW:3; + unsigned char mCategory:5; + unsigned char mBidiCategory:5; + unsigned char mXidmod:4; + signed char mNumericValue:5; + unsigned char mVertOrient:2; +}; +/; +&genTables("CharProp2", $type, "nsCharProps2", 11, 5, \&sprintCharProps2, 16, 4, 1); print HEADER "#pragma pack()\n\n";