#!/usr/bin/perl
#
# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Simon Montagu
# Portions created by the Initial Developer are Copyright (C) 2006
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK *****

# Reads NormalizationTest.txt from the current directory and writes the
# generated C header NormalizationData.h next to it.

# Input: the Unicode normalization conformance test data.  Three-argument
# open keeps the file name from ever being parsed as part of the mode.
open(TEXTFILE, '<', 'NormalizationTest.txt')
    or die "Cannot find NormalizationTest.txt. \n"
         . "The latest version should be available from\n"
         . " http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt\n"
         . "(open failed: $!)\n";

# Output: the generated header.
# (Alternate target kept from the original, apparently for debugging:
#  open(OUT, '>', 'test.txt'))
open(OUT, '>', 'NormalizationData.h')
    or die "Cannot create output file NormalizationData.h\n"
         . "(open failed: $!)\n";

# License banner and "do not edit" notice copied to the top of the header.
# The heredoc is single-quoted so nothing in the legal text is interpolated.
# NOTE(review): the opening lines of this text (down to the developer's
# name) were rebuilt from this script's own license header -- confirm
# against the upstream file before regenerating NormalizationData.h.
my $mpl = <<'END_OF_MPL';
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is mozilla.org code.
 *
 * The Initial Developer of the Original Code is
 * Simon Montagu
 * Portions created by the Initial Developer are Copyright (C) 2006
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

/*
    DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
    mozilla/intl/unicharutil/tools/genNormalizationData.pl
 */
END_OF_MPL

print OUT $mpl;

# XXX This code assumes that wchar_t is 16-bit unsigned, which is currently
#     true on Windows, Linux and Mac (with |g++ -fshort-wchar|).
#     To make it work where that assumption doesn't hold, one could generate
#     one huge array containing all the strings as 16-bit units (including
#     the 0 terminator) and initialize the array of testcaseLine with pointers
#     into the huge array.
# Convert every line read from TEXTFILE into C source on OUT:
#   "# NormalizationTest-<version>.txt"  -> the versionText[] definition
#   "@Part<n> ..."                       -> start of the Part<n>TestData[] array
#   "c1;c2;c3;c4;c5; # (...) NAME"       -> one testcaseLine initializer
# Other comment lines and blank lines produce no output.
use strict;
use warnings;

# Return the C escape sequence(s) for one code point as 16-bit units.
#   $cp - code point as an integer
# Code points above the BMP are written as a UTF-16 surrogate pair.
sub utf16_escape {
    my ($cp) = @_;
    return sprintf('\\x%04X', $cp) if $cp < 0x10000;

    # 0xD7C0 is 0xD800 - (0x10000 >> 10): it folds the "subtract 0x10000"
    # step of the surrogate computation into the high-surrogate base.
    return sprintf('\\x%04X\\x%04X',
                   ($cp >> 10) + 0xD7C0,
                   ($cp & 0x03FF) | 0xDC00);
}

# Return a C wide-string literal for one column of a test line.
#   $column - space-separated hexadecimal code points (may be undef/empty)
sub wide_literal {
    my ($column) = @_;
    $column = '' unless defined $column;
    my @units = map { utf16_escape(hex($_)) } split(' ', $column);
    return 'L"' . join('', @units) . '"';
}

# Return the character name found in the trailing comment of a test line.
#   $comment - everything after the fifth ';', e.g. " # (a; b; c; d; e; ) NAME"
# The name is whatever follows the LAST ") ", so sample characters that are
# themselves ';' or ')' cannot shift it.  Returns '' when there is no name.
sub description_of {
    my ($comment) = @_;
    return '' unless defined $comment;
    return '' unless $comment =~ /\A.*\) (.*)\z/s;
    my $name = $1;
    $name =~ s/(["\\])/\\$1/g;    # keep the emitted C string well-formed
    return $name;
}

# Return the complete testcaseLine initializer for one data line.
#   $line - a data line with its line terminator already removed
sub format_testcase {
    my ($line) = @_;

    # Limit the split to six fields: five columns plus the whole comment.
    # Splitting without a limit breaks the comment apart at every ';' and
    # loses the name when a sample character is itself a semicolon.
    my @fields = split(/;/, $line, 6);

    my $entry = " {\n";
    for my $index (0 .. 4) {
        $entry .= ' ' . wide_literal($fields[$index]) . ",\n";
    }
    $entry .= ' "' . description_of($fields[5]) . "\"\n";
    $entry .= " },\n";
    return $entry;
}

# Return the all-empty record that terminates each generated array.
sub terminator_entry {
    return " {\n" . (" L\"\",\n" x 5) . " \"\",\n" . " },\n";
}

# True once an array has been started and still needs its terminator.
my $array_open = 0;

while (defined(my $line = <TEXTFILE>)) {
    # Strip LF or CRLF only; never eat a real character from a final line
    # that has no terminator.
    $line =~ s/\r?\n\z//;

    if ($line =~ /^# NormalizationTest-(.+)\.txt/) {
        print OUT "static char versionText[] = \"$1\";\n";
    }
    elsif ($line =~ /^\@Part(.)/) {
        my $part = $1;

        # Finish the previous array, if any, before starting the next one.
        print OUT terminator_entry(), "};\n" if $array_open;

        print OUT "\n";
        print OUT "static testcaseLine Part${part}TestData[] = \n";
        print OUT "{\n";
        $array_open = 1;
    }
    elsif ($line =~ /^#/ || $line !~ /\S/) {
        # Comment or blank line.  A blank line must not be converted: its
        # record would be indistinguishable from the array terminator.
        next;
    }
    else {
        print OUT format_testcase($line);
    }
}

# Terminate the last array.
print OUT terminator_entry(), "};\n" if $array_open;

# Buffered write errors only surface at close, so check it.
close(OUT) or die "Cannot finish writing NormalizationData.h: $!\n";
close(TEXTFILE);