gecko/intl/chardet/tools/charfreq.pl

83 lines
2.4 KiB
Perl

#!/usr/bin/perl
#
# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1999
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#
# Alternatively, the contents of this file may be used under the terms of
# either of the GNU General Public License Version 2 or later (the "GPL"),
# or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK *****
use strict;
use warnings;

# charfreq.pl -- tally how often each two-byte character occurs.
#
# Usage:  charfreq.pl STATFILE < text
#
# STATFILE holds previously collected totals, one "<pair> <count>" entry
# per line.  The text on STDIN is scanned byte by byte; every time a byte
# greater than 0xA0 is immediately followed (ignoring whitespace) by
# another byte greater than 0xA0, the two bytes are treated as one
# character and its total is incremented.
# NOTE(review): these byte ranges look like an EUC-style double-byte
# encoding -- confirm against the data this tool is fed.
#
# Output on STDOUT:
#   * one "<pair> <running total>" line each time a pair is counted, then
#   * the complete table, sorted by key, as "<pair> <count>" lines.
# Because the table comes last, feeding the output back in as STATFILE
# leaves each pair with its final total.

# States of the per-line byte scanner.
use constant {
    STATE_START => 0,    # expecting a single byte or the first byte of a pair
    STATE_ERROR => 1,    # unexpected byte seen; the rest of the line is ignored
    STATE_TRAIL => 2,    # first byte stored, expecting the second byte
};

my $stat_path = $ARGV[0];

# Check explicitly: three-argument open() given an undefined path opens an
# anonymous temporary file instead of failing.
defined $stat_path
    or die " cannot open data file: no file name given\n";

open(my $stat_fh, '<', $stat_path)
    or die " cannot open data file $stat_path: $!\n";
binmode($stat_fh);    # keys are raw byte pairs, never decoded text

# Load the existing totals.  split ' ' ignores leading whitespace; lines
# without both a key and a count (blank lines, for instance) are skipped
# so they cannot create bogus entries in the final table.
my %count;
while (my $line = <$stat_fh>) {
    my ($pair, $total) = split ' ', $line;
    next unless defined $total;
    $count{$pair} = $total;
}
close($stat_fh);

binmode(STDIN);       # the scanner below classifies by raw byte value

while (my $line = <STDIN>) {
    # split /\s*/ yields one element per byte with all whitespace removed.
    # Leading whitespace produces an empty first element, which is dropped.
    my @bytes = grep { length } split /\s*/, $line;

    # Scanner state never carries over from one line to the next.
    my $state = STATE_START;
    my $pair  = '';

    # Visit every byte of the line, including the last one, so that a pair
    # ending the line is counted too.
    for my $byte (@bytes) {
        last if $state == STATE_ERROR;    # nothing more is counted on this line

        my $code = ord $byte;

        if ($state == STATE_START) {
            next if $code <= 0x80;        # single byte: not counted
            if ($code > 0xa0) {
                $pair  = $byte;
                $state = STATE_TRAIL;
            }
            else {
                $state = STATE_ERROR;     # 0x81..0xA0 cannot start a pair
            }
        }
        else {                            # STATE_TRAIL
            if ($code > 0xa0) {
                $pair .= $byte;
                $count{$pair}++;
                # Running tally, emitted for every counted pair.
                print $pair . " " . $count{$pair} . "\n";
                $state = STATE_START;
            }
            else {
                $state = STATE_ERROR;     # second byte out of range
            }
        }
    }
}

# Final table: every known pair with its total, in string-sorted key order.
for my $pair (sort keys %count) {
    print $pair . " " . $count{$pair} . "\n";
}