############################################################
# (C) 2006-2007 ZC Miao (hellwolf.misty@gmail.com)
#
# This program is free software; you can redistribute
# it and/or modify it under the terms of the GNU 
# General Public License version 2 as published by the
# Free Software Foundation.
#
############################################################

use strict;
use warnings;

# STDOUT gets the :utf8 layer so that the wide characters built with
# pack("U") further down are written out as UTF-8.
binmode(STDOUT, ":utf8");

# Template file: every line before the "__PYTBL__" marker line is echoed
# unchanged; the marker line itself is consumed and not printed.
open(ISO, "<", "iso14651_t1.hacked")
    || die "open iso14651_t1.hacked failed : $!";

while (defined(my $line = <ISO>)) {
    last if $line eq "__PYTBL__\n";
    print $line;
}

open(PYTBL, "<", "pinyin_table.txt")
    || die "open pinyin_table.txt failed : $!";

# Discard the first three lines of the table.
scalar(<PYTBL>) for 1 .. 3;

# utf8_to_utf16(@bytes)
#
# Takes the bytes of one UTF-8 encoded character and returns the bytes of
# its code point, least significant byte first.  Despite the name, no
# surrogate pairs are built: a 4-byte sequence yields three code point bytes.
#
#   1 byte         -> returned unchanged
#   2 bytes        -> (low, high)
#   3 bytes        -> (low, high)
#   anything else  -> handled as a 4-byte sequence: (low, middle, high)
sub utf8_to_utf16{
    my @bytes = @_;
    my $count = scalar @bytes;

    # A lone byte is passed straight through.
    return @bytes if $count == 1;

    if ($count == 2) {
        # 110yyyyy 10xxxxxx  -->  00000yyy yyxxxxxx
        my ($lead, $tail) = @bytes;
        my $low  = (($lead & 0x3) << 6) | ($tail & 0x3F);
        my $high = ($lead & 0x1F) >> 2;
        return ($low, $high);
    }

    if ($count == 3) {
        # 1110zzzz 10yyyyyy 10xxxxxx  -->  zzzzyyyy yyxxxxxx
        my ($lead, $mid, $tail) = @bytes;
        my $low  = (($mid & 0x3) << 6) | ($tail & 0x3F);
        my $high = (($lead & 0xF) << 4) | (($mid & 0x3F) >> 2);
        return ($low, $high);
    }

    # 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx  -->  000uuuuu zzzzyyyy yyxxxxxx
    my ($lead, $second, $third, $tail) = @bytes;
    my $low    = (($third & 0x3) << 6) | ($tail & 0x3F);
    my $middle = (($second & 0xF) << 4) | (($third & 0x3F) >> 2);
    my $high   = (($lead & 0x7) << 2) | (($second & 0x3F) >> 4);
    return ($low, $middle, $high);
}

# pack_unicode(@bytes)
#
# Combines the given bytes, least significant first, into one integer and
# returns the single character with that code point (via pack "U").
sub pack_unicode{
    my @bytes = @_;
    my $code_point = 0;

    # Byte number $pos contributes its value shifted left by 8*$pos bits.
    for my $pos (0 .. $#bytes) {
        $code_point += $bytes[$pos] * (1 << (8 * $pos));
    }

    return pack("U", $code_point);
}

# Output lines in the order they were generated.  A line that is later
# superseded is overwritten with "" rather than removed, so the indices
# stored in %pyed remain valid.
my @pya;

# Bookkeeping per "<U....>" symbol:
#   {weight} - the largest weight seen so far for that symbol
#   {index}  - position in @pya of the line currently emitted for it
my %pyed;

# doit($w, @c)
#
#   $w - weight of this reading; compared numerically against earlier ones
#   @c - the UTF-8 bytes of one character
#
# Queues a collation line for the character in @pya.  When the same
# character shows up again, the line is replaced only if the new weight is
# strictly greater than the recorded one.
sub doit{
    my ($w, @c) = @_;
    my @u = utf8_to_utf16(@c);

    # utf8_to_utf16 returns the low byte first; the symbol name lists the
    # most significant byte first, two hex digits per byte.
    # NOTE(review): a 4-byte sequence therefore yields six hex digits
    # (<UXXXXXX>) - confirm the consumer of this table accepts that width.
    my $sym = '<U' . join('', map { sprintf "%02X", $_ } reverse @u) . '>';

    # For a character with several readings, keep the one with the
    # greatest weight.
    if(!exists($pyed{$sym}) || ($w > $pyed{$sym}{weight})){
        $pyed{$sym}{weight} = $w;

        # Blank out the line emitted earlier for this symbol, if any.
        # (Scalar element access; the original used a one-element slice.)
        $pya[$pyed{$sym}{index}] = "" if exists $pyed{$sym}{index};

        # The new line goes at the end of @pya; remember where.
        $pyed{$sym}{index} = @pya;

        # Line format: <U4E00> <U4E00>;IGNORE;IGNORE;IGNORE
        # followed by a tab and a comment holding the character and weight.
        push @pya,
        $sym." ".$sym.";IGNORE;IGNORE;IGNORE\t#".pack_unicode(@u).$w."\n";
    }
}

# Parse the table.  Lines are processed in sorted order; the third
# tab-separated column of each line is a whitespace-separated list of
# entries, each consisting of the UTF-8 bytes of one character followed
# directly by its weight.
for my $line (sort <PYTBL>){
    my $column = (split /\t/, $line)[2];

    # A line with fewer than three columns has no entries to parse.
    next unless defined $column;

    for my $entry (split(/\s/, $column)){
        # The leading run of bytes >= 0x80 is the character, whatever
        # follows is the weight.
        # NOTE(review): assumes PYTBL delivers raw bytes (it is opened
        # without a decoding layer) - confirm no -C / PERL_UNICODE is used.
        my ($lead, $w) = $entry =~ /\A([\x80-\xFF]*)(.*)\z/s;
        my @c = unpack("C*", $lead);

        # Empty tokens (e.g. from consecutive whitespace) and tokens
        # without a multibyte prefix do not describe a character; passing
        # them on would emit a bogus <U000000> entry.
        next unless @c;

        doit($w, @c);
    }
}
close PYTBL;

# Emit the generated table; superseded entries are empty strings and
# therefore print nothing.
print @pya;

# Copy the remainder of the template, i.e. everything after the marker.
print <ISO>;

close ISO;
