#!perl -w
#
# make_encmap
#
# Copyright 1998 Clark Cooper
# Changes in Version 2.00 onwards Copyright (C) 2009 Steve Hay
# All rights reserved.
#
# This program is free software; you may redistribute it and/or
# modify it under the same terms as Perl itself.
#

use 5.008001;
use strict;
use warnings;

# Command line: the name to put into the <encmap> element and the path of
# the mapping file to read.
my $name = shift;
my $file = shift;

die "Usage is:\n\tmake_encmap name file\n"
  unless (defined($name) and defined($file));

# Byte values below 128 that are allowed to map to something other than
# themselves without triggering a warning.
my $except_str = '$@\^`{}~';
my %Exceptions = map { $_ => 1 } unpack('C*', $except_str);

# Three-argument open with a lexical handle, so that characters in $file can
# never be taken as an open mode; $! says why the open failed.
open(my $map_fh, '<', $file) or die "Couldn't open $file: $!";

# @byte1 is indexed by the value of the first byte of a mapping.  An element
# is either the code point for a single-byte mapping, or a reference to an
# array indexed by the second byte for two-byte mappings.
my @byte1;

# Lowest first-byte value that received an entry; 256 means "none yet".
my $minpos = 256;

while (<$map_fh>) {
  next if /^\#/;

  # A mapping line looks like: 0xBB or 0xBBBB, whitespace, 0xUUUU, then a
  # '#' comment.  Anything else is silently skipped.
  next unless /0x([\da-f]{2,4})\s+0x([\da-f]{4})\s*\#\s*(.*)\s*$/i;

  my ($from, $to) = ($1, $2);

  # The pattern also accepts three hex digits; reject those explicitly.
  my $flen = length($from);
  die "Bad line at $., from must be either 2 or 4 digits:\n$_"
    if $flen == 3;

  my $toval = hex($to);
  my $f1 = substr($from, 0, 2);
  my $f1val = hex($f1);

  if ($flen == 2) {
    if ($f1val < 128) {
      # Bytes below 128 that map to themselves need no entry.
      next if $f1val == $toval;

      warn "The byte '0x$f1' mapped to 0x$to\n"
        unless defined($Exceptions{$f1val});
    }

    my $known = $byte1[$f1val];

    if (defined($known)) {
      # The byte was already seen as the first byte of a two-byte mapping;
      # comparing the array reference numerically would be meaningless.
      die "The byte '0x$f1' overlaps a multi-byte prefix definition."
        if ref($known);

      # Repeating an identical mapping is tolerated, a conflicting one is not.
      die "Multiple mappings for 0x$f1: 0x$to & " . sprintf("0x%04x", $known)
        if ($known != $toval);
    }
    else {
      $byte1[$f1val] = $toval;
      $minpos = $f1val
        if $f1val < $minpos;
    }
  }
  else {
    my $b1 = $byte1[$f1val];

    if (defined($b1)) {
      die "The 1st byte of '$from' overlaps a single byte definition."
        unless ref($b1);
    }
    else {
      # First two-byte mapping seen for this lead byte: start its table.
      $b1 = $byte1[$f1val] = [];
      $minpos = $f1val
        if $f1val < $minpos;
    }

    my $f2 = substr($from, 2, 2);
    my $f2val = hex($f2);

    $b1->[$f2val] = $toval;
  }
}

close($map_fh);

die "Minpos never set" unless $minpos < 256;

print "<encmap name='$name'>\n";

process_byte(2, $minpos, \@byte1);

print "</encmap>\n";

####
## End main
####

# emit PREFIX, START, LIMIT, VALUE
#
# Prints one XML element to the currently selected output handle for the
# byte positions START .. LIMIT-1.  When that span is exactly one position
# long a <ch> element is printed; otherwise a <range> element that also
# carries the span length.  START goes into the 'byte' attribute and VALUE
# into the 'uni' attribute, both in hexadecimal.  PREFIX is printed verbatim
# in front of the element (the caller uses it for indentation).
sub emit {
  my ($pre, $start, $lim, $val) = @_;

  my $len = $lim - $start;

  # PREFIX is passed as a %s argument instead of being interpolated into the
  # format string, so a '%' inside it is never parsed as a conversion.
  if ($len == 1) {
    printf("%s<ch byte='x%02x' uni='x%04x'/>\n", $pre, $start, $val);
  }
  else {
    printf("%s<range byte='x%02x' len='%d' uni='x%04x'/>\n",
           $pre, $start, $len, $val);
  }
}  # End emit

# process_byte LEAD, MINPOS, AREF
#
# Walks the array referenced by AREF from index MINPOS to its last index and
# prints its contents as XML, indented by LEAD spaces.
#
#   * An element holding an array reference is printed as a <prefix> element
#     whose content is produced by a recursive call on that array, starting
#     at index 0 and indented two spaces deeper.
#   * Consecutive plain elements whose values rise by exactly one per index
#     are collected into a run and handed to emit() as a single span.
#   * An undefined element, a nested array, or a value that does not continue
#     the sequence ends the current run.
sub process_byte {
  my ($lead, $minpos, $aref) = @_;

  my $indent = ' ' x $lead;
  my $last   = $#{$aref};

  # Start index and start value of the run currently being collected;
  # $run_start is undefined while no run is open.
  my ($run_start, $run_val);

  # Emit the open run, if any, as ending just before index $end, and close it.
  my $flush = sub {
    my ($end) = @_;
    emit($indent, $run_start, $end, $run_val)
      if defined $run_start;
    undef $run_start;
  };

  my $pos = $minpos;
  while ($pos <= $last) {
    my $entry = $aref->[$pos];

    if (!defined $entry) {
      # A gap in the table ends the run.
      $flush->($pos);
    }
    elsif (ref $entry) {
      # A nested table: close the run, then describe the nested level.
      $flush->($pos);
      printf "$indent<prefix byte='x%02x'>\n", $pos;
      process_byte($lead + 2, 0, $entry);
      print "$indent</prefix>\n";
    }
    elsif (!defined $run_start
           or $entry - $run_val != $pos - $run_start) {
      # The value does not extend the open run: emit that one and start anew.
      $flush->($pos);
      $run_start = $pos;
      $run_val   = $entry;
    }

    $pos++;
  }

  # A run still open here extends to the end of the array.
  $flush->($pos);

}  # End process_byte

__END__

=head1 NAME

make_encmap - create an XML representation from a Unicode mapping file

=head1 SYNOPSIS

B<make_encmap> I<name> I<file>

=head1 DESCRIPTION

B<make_encmap> creates an XML encmap file with a given name from a Unicode
mapping file, such as those available from F<ftp://ftp.unicode.org>. The result
is written to F<stdout>.

=head1 OPTIONS

There are no options you can use.

=head1 EXAMPLES

The following example shows the usage of B<make_encmap> for the ISO/IEC 8859-15
table.

    wget ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-15.TXT
    make_encmap 8859-15 8859-15.TXT > 8859-15.encmap

=head1 SEE ALSO

L<compile_encoding(1)>,
L<XML::Encoding(3pm)>

=head1 AUTHORS

This manual page was written by Daniel Leidert E<lt>daniel.leidert@wgdd.deE<gt>
for the Debian project (but may be used by others).

=cut