=head1 NAME
Lingua::KO::Romanize::Hangul - Romanization of Korean language
=head1 SYNOPSIS
use Lingua::KO::Romanize::Hangul;
my $conv = Lingua::KO::Romanize::Hangul->new();
my $roman = $conv->char( $hangul );
printf( "<ruby><rb>%s</rb><rt>%s</rt></ruby>", $hangul, $roman );
my @array = $conv->string( $string );
foreach my $pair ( @array ) {
my( $raw, $ruby ) = @$pair;
if ( defined $ruby ) {
printf( "<ruby><rb>%s</rb><rt>%s</rt></ruby>", $raw, $ruby );
} else {
print $raw;
}
}
=head1 DESCRIPTION
Hangul is phonemic characters of the Korean language.
This module follows the C<Revised Romanization of Korean>
which was released on July 7, 2000
as the official romanization system in South Korea.
=head2 $conv = Lingua::KO::Romanize::Hangul->new();
This constructer methods returns a new object.
=head2 $roman = $conv->char( $hangul );
This method returns romanized letters of a Hangul character.
It returns undef when $hanji is not a valid Hangul character.
The argument's encoding must be UTF-8.
=head2 $roman = $conv->chars( $string );
This method returns romanized letters of Hangul characters.
=head2 @array = $conv->string( $string );
This method returns a array of referenced arrays
which are pairs of a Hangul chacater and its romanized letters.
$array[0] # first Korean character's pair (array)
$array[1][0] # secound Korean character itself
$array[1][1] # its romanized letters
=head1 UTF-8 FLAG
This module treats utf8 flag transparently.
=head1 SEE ALSO
L<Lingua::JA::Romanize::Japanese> for Japanese
L<Lingua::ZH::Romanize::Pinyin> for Chinese
http://www.korean.go.kr/06_new/rule/rule06.jsp
http://www.kawa.net/works/perl/romanize/romanize-e.html
=head1 COPYRIGHT AND LICENSE
Copyright (c) 1998-2008 Yusuke Kawasaki. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
=cut
# ----------------------------------------------------------------
package Lingua::KO::Romanize::Hangul;
use strict;
use vars qw( $VERSION );
$VERSION = "0.20";
my $PERL581 = 1 if ( $] >= 5.008001 );
my $INITIAL_LETTER = [map {$_ eq '-' ? '' : $_} qw(
g kk n d tt r m b pp s ss - j jj
ch k t p h
)];
my $PEAK_LETTER = [map {$_ eq '-' ? '' : $_} qw(
a ae ya yae eo e yeo ye o wa wae oe yo u
wo we wi yu eu ui i
)];
my $FINAL_LETTER = [map {$_ eq '-' ? '' : $_} qw(
- g kk ks n nj nh d r lg lm lb ls lt
lp lh m b ps s ss ng j c k t p h
)];
# my $FINAL_LETTER = [map {$_ eq '-' ? '' : $_} qw(
# - g kk ks n nj nh d r rg rm rb rs rt
# rp rh m b bs s ss ng j c k t p h
# )];
# ----------------------------------------------------------------
sub new {
my $package = shift;
my $self = {@_};
bless $self, $package;
$self;
}
sub char {
my $self = shift;
return $self->_char(@_) unless $PERL581;
my $char = shift;
my $utf8 = utf8::is_utf8( $char );
utf8::encode( $char ) if $utf8;
$char = $self->_char( $char );
utf8::decode( $char ) if $utf8;
$char;
}
sub _char {
my $self = shift;
my $char = shift;
my( $c1, $c2, $c3, $c4 ) = unpack("C*",$char);
return if ( ! defined $c3 || defined $c4 );
my $ucs2 = (($c1 & 0x0F)<<12) | (($c2 & 0x3F)<<6) | ($c3 & 0x3F);
return if ( $ucs2 < 0xAC00 );
return if ( $ucs2 > 0xD7A3 );
my $han = $ucs2 - 0xAC00;
my $init = int( $han / 21 / 28 );
my $peak = int( $han / 28 ) % 21;
my $fin = $han % 28;
join( "", $INITIAL_LETTER->[$init], $PEAK_LETTER->[$peak], $FINAL_LETTER->[$fin] );
}
sub chars {
my $self = shift;
my @array = $self->string( shift );
join( " ", map {$#$_>0 ? $_->[1] : $_->[0]} @array );
}
sub string {
my $self = shift;
return $self->_string(@_) unless $PERL581;
my $char = shift;
my $flag = utf8::is_utf8( $char );
utf8::encode( $char ) if $flag;
my @array = $self->_string( $char );
if ( $flag ) {
foreach my $pair ( @array ) {
utf8::decode( $pair->[0] ) if defined $pair->[0];
utf8::decode( $pair->[1] ) if defined $pair->[1];
}
}
@array;
}
# [UCS-2] AC00-D7A3
# [UTF-8] EAB080-ED9EA3
# EA-ED are appeared only as Hangul's first character.
sub _string {
my $self = shift;
my $src = shift;
my $array = [];
while ( $src =~ /([\xEA-\xED][\x80-\xBF]{2})|([^\xEA-\xED]+)/sg ) {
if ( defined $1 ) {
my $pair = [ $1 ];
my $roman = $self->char( $1 );
$pair->[1] = $roman if defined $roman;
push( @$array, $pair );
} else {
push( @$array, [ $2 ] );
}
}
for ( my $i = 0 ; $i < $#$array ; $i++ ) {
next if ( scalar @{ $array->[$i] } < 2 );
next if ( scalar @{ $array->[ $i + 1 ] } < 2 );
my $this = $array->[$i]->[1];
my $next = $array->[ $i + 1 ]->[1];
my $novowel = 1 unless ( $next =~ /^[aeouiwy]/ );
if ( $this =~ /(tt|pp|jj)$/ && $novowel ) {
$array->[$i]->[1] =~ s/(tt|pp|jj)$//;
}
elsif ( $this =~ /([^n]g|kk)$/ && $novowel ) {
$array->[$i]->[1] =~ s/(g|kk)$/k/;
}
elsif ( $this =~ /(d|j|ch|s?s)$/ && $novowel ) {
$array->[$i]->[1] =~ s/(d|j|ch|s?s)$/t/;
}
elsif ( $this =~ /(b)$/ && $novowel ) {
$array->[$i]->[1] =~ s/(b)$/p/;
}
elsif ( $this =~ /(r)$/ && $novowel ) {
$array->[$i]->[1] =~ s/(r)$/l/;
$array->[$i+1]->[1] =~ s/^r/l/;
}
}
if ( scalar @$array ) {
my $last = $array->[$#$array];
my $this = $last->[1];
if ( $this =~ /(tt|pp|jj)$/ ) {
$last->[1] =~ s/(tt|pp|jj)$//;
}
elsif ( $this =~ /([^n]g|kk)$/ ) {
$last->[1] =~ s/(g|kk)$/k/;
}
elsif ( $this =~ /(d|j|ch|s?s)$/ ) {
$last->[1] =~ s/(d|j|ch|s?s)$/t/;
}
elsif ( $this =~ /(b)$/ ) {
$last->[1] =~ s/(b)$/p/;
}
elsif ( $this =~ /(r)$/ ) {
$last->[1] =~ s/(r)$/l/;
}
}
@$array;
}
# ----------------------------------------------------------------
;1;