package XML::Parser::Style::ETree;
use 5.006002;
use strict;
use warnings;
use Scalar::Util ();
=head1 NAME
XML::Parser::Style::ETree - Parse xml to simple tree
=head1 VERSION
Version 0.09
=cut
our $VERSION = '0.09';
=head1 SYNOPSIS
use XML::Parser;
my $p = XML::Parser->new( Style => 'ETree' );
=head1 EXAMPLE
<root at="key">
<nest>
first
<v>a</v>
mid
<v at="a">b</v>
<vv></vv>
last
</nest>
</root>
will be
{
root => {
'-at' => 'key',
nest => {
'#text' => 'firstmidlast',
vv => '',
v => [
'a',
{
'-at' => 'a',
'#text' => 'b'
}
]
}
}
}
=head1 SPECIAL VARIABLES
=over 4
=item $TEXT{ATTR} [ = '-' ]
Allow to set prefix for name of attribute nodes;
<item attr="value" />
# will be
item => { -attr => 'value' };
# with
$TEXT{ATTR} = '+';
# will be
item => { '+attr' => 'value' };
=item $TEXT{NODE} [ = '#text' ]
Allow to set name for text nodes
<item><sub attr="t"></sub>Text value</item>
# will be
item => { sub => { -attr => "t" }, #text => 'Text value' };
# with
$TEXT{NODE} = '';
# will be
item => { sub => { -attr => "t" }, '' => 'Text value' };
=item $TEXT{JOIN} [ = '' ]
Allow to set join separator for text node, splitted by subnodes
<item>Test1<sub />Test2</item>
# will be
item => { sub => '', #text => 'Test1Test2' };
# with
$TEXT{JOIN} = '+';
# will be
item => { sub => '', #text => 'Test1+Test2' };
=item $TEXT{TRIM} [ = 1 ]
Trim leading and trailing whitespace from text nodes
<item> Test1 <sub /> Test2 </item>
# will be
item => { sub => '', #text => 'Test1Test2' };
# with
$TEXT{TRIM} = 0;
# will be
item => { sub => '', #text => ' Test1 Test2 ' };
=item %FORCE_ARRAY
Allow to force nodes to be represented always as arrays. If name is empty string, then ot means ALL
<item><sub attr="t"></sub>Text value</item>
# will be
item => { sub => { -attr => "t" }, #text => 'Text value' };
# with
$FORCE_ARRAY{sub} = 1;
# will be
item => { sub => [ { -attr => "t" } ], #text => 'Text value' };
# with
$FORCE_ARRAY{''} = 1;
# will be
item => [ { sub => [ { -attr => "t" } ], #text => 'Text value' } ];
=item %FORCE_HASH
Allow to force text-only nodes to be represented always as hashes. If name is empty string, then ot means ALL
<item><sub>Text value</sub><any>Text value</any></item>
# will be
item => { sub => 'Text value', any => 'Text value' };
# with
$FORCE_HASH{sub} = 1;
# will be
item => { sub => { #text => 'Text value' }, any => 'Text value' };
# with
$FORCE_HASH{''} = 1;
# will be
item => { sub => { #text => 'Text value' }, any => { #text => 'Text value' } };
=item @STRIP_KEY
Allow to strip something from tag names by regular expressions
<a:item><b:sub>Text value</b:sub></a:item>
# will be
'a:item' => { 'b:sub' => 'Text value' };
# with
@STRIP_KEY = (qr/^[^:]+:/);
# will be
'item' => { 'sub' => 'Text value' };
=back
=cut
sub DEBUG () { 0 };
our %TEXT = (
ATTR => '-',
NODE => '#text',
JOIN => '',
TRIM => 1,
);
our @STRIP_KEY;
# '' means all since this can't be a name of tag
our %FORCE_ARRAY = ( '' => 0 );
our %FORCE_HASH = ( '' => 0 );
sub Init {
my $xp = shift;
my $t = $xp->{FunTree} ||= {};
$t->{stack} = [];
$t->{tree} = {};
$t->{context} = { tree => {}, text => [] };
$t->{opentag} = undef;
$t->{depth} = 0 if DEBUG;
return;
}
sub Start {
my $xp = shift;
my $t = $xp->{FunTree};
#if ($enc) { @_ = @_; $_ = $enc->encode($_) for @_ };
my $tag = shift;
$tag =~ s{$_}{} for @STRIP_KEY;
warn "++"x(++$t->{depth}) . $tag if DEBUG;
my $node = {
name => $tag,
tree => undef,
text => [],
textflag => 0,
};
Scalar::Util::weaken($node->{parent} = $t->{context});
if (@_) {
my %attr;
while (my ($k,$v) = splice @_,0,2) {
$attr{ $TEXT{ATTR}.$k } = $v;
}
#$flat[$#flat]{attributes} = \%attr;
$node->{attrs} = \%attr;
#warn "Need something to do with attrs on $tag\n";
};
$t->{opentag} = 1;
{
if (@{ $t->{context}{text} }) {
${ $t->{context}{text} }[ $#{ $t->{context}{text} } ] =~ s{[\t\s\r\n]+$}{}s if $TEXT{TRIM};
# warn "cleaning trailing whitespace on $#{ $t->{context}{text} } : ${ $t->{context}{text} }[ $#{ $t->{context}{text} } ]";
pop (@{ $t->{context}{text} }),redo unless length ${ $t->{context}{text} }[ $#{ $t->{context}{text} } ];
}
}
#push @{ $t->{context}{text} }, $TEXT{JOIN} if $t->{context}{textflag} and length $TEXT{JOIN};
$t->{context}{textflag} = 0;
push @{ $t->{stack} }, $t->{context} = $node;
}
sub End {
my $xp = shift;
my $t = $xp->{FunTree};
#if ($enc) { @_ = @_; $_ = $enc->encode($_) for @_ };
my $name = shift;
$name =~ s{$_}{} for @STRIP_KEY;
#my $node = pop @stack;
my $text = $t->{context}{text};
$t->{opentag} = 0;
my $tree = $t->{context}{tree};
my $haschild = scalar keys %$tree;
if ( ! $FORCE_ARRAY{''} ) {
foreach my $key ( keys %$tree ) {
#warn "$key for $name\n";
next if $FORCE_ARRAY{$key};
next if ( 1 < scalar @{ $tree->{$key} } );
$tree->{$key} = shift @{ $tree->{$key} };
}
}
if ( @$text ) {
{
${ $text }[ $#$text ] =~ s{[\t\s\r\n]+$}{}s if $TEXT{TRIM};
# warn "cleaning trailing whitespace on $#$text :${ $text }[ $#$text ]";
pop (@$text),redo unless length ${ $text }[ $#$text ];
}
#warn "node $name have text '@$text'";
if ( @$text == 1 ) {
# one text node (normal)
$text = shift @$text;
}
else {
# some text node splitted
$text = join( $TEXT{JOIN}, @$text );
}
if ( $haschild ) {
# some child nodes and also text node
$tree->{$TEXT{NODE}} = $text;
}
else {
# only text node without child nodes
$tree = $text;
}
}
elsif ( ! $haschild ) {
# no child and no text
$tree = "";
}
# Move up!
my $child = $tree;
#warn "parent for $name = $context->{parent}\n";
my $elem = $t->{context}{attrs};
my $hasattr = scalar keys %$elem if ref $elem;
# my $forcehash = $FORCE_HASH_ALL || ( $t->{context}{parent}{name} && $FORCE_HASH{$t->{context}{parent}{name}} ) || 0;
my $forcehash = $FORCE_HASH{''} || ( $name && $FORCE_HASH{$name} ) || 0;
#warn "$t->{context}{parent}{name} => $name forcehash = $forcehash\n";
$t->{context} = $t->{context}{parent};
#warn "$context->{name} have ".Dumper ($elem);
if ( ref $child eq "HASH" ) {
if ( $hasattr ) {
# some attributes and some child nodes
%$elem = ( %$elem, %$child );
}
else {
# some child nodes without attributes
$elem = $child;
}
}
else {
if ( $hasattr ) {
# some attributes and text node
#warn "${name}: some attributes and text node";
$elem->{$TEXT{NODE}} = $child;
}
elsif ( $forcehash ) {
# only text node without attributes
$elem = { $TEXT{NODE} => $child };
}
else {
# text node without attributes
$elem = $child;
}
}
warn "--"x($t->{depth}--) . $name if DEBUG;
push @{ $t->{context}{tree}{$name} ||= [] },$elem;
$name = $t->{context}{name};
$tree = $t->{context}{tree} ||= {};
warn "unused args on /$name: @_" if @_;
}
sub Char {
my $xp = shift;
my $t = $xp->{FunTree};
#if ($enc) { @_ = @_; $_ = $enc->encode($_) for @_ };
my $text = shift;
unless ($t->{context}{textflag}) {
$text =~ s{^[\t\s\r\n]+}{}s if $TEXT{TRIM};
}
if ( length $text ){
warn ".."x(1+$t->{depth}) . $text if DEBUG;
if ($t->{context}{textflag}) {
${ $t->{context}{text} }[ $#{ $t->{context}{text} } ] .= $text;
} else {
push @{ $t->{context}{text} }, $text;
}
$t->{context}{textflag} = 1;
};
}
sub Final {
my $tree = $_[0]{FunTree}{context}{tree};
delete $_[0]{FunTree};
if ( ! $FORCE_ARRAY{''} ) {
foreach my $key ( keys %$tree ) {
next if $FORCE_ARRAY{$key};
next if ( 1 < scalar @{ $tree->{$key} } );
$tree->{$key} = shift @{ $tree->{$key} };
}
}
return $tree;
}
=head1 SEE ALSO
=over 4
=item * L<XML::Parser>
The parser itself
=item * L<XML::Parser::EasyTree>
Another EasyTree (I didn't found it before my first commit of this package because of missing '::Style' in it's name)
But since L<XML::Parser::EasyTree> and L<XML::Parser::Style::EasyTree> use same style name, they're mutual exclusive ;(
So, all the functionality was moved to ETree, and EasyTree was kept as a compatibility wrapper
=item * L<XML::Bare>
Very-very fast XML parser. Recommend to look
=item * L<XML::Hash::LX>
Similar behaviour, same output, but using L<XML::LibXML>
=back
=head1 AUTHOR
Mons Anderson, <mons at cpan.org>
=head1 BUGS
None known
=head1 COPYRIGHT & LICENSE
Copyright 2009 Mons Anderson
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
=cut
1;