package XML::Filter::Dispatcher::AsStructHandler; =head1 NAME XML::Filter::Dispatcher::AsStructHandler - convert SAX stream in to simple, data-oriented structure =head1 SYNOPSIS ## Ordinarily used via the XML::Filter::Dispatcher's as_data_struct() ## built-in extension function for XPath =head1 DESCRIPTION This SAX2 handler builds a simple hash from XML. Text from each element and attribute is stored in the hash with a key of a relative path from the root down to the current element. The goal is to produce a usable structure as simply and quickly as possible; use L for more sophisticated applications. The resulting data structure has one hash per element, one scalar per attribute, and one scalar per text string in each leaf element. Warnings are emitted if any content other than whitespace is discarded. The root element name is discarded. If you are using namespaces, you must pass in the C option, otherwise not. Using namespaces without a C option or vice versa will not work. Only C, C, C, C, and C are provided; so all comments, processing instructions etc., are discarded. =head2 Examples This XML: B1 B2 with no options produces this structure: { '@a' => 'A', 'a/@aa1' => 'AA1', 'a/@aa2' => 'AA2' 'a/b' => 'B2', '' => ' B1 B2 ', 'a' => ' B1 B2 ', } Note 1: the name of the root element is discarded. Note 2: the contents of the first C<< >> element are not directly accessible; like standard Perl hashes, the later initialization overwrites the former. Much data oriented XML does not have this issue. This XML: B1 B2 With these options: XML::Filter::Dispatcher::AsStructHandler->new( Namespaces => { "" => "default-ns", "bar" => "foo-ns", }, Rules => [ "hash( root )" => sub { Dumper xvalue }, ], ) produces this structure: { '@a' => 'A', '@bar:a' => 'FOOA', 'a/@aa1' => 'AA1', 'a/@aa2' => 'AA2' 'a/b' => 'B2', '' => ' B1 B2 ', 'a' => ' B1 B2 ', } =head1 Methods =over =cut use strict; ## Config use constant PrefixesByURI => 0; ## Calculated from that option ## Running state use constant Stack => 1; use constant Hash => 2; use constant Characters => 3; =item new see above. =cut sub new { my $class = ref $_[0] ? ref $_[0] : shift; my %options = ( @_ == 1 ? %{$_[0]} : @_ ); my $self = bless [ ], $class; $self->set_namespaces( %{ delete $options{Namespaces} } ) if exists $options{Namespaces}; warn __PACKAGE__, " ignoring unknown options ", join ", ", keys %options if keys %options; return $self; } =item set_namespaces $h->set_namespaces( prefix1 => uri1, ); =cut sub set_namespaces { my $self = shift; $self->[PrefixesByURI] = @_ ? { reverse @_ } : undef; } sub start_document { my $self = shift; $self->[Hash] = undef; ## start_document doesn't set things up; start_element() does. This ## is because start_element() needs to discriminate the root element ## from contained elements; this in turn is because this handler ## may be turning an entire document in to a structure (in other words, ## the sender of SAX events can start sending any time before the ## root element this way; makes it easier). } sub end_document { return shift->[Hash] } sub start_element { my ( $self, $elt ) = ( shift, shift ); my $name; if ( $self->[PrefixesByURI] ) { my $prefix = $self->[PrefixesByURI]->{$elt->{NamespaceURI}}; die "Unknown namespace URI '$elt->{NamespaceURI}' for $elt->{Name}\n" unless defined $prefix; $name = length $prefix ? join ":", $prefix, $elt->{LocalName} : $elt->{LocalName}; } else { $name = $elt->{Name}; } my $hash = { "" => "", ## Character content }; unless ( defined $self->[Hash] ) { ## Root elt. @{$self->[Stack]} = (); } else { ## Nested element push @{$self->[Stack]}, $self->[Hash]; push @{$self->[Hash]->{$name}}, $hash; } $self->[Hash] = $hash; for my $attr ( values %{$elt->{Attributes}} ) { my $name; if ( $self->[PrefixesByURI] ) { next if $attr->{Prefix} eq "xmlns" || $attr->{Name} eq "xmlns"; my $prefix = length $attr->{NamespaceURI} ? $self->[PrefixesByURI]->{$attr->{NamespaceURI}} : ""; die "Unknown namespace URI '$attr->{NamespaceURI}' for $elt->{Name} attribute $attr->{Name}\n" unless defined $prefix; $name = length $prefix ? join ":", $prefix, $attr->{LocalName} : $attr->{LocalName}; } else { $name = $attr->{Name}; } $hash->{"\@$name"} = $attr->{Value}; } } sub characters { $_[0]->[Hash]->{""} .= $_[1]->{Data} } sub end_element { my $self = shift; $self->[Hash] = pop @{$self->[Stack]} if @{$self->[Stack]}; } =head1 AUTHOR Barrie Slaymaker =cut 1;