#!/usr/local/bin/perl -w
#
# $Revision: 1.1.1.1 $
#
# $Date: 2003-07-27 11:07:11 $
use XML::Parser;
my $Usage = <<'End_of_Usage;';
Usage is:
xmlfilter [-h] [-nl] [{-+}root] [{-+}el=elname] [{-+}el:elnamepat]
[{-+}att:attname] [{-+}att:attname:attvalpat] xmlfile
Prints on standard output the result of filtering the given xmlfile
for elements according to the switches. A '-' option will drop the
element from the output; a '+' will keep it. The output should also
be a well-formed XML document.
-h Print this message
-nl Emit a newline prior to every start tag.
[-+]root Drop (or keep) the root element. Defaults to keep.
If the root element were named "foo", then -root
would be equivalent to -el=foo. Note that even if
you're dropping the root element, it's start and
end tag are kept in order that the output remains
a well-formed XML document.
[-+]el=elname
Drop (or keep) elements of type elname.
[-+]el:elnamepat
Drop (or keep) element whose type name matches elnamepat.
[-+]att:attname
Drop (or keep) elements which have an attribute = attname.
[-+]att:attname:attvalpat
Drop (or keep) elements which have an attribute = attname
and for which the attribute value matches attvalpat.
End_of_Usage;
my $pass = 1;
my $do_newline = 0;
my $attcheck = 0;
my %drop_el;
my @drop_elpat;
my %keep_el;
my @keep_elpat;
my %drop_att;
my %keep_att;
my $always_true = sub { 1; };
my $root_element = '';
my $in_cdata = 0;
# Process options
while ( defined( $ARGV[0] ) and $ARGV[0] =~ /^[-+]/ ) {
my $opt = shift;
if ( $opt eq '-root' ) {
$pass = 0;
}
elsif ( $opt eq '+root' ) {
$pass = 1;
}
elsif ( $opt eq '-h' ) {
print $Usage;
exit;
}
elsif ( $opt eq '-nl' ) {
$do_newline = 1;
}
elsif ( $opt =~ /^([-+])el([:=])(\S*)/ ) {
my ( $disp, $kind, $pattern ) = ( $1, $2, $3 );
my ( $hashref, $aref );
if ( $disp eq '-' ) {
$hashref = \%drop_el;
$aref = \@drop_elpat;
}
else {
$hashref = \%keep_el;
$aref = \@keep_elpat;
}
if ( $kind eq '=' ) {
$hashref->{$pattern} = 1;
}
else {
push( @$aref, $pattern );
}
}
elsif ( $opt =~ /^([-+])att:(\w+)(?::(\S*))?/ ) {
my ( $disp, $id, $pattern ) = ( $1, $2, $3 );
my $ref = ( $disp eq '-' ) ? \%drop_att : \%keep_att;
if ( defined($pattern) ) {
$pattern =~ s!/!\\/!g;
my $sub;
eval "\$sub = sub {\$_[0] =~ /$pattern/;};";
$ref->{$id} = $sub;
}
else {
$ref->{$id} = $always_true;
}
$attcheck = 1;
}
else {
die "Unknown option: $opt\n$Usage";
}
}
my $drop_el_pattern = join( '|', @drop_elpat );
my $keep_el_pattern = join( '|', @keep_elpat );
my $drop_sub;
if ($drop_el_pattern) {
eval "\$drop_sub = sub {\$_[0] =~ /$drop_el_pattern/;}";
}
else {
$drop_sub = sub { };
}
my $keep_sub;
if ($keep_el_pattern) {
eval "\$keep_sub = sub {\$_[0] =~ /$keep_el_pattern/;}";
}
else {
$keep_sub = sub { };
}
my $doc = shift;
die "No file specified\n$Usage" unless defined($doc);
my @togglestack = ();
my $p = new XML::Parser(
ErrorContext => 2,
Handlers => {
Start => \&start_handler,
End => \&end_handler
}
);
if ($pass) {
$p->setHandlers(
Char => \&char_handler,
CdataStart => \&cdata_start,
CdataEnd => \&cdata_end
);
}
$p->parsefile($doc);
print "</$root_element>\n"
unless $pass;
################
## End of main
################
sub start_handler {
my $xp = shift;
my $el = shift;
unless ($root_element) {
$root_element = $el;
print "<$el>\n"
unless $pass;
}
my ( $elref, $attref, $sub );
if ($pass) {
$elref = \%drop_el;
$attref = \%drop_att;
$sub = $drop_sub;
}
else {
$elref = \%keep_el;
$attref = \%keep_att;
$sub = $keep_sub;
}
if ( defined( $elref->{$el} )
or &$sub($el)
or check_atts( $attref, @_ ) ) {
$pass = !$pass;
if ($pass) {
$xp->setHandlers(
Char => \&char_handler,
CdataStart => \&cdata_start,
CdataEnd => \&cdata_end
);
}
else {
$xp->setHandlers(
Char => 0,
CdataStart => 0,
CdataEnd => 0
);
}
push( @togglestack, $xp->depth );
}
if ($pass) {
print "\n" if $do_newline;
print "<$el";
while (@_) {
my $id = shift;
my $val = shift;
$val = $xp->xml_escape( $val, "'" );
print " $id='$val'";
}
print ">";
}
} # End start_handler
sub end_handler {
my $xp = shift;
my $el = shift;
if ($pass) {
print "</$el>";
}
if ( @togglestack and $togglestack[-1] == $xp->depth ) {
$pass = !$pass;
if ($pass) {
$xp->setHandlers(
Char => \&char_handler,
CdataStart => \&cdata_start,
CdataEnd => \&cdata_end
);
}
else {
$xp->setHandlers(
Char => 0,
CdataStart => 0,
CdataEnd => 0
);
}
pop(@togglestack);
}
} # End end_handler
sub char_handler {
my ( $xp, $text ) = @_;
if ( length($text) ) {
$text = $xp->xml_escape( $text, '>' )
unless $in_cdata;
print $text;
}
} # End char_handler
sub cdata_start {
my $xp = shift;
print '<![CDATA[';
$in_cdata = 1;
}
sub cdata_end {
my $xp = shift;
print ']]>';
$in_cdata = 0;
}
sub check_atts {
return $attcheck unless $attcheck;
my $ref = shift;
while (@_) {
my $id = shift;
my $val = shift;
if ( defined( $ref->{$id} ) ) {
my $ret = &{ $ref->{$id} }($val);
return $ret if $ret;
}
}
return 0;
} # End check_atts
# Tell Emacs that this is really a perl script
# Local Variables:
# mode:perl
# End: