#----------------------------------------------------------------------------- # # $Id : XPath.pm 2.214 2006-03-18 JMG$ # # Initial developer: Jean-Marie Gouarne # Copyright 2006 by Genicorp, S.A. (www.genicorp.com) # License: # - Licence Publique Generale Genicorp v1.0 # - GNU Lesser General Public License v2.1 # #----------------------------------------------------------------------------- package OpenOffice::OODoc::XPath; use 5.008_000; our $VERSION = 2.214; use XML::Twig 3.22; use Encode; #------------------------------------------------------------------------------ our %XMLNAMES = # OODoc root element names ( 'content' => 'office:document-content', 'styles' => 'office:document-styles', 'meta' => 'office:document-meta', 'manifest' => 'manifest:manifest', 'settings' => 'office:document-settings' ); # characters to be escaped in XML our $CHARS_TO_ESCAPE = "\"<>'&"; # standard external character set our $LOCAL_CHARSET = 'iso-8859-1'; # OpenOffice.org character set our $OO_CHARSET = 'utf8'; #------------------------------------------------------------------------------ # basic conversion between internal & printable encodings sub OpenOffice::OODoc::XPath::decode_text { return Encode::encode($LOCAL_CHARSET, shift); } sub OpenOffice::OODoc::XPath::encode_text { return Encode::decode($LOCAL_CHARSET, shift); } #------------------------------------------------------------------------------ # basic element creation sub OpenOffice::OODoc::XPath::new_element { my $name = shift or return undef; return undef if ref $name; $name =~ s/^\s+//; $name =~ s/\s+$//; if ($name =~ /^<.*>$/) # create element from XML string { return OpenOffice::OODoc::Element->parse($name, @_); } else # create element from name and optional data { return OpenOffice::OODoc::Element->new($name, @_); } } #------------------------------------------------------------------------------ # text node creation sub OpenOffice::OODoc::XPath::new_text_node { return OpenOffice::OODoc::XPath::new_element('#PCDATA'); } #------------------------------------------------------------------------------ # basic conversion between internal & printable encodings (object version) sub inputTextConversion { my $self = shift; my $text = shift; my $local_encoding = $self->{'local_encoding'} or return $text; return Encode::decode($local_encoding, $text); } sub outputTextConversion { my $self = shift; my $text = shift; my $local_encoding = $self->{'local_encoding'} or return $text; return Encode::encode($local_encoding, $text); } sub localEncoding { my $self = shift; my $encoding = shift; $self->{'local_encoding'} = $encoding if $encoding; return $self->{'local_encoding'} || ''; } sub noLocalEncoding { my $self = shift; delete $self->{'local_encoding'}; return 1; } #------------------------------------------------------------------------------ # search/replace text processing routine # if $replace is a user-provided routine, it's called back with # the current argument stack, plus the substring found sub _find_text { my $self = shift; my $text = shift; my $pattern = $self->inputTextConversion(shift); my $replace = shift; if (defined $pattern) { if (defined $replace) { if (ref $replace) { if ((ref $replace) eq 'CODE') { return undef unless ( $text =~ s/($pattern)/ { my $found = $1; Encode::_utf8_on($found) if Encode::is_utf8($text); my $result = &$replace(@_, $found); $result = $found unless (defined $result); $result; } /eg ); } else { return undef unless ($text =~ /$pattern/); } } else { my $r = $self->inputTextConversion($replace); return undef unless ($text =~ s/$pattern/$r/g); } } else { return undef unless ($text =~ /$pattern/); } } return $text; } #------------------------------------------------------------------------------ # search/replace content in descendant nodes sub _search_content { my $self = shift; my $element = shift or return undef; my $content = undef; foreach my $child ($element->children) { my $text = undef; if ($child->isTextNode) { $text = $self->_find_text($child->text, @_); $child->set_text($text) if defined $text; } else { my $t = $self->_search_content($child, @_); $text .= $t if defined $t; } $content .= $text if defined $text; } return $content; } #------------------------------------------------------------------------------ # is this an OASIS Open Document or an OpenOffice 1.x Document ? sub isOpenDocument { my $self = shift; my $root = $self->getRootElement; my $ns = $root->att('xmlns:office'); return $ns && ($ns =~ /opendocument/) ? 1 : undef; } #------------------------------------------------------------------------------ # document class check sub isContent { my $self = shift; return ($self->contentClass()) ? 1 : undef; } sub isCalcDocument { my $self = shift; return ($self->contentClass() eq 'spreadsheet') ? 1 : undef; } sub isImpressDocument { my $self = shift; return ($self->contentClass() eq 'impress') ? 1 : undef; } sub isDrawDocument { my $self = shift; return ($self->contentClass() eq 'drawing') ? 1 : undef; } sub isWriterDocument { my $self = shift; return ($self->contentClass() eq 'text') ? 1 : undef; } #------------------------------------------------------------------------------ # constructor; accepts one from 3 types of parameters to create an instance: # file => a regular OpenOffice.org filename # archive => an OODoc::File, previously created object # xml => an XML string, representing an OO XML member # if 'file' or 'archive' (not 'xml') is provided, another parameter 'member' # must be provided in addition # member => member of the zip archive (meta.xml, content.xml, ...) sub new { my $caller = shift; my $class = ref($caller) || $caller; my $self = { auto_style_path => '//office:automatic-styles', master_style_path => '//office:master-styles', named_style_path => '//office:styles', image_container => 'draw:image', image_xpath => '//draw:image', image_fpath => '#Pictures/', local_encoding => $OpenOffice::OODoc::XPath::LOCAL_CHARSET, @_ }; my $twig = undef; if ($self->{'member'} && ! $self->{'element'}) { my $m = lc $self->{'member'}; $m =~ /(^.*)\..*/; $m = $1 if $1; $self->{'element'} = $OpenOffice::OODoc::XPath::XMLNAMES{$m}; } # create the XML::Twig if ( $self->{'readable_XML'} && ( ($self->{'readable_XML'} eq '1') || ($self->{'readable_XML'} eq 'true') || ($self->{'readable_XML'} eq 'on') ) ) { $self->{'readable_XML'} = 'indented'; } $self->{'element'} = $OpenOffice::OODoc::XPath::XMLNAMES{'content'} unless $self->{'element'}; if ($self->{'element'}) { $twig = XML::Twig->new ( elt_class => "OpenOffice::OODoc::Element", twig_roots => { $self->{'element'} => 1 }, pretty_print => $self->{'readable_XML'}, %{$self->{'twig_options'}} ); } else { $twig = XML::Twig->new ( elt_class => "OpenOffice::OODoc::Element", pretty_print => $self->{'readable_XML'}, %{$self->{'twig_options'}} ); } unless ($self->{'archive'}) { if (ref $self->{'file'}) { my $obj = $self->{'file'}; if ($obj->isa("OpenOffice::OODoc::File")) { $self->{'archive'} = $obj; } elsif ($obj->isa("OpenOffice::OODoc::XPath")) { $self->{'archive'} = $obj->{'archive'}; } else { warn "[" . __PACKAGE__ . "::new] " . "Invalid file object\n"; return undef; } delete $self->{'file'}; } } if ($self->{'xml'}) # load from XML string { $self->{'xpath'} = $twig->safe_parse($self->{'xml'}); } elsif ($self->{'archive'}) # load from existing OOFile { unless ($self->{'archive'}->isa("OpenOffice::OODoc::File")) { warn "[" . __PACKAGE__ . "] Invalid archive\n"; return undef; } $self->{'member'} = 'content' unless $self->{'member'}; $self->{'xml'} = $self->{'archive'}->link($self); $self->{'xpath'} = $twig->safe_parse($self->{'xml'}); } elsif ($self->{'file'}) # look for a file { unless ( $self->{'flat_xml'} || (lc $self->{'file'}) =~ /\.xml$/ ) { # create OOFile & extract from it require OpenOffice::OODoc::File; $self->{'archive'} = OpenOffice::OODoc::File->new ( $self->{'file'}, create => $self->{'create'}, opendocument => $self->{'opendocument'}, template_path => $self->{'template_path'} ); $self->{'member'} = 'content' unless $self->{'member'}; $self->{'xml'} = $self->{'archive'}->link($self); $self->{'xpath'} = $twig->safe_parse($self->{'xml'}); } else { # load from XML flat file $self->{'xpath'} = $twig->safe_parsefile($self->{'file'}); } } else { warn "[" . __PACKAGE__ . "::new] No XML content\n"; return undef; } delete $self->{'xml'}; unless ($self->{'xpath'}) { warn "[" . __PACKAGE__ . "::new] No well formed content\n"; return undef; } $self->{'twig'} = $twig; $self->{'context'} = $self->{'xpath'}; bless $self, $class; $self->{'opendocument'} = $self->isOpenDocument; if ($self->{'opendocument'}) { $self->{'image_container'} = 'draw:frame'; $self->{'image_xpath'} = '//draw:frame'; $self->{'image_fpath'} = 'Pictures/'; } $self->{'body'} = $self->getBody; return $self; } #------------------------------------------------------------------------------ # destructor sub DESTROY { my $self = shift; delete $self->{'file'}; delete $self->{'context'}; delete $self->{'xpath'}; delete $self->{'xml'}; delete $self->{'body'}; delete $self->{'content_class'}; $self->{'twig'}->dispose if $self->{'twig'}; delete $self->{'twig'}; delete $self->{'archive'}; delete $self->{'twig_options'}; $self = {}; } sub dispose { my $self = shift; $self->DESTROY(@_); } #------------------------------------------------------------------------------ # get a reference to the embedded XML parser for share sub getXMLParser { warn "[" . __PACKAGE__ . "::getXMLParser] No longer implemented\n"; return undef; } #------------------------------------------------------------------------------ # make the changes persistent in an OpenOffice.org file sub save { my $self = shift; my $target = shift; my $filename = ($target) ? $target : $self->{'file'}; my $archive = $self->{'archive'}; unless ($archive) { my $ro = $self->{'read_only'}; return undef if $ro && (($ro eq '1') || ($ro eq 'on') || ($ro eq 'true')); if ($filename) { open my $fh, ">:utf8", $filename; $self->exportXMLContent($fh); close $fh; return $filename; } else { warn "[" . __PACKAGE__ . "::save] No archive object\n"; return undef; } } $filename = $archive->{'source_file'} unless $filename; unless ($filename) { warn "[" . __PACKAGE__ . "::save] No target file\n"; return undef; } my $member = $self->{'member'}; unless ($member) { warn "[" . __PACKAGE__ . "::save] No member\n"; return undef; } my $result = $archive->save($filename); return $result; } sub update { my $self = shift; return $self->save(@_); } #------------------------------------------------------------------------------ # raw file import sub raw_import { my $self = shift; if ($self->{'archive'}) { my $target = shift; unless ($target) { warn "[" . __PACKAGE__ . "::raw_import] " . "No target member for import\n"; return undef; } $target =~ s/^#//; return $self->{'archive'}->raw_import($target, @_); } else { warn "[" . __PACKAGE__ . "::raw_import] " . "No archive for file import\n"; return undef; } } #------------------------------------------------------------------------------ # raw file export sub raw_export { my $self = shift; if ($self->{'archive'}) { my $source = shift; unless ($source) { warn "[" . __PACKAGE__ . "::raw_import] " . "Missing source file name\n"; return undef; } $source =~ s/^#//; return $self->{'archive'}->raw_export($source, @_); } else { warn "[" . __PACKAGE__ . "::raw_import] " . "No archive for file export\n"; return undef; } } #------------------------------------------------------------------------------ # exports the whole content of the document as an XML string sub exportXMLContent { my $self = shift; my $target = shift; if ($target) { return $self->{'twig'}->print($target); } else { return $self->{'twig'}->sprint; } } sub getXMLContent { my $self = shift; return $self->exportXMLContent(@_); } sub getContent { my $self = shift; return $self->exportXMLContent(@_); } #------------------------------------------------------------------------------ # brute force tree reorganization sub reorganize { warn "[" . __PACKAGE__ . "::reorganize] No longer implemented\n"; return undef; } #------------------------------------------------------------------------------ # returns the root of the XML document sub getRoot { my $self = shift; return $self->{'xpath'}->root; } #------------------------------------------------------------------------------ # returns the root element of the XML document sub getRootElement { my $self = shift; my $root = $self->{'xpath'}->root; my $rootname = $root->name() || ''; return ($rootname eq $self->{'element'}) ? $root : $root->first_child($self->{'element'}); } #------------------------------------------------------------------------------ # get/set/reset the current search context sub currentContext { my $self = shift; my $new_context = shift; $self->{'context'} = $new_context if (ref $new_context); return $self->{'context'}; } sub resetCurrentContext { $self = shift; return $self->currentContext($self->{'xpath'}); } #------------------------------------------------------------------------------ # returns the content class (text, spreadsheet, presentation, drawing) sub contentClass { my $self = shift; my $content_class = $self->getRootElement->getAttribute('office:class'); return $content_class if $content_class; my $body = $self->getBody or return undef; my $name = $body->name or return undef; $name =~ /(.*):(.*)/; return $2; } #------------------------------------------------------------------------------ # element name check sub getRootName { my $self = shift; return $self->getRootElement->name; } #------------------------------------------------------------------------------ # member type checks sub isMeta { my $self = shift; return ($self->getRootName() eq $XMLNAMES{'meta'}) ? 1 : undef; } sub isStyles { my $self = shift; return ($self->getRootName() eq $XMLNAMES{'styles'}) ? 1 : undef; } sub isSettings { my $self = shift; return ($self->getRootName() eq $XMLNAMES{'settings'}) ? 1 : undef; } #------------------------------------------------------------------------------ # returns the document body element (if defined) sub getBody { my $self = shift; return $self->{'body'} if ref $self->{'body'}; if ($self->{'body_path'}) { $self->{'body'} =$self->getElement($self->{'body_path'}, 0); return $self->{'body'}; } my $office_body = $self->getElement('//office:body', 0); if ($office_body) { $self->{'body'} = $self->{'opendocument'} ? $office_body->selectChildElement ('office:(text|spreadsheet|presentation|drawing)') : $office_body; } else { $self->{'body'} = $self->getRootElement->selectChildElement ( 'office:(body|meta|master-styles|settings)' ); } return $self->{'body'}; } #------------------------------------------------------------------------------ # makes the current OODoc::XPath object share the same content as another one sub cloneContent { $self = shift; $source = shift; unless ($source && $source->{'xpath'}) { warn "[" . __PACKAGE__ . "]::cloneContent - No valid source\n"; return undef; } $self->{'xpath'} = $source->{'xpath'}; $self->{'begin'} = $source->{'begin'}; $self->{'xml'} = $source->{'xml'}; $self->{'end'} = $source->{'end'}; return $self->getRoot; } #------------------------------------------------------------------------------ # exports an individual element as an XML string sub exportXMLElement { my $self = shift; my $path = shift; my $element = (ref $path) ? $path : $self->getElement($path, shift); my $text = $element->sprint(@_); return $text; } #------------------------------------------------------------------------------ # exports the document body (if defined) as an XML string sub exportXMLBody { my $self = shift; return $self->exportXMLElement($self->getBody, @_); } #------------------------------------------------------------------------------ # gets the reference of an XML element identified by path & position # for subsequent processing sub getElement { my $self = shift; my $path = shift; return undef unless $path; if (ref $path) { return $path->isElementNode ? $path : undef; } my $pos = shift || 0; my $context = shift || $self->{'context'}; if (defined $pos && (($pos =~ /^\d*$/) || ($pos =~ /^[\d+-]\d+$/))) { $path =~ s/^\/\//\.\// unless $context eq $self->{'xpath'}; my $node = $context->get_xpath($path, $pos); return $node && $node->isElementNode ? $node : undef; } else { warn "[" . __PACKAGE__ . "::getElement] " . "Invalid node position\n"; return undef; } } #------------------------------------------------------------------------------ # get the list of children (or the first child unless wantarray) matching # a given element name and belonging to a given element sub selectChildElementsByName { my $self = shift; my $path = shift; my $element = ref $path ? $path : $self->getElement($path, shift); return undef unless $element; return $element->selectChildElements(@_); } #------------------------------------------------------------------------------ # get the first child belonging to a given element and matching a given name sub selectChildElementByName { my $self = shift; my $path = shift; my $element = ref $path ? $path : $self->getElement($path, shift); return undef unless $element; return $element->selectChildElement(@_); } sub getChildElementByName { my $self = shift; return $self->selectChildElementByName(@_); } #----------------------------------------------------------------------------- sub createSpaces { my $self = shift; my $length = shift or return undef; my $element = $self->createElement('text:s'); $element->set_att('text:c' => $length); return $element; } #------------------------------------------------------------------------------ sub appendLineBreak { my $self = shift; my $element = shift; return $element->appendChild('text:line-break'); } #------------------------------------------------------------------------------ sub appendSpaces { my $self = shift; my $element = shift; my $length = shift; my $spaces = $self->createSpaces($length) or return undef; $spaces->paste_last_child($element); } #------------------------------------------------------------------------------ sub appendTabStop { my $self = shift; my $element = shift; my $tabtag = $self->{'opendocument'} ? 'text:tab' : 'text:tab-stop'; return $element->appendChild($tabtag); } #------------------------------------------------------------------------------ # replaces any previous content of an existing element by a given text sub setText { my $self = shift; my $path = shift; my $pos = (ref $path) ? undef : shift; my $text = shift; return undef unless defined $text; my $element = $self->OpenOffice::OODoc::XPath::getElement ($path, $pos); return undef unless $element; unless ($text) { $element->set_text($text); return $text; } my $tabtag = $self->{'opendocument'} ? 'text:tab' : 'text:tab-stop'; $element->set_text(""); my @lines = split "\n", $text; while (@lines) { my $line = shift @lines; my @columns = split "\t", $line; while (@columns) { my $column = $self->inputTextConversion(shift @columns); $element->appendTextChild($column); $element->appendChild($tabtag) if (@columns); } $element->appendChild('text:line-break') if (@lines); } return $text; } #------------------------------------------------------------------------------ # extends the text of an existing element sub extendText { my $self = shift; my $path = shift; my $pos = (ref $path) ? undef : shift; my $text = shift; return undef unless defined $text; my $element = $self->getElement($path, $pos); return undef unless $element; if (ref $text) { $text->paste_last_child($element) if $text->isElementNode; return $text; } my $tabtag = $self->{'opendocument'} ? 'text:tab' : 'text:tab-stop'; my @lines = split "\n", $text; while (@lines) { my $line = shift @lines; my @columns = split "\t", $line; while (@columns) { my $column = $self->inputTextConversion(shift @columns); $element->appendTextChild($column); $element->appendChild($tabtag) if (@columns); } $element->appendChild('text:line-break') if (@lines); } return $text; } #------------------------------------------------------------------------------ # creates a new encoded text node sub createTextNode { my $self = shift; my $text = shift or return undef; my $content = $self->inputTextConversion($text); return OpenOffice::OODoc::Element->new('#PCDATA' => $text); } #------------------------------------------------------------------------------ # replaces substring in an element sub replaceText { my $self = shift; my $path = shift; my $element = (ref $path) ? $path : $self->getElement($path, shift); return $self->_search_content($element, @_); } #------------------------------------------------------------------------------ # gets text in element by path (sub-element texts are concatenated) sub getText { my $self = shift; my $element = $self->OpenOffice::OODoc::XPath::getElement(@_); return undef unless ($element && $element->isElementNode); my $text = ''; my $name = $element->getName; if ($name eq 'text:tab-stop') { return "\t"; } if ($name eq 'text:line-break') { return "\n"; } if ($name eq 'text:s') { my $spaces = ""; my $count = $element->att('text:c') || 1; while ($count > 0) { $spaces .= ' '; $count--; } return $spaces; } foreach my $node ($element->getChildNodes) { if ($node->isElementNode) { $text .= ( $self->getText($node) || '' ); } else { my $t = ($node->getValue() || ''); $text .= $self->outputTextConversion($t); } } return $text; } #------------------------------------------------------------------------------ # returns the children of a given element sub getElementList { my $self = shift; my $path = shift; my $context = shift; if ($context) { $path = "./" unless $path; } else { $path = "/" unless $path; $context = $self->{'context'}; } $path =~ s/^\/\//\.\// if $context ne $self->{'xpath'}; return $context->findnodes($path); } #------------------------------------------------------------------------------ # brute XPath nodelist selection; allows any XML::XPath expression sub selectNodesByXPath { my $self = shift; my ($p1, $p2) = @_; my $path = undef; my $context = undef; if (ref $p1) { $context = $p1; $path = $p2; } else { $path = $p1; $context = $p2; } $context = $self->{'context'} unless ref $context; return $context->get_xpath($path); } #------------------------------------------------------------------------------ # brute XPath single node selection; allows any XML::XPath expression sub selectNodeByXPath { my $self = shift; my ($p1, $p2) = @_; my $path = undef; my $context = undef; if (ref $p1) { $context = $p1; $path = $p2; } else { $path = $p1; $context = $p2; } $context = $self->{'context'} unless ref $context; return $context->get_xpath($path, 0); } sub getNodeByXPath { my $self = shift; return $self->selectNodeByXPath(@_); } #------------------------------------------------------------------------------ # brute XPath value extraction; allows any XML::XPath expression sub getXPathValue { my $self = shift; my ($p1, $p2) = @_; my $path = undef; my $context = undef; if (ref $p1) { $context = $p1; $path = $p2; } else { $path = $p1; $context = $p2; } $context = $self->{'context'} unless $context; $path =~ s/^\/*// if $context ne $self->{'xpath'}; return $self->outputTextConversion($context->findvalue($path, @_)); } #------------------------------------------------------------------------------ # create or update an xpath sub makeXPath { my $self = shift; my $path = shift; my $root = undef; if (ref $path) { $root = $path; $path = shift; } else { $root = $self->getRoot; } $path =~ s/^[\/ ]*//; $path =~ s/[\/ ]*$//; my @list = split '/', $path; my $posnode = $root; while (@list) { my $item = shift @list; while (($item =~ /\[.*/) && !($item =~ /\[.*\]/)) { my $cont = shift @list or last; $item .= ('/' . $cont); } next unless $item; my $node = undef; my $name = undef; my $param = undef; $item =~ s/\[(.*)\] *//; $param = $1; $name = $item; $name =~ s/^ *//; $name =~ s/ *$//; my %attributes = (); my $text = undef; my $indice = undef; if ($param) { my @attrlist = []; $indice = undef; $param =~ s/^ *//; $param =~ s/ *$//; $param =~ s/^@//; @attrlist = split /@/, $param; foreach my $a (@attrlist) { next unless $a; $a =~ s/^ *//; my $tmp = $a; $tmp =~ s/ *$//; if ($tmp =~ /^\d*$/) { $indice = $tmp; next; } if ($a =~ s/^\"(.*)\".*/$1/) { $text = $1; next; } if ($a =~ /^=/) { $a =~ s/^=//; $a =~ '^"(.*)"$'; $text = $1 ? $1 : $a; next; } $a =~ s/^@//; my ($attname, $attvalue) = split '=', $a; next unless $attname; if ($attvalue) { $attvalue =~ '"(.*)"'; $attvalue = $1 if $1; } $attname =~ s/^ *//; $attname =~ s/ *$//; $attributes{$attname} = $attvalue; } } if (defined $indice) { $node = $self->getNodeByXPath ($posnode, "$name\[$indice\]"); } else { $node = $self->getChildElementByName($posnode, $name); } if ($node) { $self->setAttributes($node, %attributes); $self->setText($node, $text) if (defined $text); } else { $node = $self->appendElement ( $posnode, $name, text => $text, attributes => {%attributes} ); } if ($node) { $posnode = $node; } else { return undef; } } return $posnode; } #------------------------------------------------------------------------------ # selects element by path and attribute sub selectElementByAttribute { my $self = shift; my $path = shift; my $key = shift; my $value = shift || ''; my @candidates = $self->getElementList($path); return @candidates unless $key; for (@candidates) { if ($_->isElementNode) { my $v = $self->getAttribute($_, $key); return $_ if (defined $v && ($v =~ /$value/)); } } return undef; } #------------------------------------------------------------------------------ # selects list of elements by path and attribute sub selectElementsByAttribute { my $self = shift; my $path = shift; my $key = shift; my $value = shift || ''; my @candidates = $self->getElementList($path); return @candidates unless $key; my @selection = (); for (@candidates) { if ($_->isElementNode) { my $v = $self->getAttribute($_, $key); push @selection, $_ if ($v && ($v =~ /$value/)); } } return wantarray ? @selection : $selection[0]; } #------------------------------------------------------------------------------ # get a list of elements matching a given path and an optional content pattern sub findElementList { my $self = shift; my $path = shift; my $pattern = shift; my $replace = shift; return undef unless $path; my @result = (); my $context = $self->{'context'}; $path =~ s/^\/\//\.\// unless $context eq $self->{'xpath'}; foreach my $n ($context->findnodes($path)) { push @result, [ $self->findDescendants($n, $pattern, $replace, @_) ]; } return @result; } #------------------------------------------------------------------------------ # get a list of elements matching a given path and an optional content pattern # without replacement operation, and from an optional context node sub selectElements { my $self = shift; my $path = shift; my $context = $self->{'xpath'}; if (ref $path) { $context = $path; $path = shift; } my $filter = shift; my @candidates = $self->selectNodesByXPath($context, $path); return @candidates unless $filter; my @result = (); while (@candidates) { my $node = shift @candidates; push @result, $node if $self->_search_content($node, $filter, @_, $node); } return @result; } #------------------------------------------------------------------------------ # get the 1st element matching a given path and on optional content pattern sub selectElement { my $self = shift; my $path = shift; my $context = $self->{'xpath'}; if (ref $path) { $context = $path; $path = shift; } return undef unless $path; my $filter = shift; my @candidates = $self->selectNodesByXPath($context, $path); return $candidates[0] unless $filter; while (@candidates) { my $node = shift @candidates; return $node if $self->_search_content($node, $filter, @_, $node); } return undef; } #------------------------------------------------------------------------------ # gets the descendants of a given node, with optional in fly search/replacement sub findDescendants { my $self = shift; my $node = shift; my $pattern = shift; my $replace = shift; my @result = (); my $n = $self->selectNodeByContent($node, $pattern, $replace, @_); push @result, $n if $n; foreach my $m ($node->getChildNodes) { push @result, [ $self->findDescendants($m, $pattern, $replace, @_) ]; } return @result; } #------------------------------------------------------------------------------ # search & replace text in an individual node sub selectNodeByContent { my $self = shift; my $node = shift; my $pattern = shift; my $replace = shift; return $node unless $pattern; my $l = $node->getNodeValue; return undef unless $l; unless (defined $replace) { return ($l =~ /$pattern/) ? $node : undef; } else { if (ref $replace) { unless ($l =~ s/($pattern)/&$replace(@_, $node, $1)/eg) { return undef; } } else { unless ($l =~ s/$pattern/$replace/g) { return undef; } } $node->setNodeValue($l); return $node; } } #------------------------------------------------------------------------------ # gets the text content of a nodelist sub getTextList { my $self = shift; my $path = shift; my $pattern = shift; my $context = shift; return undef unless $path; $context = $self->{'context'} unless $context; $path =~ s/^\/\//\.\// unless $context eq $self->{'xpath'}; my @nodelist = $context->findnodes($path); my @text = (); foreach my $n (@nodelist) { my $l = $self->outputTextConversion($n->string_value); push @text, $l if ((! defined $pattern) || ($l =~ /$pattern/)); } return wantarray ? @text : join "\n", @text; } #------------------------------------------------------------------------------ # gets the attributes of an element in the key => value form sub getAttributes { my $self = shift; my $path = shift; my $pos = (ref $path) ? undef : shift; return undef unless $path; $pos = 0 unless $pos; my $node = $self->getElement($path, $pos, @_); return undef unless $path; my %attributes = (); my $aa = $node->atts(@_); my %atts = %{$aa} if $aa; foreach my $a (keys %atts) { $attributes{$a} = $self->outputTextConversion($atts{$a}); } return %attributes; } #------------------------------------------------------------------------------ # gets the value of an attribute by path + name sub getAttribute { my $self = shift; my $path = shift; my $pos = (ref $path) ? undef : shift; my $name = shift; return undef unless $path; $pos = 0 unless $pos; my $node = $self->getElement($path, $pos, @_); return $self->outputTextConversion($node->att($name)); } #------------------------------------------------------------------------------ # set/replace a list of attributes in an element sub setAttributes { my $self = shift; my $path = shift; my $pos = (ref $path) ? undef : shift; my %attr = @_; my $node = $self->getElement($path, $pos, $attr{'context'}); return undef unless $node; foreach my $k (keys %attr) { if (defined $attr{$k}) { $node->set_att ( $k, $self->inputTextConversion($attr{$k}) ); } else { $node->del_att($k) if $node->att($k); } } return %attr; } #------------------------------------------------------------------------------ # set/replace a single attribute in an element sub setAttribute { my $self = shift; my $path = shift; my $pos = (ref $path) ? undef : shift; my $attribute = shift or return undef; my $value = shift; my $node = $self->getElement($path, $pos, @_) or return undef; if (defined $value) { $node->set_att ( $attribute, $self->inputTextConversion($value) ); } else { $node->del_att($attribute) if $node->att($attribute); } return $value; } #------------------------------------------------------------------------------ # removes an attribute in element sub removeAttribute { my $self = shift; my $path = shift; my $pos = (ref $path) ? undef : shift; my $name = shift; my $node = $self->getElement($path, $pos, @_); return undef unless $node; return $node->del_att($name) if $node->att($name); } #------------------------------------------------------------------------------ # replicates an existing element, provided as an XPath ref or an XML string sub replicateElement { my $self = shift; my $proto = shift; my $position = shift; my %options = @_; unless ($proto && ref $proto && $proto->isElementNode) { warn "[" . __PACKAGE__ . "::replicateElement] No prototype\n"; return undef; } $position = 'end' unless $position; my $element = $proto->copy; $self->setAttributes($element, %{$options{'attribute'}}); if (ref $position) { if (! $options{'position'}) { $element->paste_last_child($position); } elsif ($options{'position'} eq 'before') { $element->paste_before($position); } elsif ($options{'position'} eq 'after') { $element->paste_after($position); } elsif ($options{'position'} ne 'free') { warn "[" . __PACKAGE__ . "::replicateElement] " . "No valid attachment option\n"; } } elsif ($position eq 'end') { $element->paste_last_child($self->{'xpath'}->root); } elsif ($position eq 'body') { $element->paste_last_child($self->getBody); } return $element; } #------------------------------------------------------------------------------ # create an element, just with a mandatory name and an optional text # the name can have the namespace:name form # if the $name argument is a '<.*>' string, it's processed as XML and # the new element is completely generated from it sub createElement { my $self = shift; my $name = shift; my $text = shift; my $element = OpenOffice::OODoc::XPath::new_element($name, @_); unless ($element) { warn "[" . __PACKAGE__ . "::createElement] " . "Element creation failure\n"; return undef; } $self->setText($element, $text) if defined $text; return $element; } #------------------------------------------------------------------------------ # replaces an element by another one # the new element is inserted before the old one, # then the old element is removed. # the new element can be inserted by copy (default) or by reference # return = new element if success, undef if failure sub replaceElement { my $self = shift; my $path = shift; my $pos = (ref $path) ? undef : shift; my $new_element = shift; my %options = ( mode => 'copy', @_ ); unless ($new_element) { warn "[" . __PACKAGE__ . "::replaceElement] " . "Missing new element\n"; return undef; } unless (ref $new_element) { $new_element = $self->createElement($new_element); $options{'mode'} = 'reference'; } unless ($new_element && $new_element->isElementNode) { warn "[" . __PACKAGE__ . "::replaceElement] " . "No valid replacement\n"; return undef; } my $result = undef; my $old_element = $self->getElement ($path, $pos, $options{'context'}); unless ($old_element) { warn "[" . __PACKAGE__ . "::replaceElement] " . "Non existing element to be replaced\n"; return undef; } if (! $options{'mode'} || $options{'mode'} eq 'copy') { $result = $new_element->copy; $result->replace($old_element); return $result; } elsif ($options{'mode'} && $options{'mode'} eq 'reference') { $result = $self->insertElement ( $old_element, $new_element, position => 'before' ); $self->removeElement($old_element); return $result; } else { warn "[" . __PACKAGE__ . "::replaceElement] " . "Unknown option\n"; } return undef; } #------------------------------------------------------------------------------ # adds a new or existing child element sub appendElement { my $self = shift; my $path = shift; my $pos = (ref $path) ? undef : shift; my $name = shift; my %opt = @_; $opt{'attribute'} = $opt{'attributes'} unless ($opt{'attribute'}); return undef unless $name; my $element = undef; unless (ref $name) { $element = $self->createElement($name, $opt{'text'}); } else { $element = $name; $self->setText($element, $opt{'text'}) if $opt{'text'}; } return undef unless $element; my $parent = $self->getElement ($path, $pos, $opt{'context'}); unless ($parent) { warn "[" . __PACKAGE__ . "::appendElement] Position not found\n"; return undef; } $element->paste_last_child($parent); $self->setAttributes($element, %{$opt{'attribute'}}); return $element; } #------------------------------------------------------------------------------ # inserts a new element before or after a given node # as appendElement, but the new element is a 'brother' (and not a child) of # the first given element sub insertElement { my $self = shift; my $path = shift; my $pos = (ref $path) ? undef : shift; my $name = shift; my %opt = @_; return undef unless $name; my $element = undef; unless (ref $name) { $element = $self->createElement($name, $opt{'text'}); } else { $element = $name; $self->setText($element, $opt{'text'}) if $opt{'text'}; } return undef unless $element; my $posnode = $self->getElement($path, $pos, $opt{'context'}); unless ($posnode) { warn "[" . __PACKAGE__ . "::insertElement] Unknown position\n"; return undef; } if (($opt{'position'}) && ($opt{'position'} eq 'after')) { $element->paste_after($posnode); } else { $element->paste_before($posnode); } $self->setAttributes($element, %{$opt{'attribute'}}); return $element; } #------------------------------------------------------------------------------ # removes the given element & children sub removeElement { my $self = shift; my $e = $self->getElement(@_); return undef unless $e; return $e->delete; } #------------------------------------------------------------------------------ # cuts the given element & children (to be pasted elsewhere) sub cutElement { my $self = shift; my $e = $self->getElement(@_); return undef unless $e; $e->cut; return $e; } #------------------------------------------------------------------------------ # some extensions for XML Twig elements package OpenOffice::OODoc::Element; our @ISA = qw ( XML::Twig::Elt ); #------------------------------------------------------------------------------ sub hasTagName { my $node = shift; my $name = $node->getName; my $value = shift; return ($name && ($name eq $value)) ? 1 : undef; } sub getLocalPosition { my $node = shift; my $tag = $node->getName or return undef; my $xpos = $node->pos($tag); return defined $xpos ? $xpos - 1 : undef; } sub setName { my $node = shift; return $node->set_tag(shift); } sub getPrefix { my $node = shift; return $node->ns_prefix; } sub selectChildElements { my $node = shift; my $filter = shift; return $node->getChildNodes() unless $filter; my @list = (); my $fc = $node->first_child; my $name = $fc->name if $fc; while ($fc) { if ($name && ($name =~ /$filter/)) { return $fc unless wantarray; push @list, $fc; } $fc = $fc->next_sibling; $name = $fc->name if $fc;; } return @list; } sub selectChildElement { my $node = shift; my $filter = shift; my $pos = shift || 0; my $count = 0; my $fc = $node->first_child; my $name = $fc->name if $fc; while ($fc) { if ($name && ($name =~ /$filter/)) { return $fc if ($count >= $pos); $count++; } $fc = $fc->next_sibling; $name = $fc->name if $fc; } return undef; } sub getParentNode { my $node = shift; return $node->parent; } sub getFirstChild { my $node = shift; my $fc = $node->first_child(@_); my $name = $fc->name if $fc; while ($name && ($name =~ /^#/)) { $fc = $fc->next_sibling(@_); $name = $fc->name if $fc; } return $fc; } sub getLastChild { my $node = shift; my $lc = $node->last_child(@_); my $name = $lc->name; while ($name && ($name =~ /^#/)) { $lc = $lc->prev_sibling(@_); $name = $lc->name; } return $lc; } sub getDescendantNodes { my $node = shift; my @children = $node->getChildNodes(@_); my @descendants = (); foreach my $child (@children) { push @descendants, $child, $child->getDescendantNodes(@_); } return @descendants; } sub getDescendantTextNodes { my $node = shift; return $node->descendants('#PCDATA'); } sub appendChild { my $node = shift; my $child = shift; unless (ref $child) { $child = OpenOffice::OODoc::XPath::new_element($child, @_); } return $child->paste_last_child($node); } sub removeChildNodes { my $node = shift; return $node->cut_children(@_); } sub replicateNode { my $node = shift; my $number = shift || 1; my $position = shift || 'after'; my $lastnode = $node; while ($number > 0) { my $newnode = $node->copy; $newnode->paste($position => $lastnode); $last_node = $newnode; $number--; } return $last_node; } sub getNodeValue { my $node = shift; return $node->text; } sub getValue { my $node = shift; return $node->text; } sub setNodeValue { my $node = shift; return $node->set_text(@_); } sub appendTextChild { my $node = shift; my $text = shift; return undef unless defined $text; my $text_node = OpenOffice::OODoc::Element->new('#PCDATA' => $text); return $text_node->paste(last_child => $node); } sub getAttribute { my $node = shift; return $node->att(@_); } sub getAttributes { my $node = shift; return %{$node->atts(@_)}; } sub setAttribute { my $node = shift or return undef; my $attribute = shift; my $value = shift; if (defined $value) { return $node->set_att($attribute, $value, @_); } else { if ($node->{$attribute}) { return $node->del_att($attribute); } else { return undef; } } } sub removeAttribute { my $node = shift or return undef; return $node->del_att(shift); } #------------------------------------------------------------------------------ 1;