#-----------------------------------------------------------------------------
#
# $Id : XPath.pm 1.202 2005-02-17 JMG$
#
# Initial developer: Jean-Marie Gouarne
# Copyright 2005 by Genicorp, S.A. (www.genicorp.com)
# Licensing conditions:
# - Licence Publique Generale Genicorp v1.0
# - GNU Lesser General Public License v2.1
# Contact: oodoc@genicorp.com
#
#-----------------------------------------------------------------------------
package OpenOffice::OODoc::XPath;
use 5.008_000;
our $VERSION = 1.202;
use XML::Twig 3.15;
use Encode;
#------------------------------------------------------------------------------
our %XMLNAMES = # OODoc root element names
(
'content' => 'office:document-content',
'styles' => 'office:document-styles',
'meta' => 'office:document-meta',
'manifest' => 'manifest:manifest',
'settings' => 'office:document-settings'
);
# characters to be escaped in XML
our $CHARS_TO_ESCAPE = "\"<>'&";
# standard external character set
our $LOCAL_CHARSET = 'iso-8859-1';
# OpenOffice.org character set
our $OO_CHARSET = 'utf8';
#------------------------------------------------------------------------------
# basic conversion between internal & printable encodings
sub OpenOffice::OODoc::XPath::decode_text
{
return Encode::encode($LOCAL_CHARSET, shift);
}
sub OpenOffice::OODoc::XPath::encode_text
{
return Encode::decode($LOCAL_CHARSET, shift);
}
#------------------------------------------------------------------------------
# basic element creation
sub OpenOffice::OODoc::XPath::new_element
{
my $name = shift or return undef;
return undef if ref $name;
$name =~ s/^\s+//;
$name =~ s/\s+$//;
if ($name =~ /^<.*>$/) # create element from XML string
{
return XML::Twig::Elt->parse($name, @_);
}
else # create element from name and optional data
{
return XML::Twig::Elt->new($name, @_);
}
}
#------------------------------------------------------------------------------
# text node creation
sub OpenOffice::OODoc::XPath::new_text_node
{
return OpenOffice::OODoc::XPath::new_element('#PCDATA');
}
#------------------------------------------------------------------------------
# some wrappers for XML Twig elements
package XML::Twig::Elt;
#------------------------------------------------------------------------------
sub setName
{
my $node = shift;
return $node->set_tag(shift);
}
sub getPrefix
{
my $node = shift;
return $node->ns_prefix;
}
sub selectChildElements
{
my $node = shift;
my $filter = shift;
return $node->getChildNodes() unless $filter;
my @list = ();
my $fc = $node->first_child;
my $name = $fc->name if $fc;
while ($fc)
{
if ($name && ($name =~ /$filter/))
{
return $fc unless wantarray;
push @list, $fc;
}
$fc = $fc->next_sibling;
$name = $fc->name if $fc;;
}
return @list;
}
sub selectChildElement
{
my $node = shift;
my $filter = shift;
my $pos = shift || 0;
my $count = 0;
my $fc = $node->first_child;
my $name = $fc->name if $fc;
while ($fc)
{
if ($name && ($name =~ $filter))
{
return $fc if ($count >= $pos);
$count++;
}
$fc = $fc->next_sibling;
$name = $fc->name if $fc;
}
return undef;
}
sub getParentNode
{
my $node = shift;
return $node->parent;
}
sub getFirstChild
{
my $node = shift;
my $fc = $node->first_child(@_);
my $name = $fc->name if $fc;
while ($name && ($name =~ /^#/))
{
$fc = $fc->next_sibling(@_);
$name = $fc->name if $fc;
}
return $fc;
}
sub getLastChild
{
my $node = shift;
my $lc = $node->last_child(@_);
my $name = $lc->name;
while ($name && ($name =~ /^#/))
{
$lc = $lc->prev_sibling(@_);
$name = $lc->name;
}
return $lc;
}
sub getDescendantNodes
{
my $node = shift;
my @children = $node->getChildNodes(@_);
my @descendants = ();
foreach my $child (@children)
{
push @descendants, $child, $child->getDescendantNodes(@_);
}
return @descendants;
}
sub getDescendantTextNodes
{
my $node = shift;
return $node->descendants('#PCDATA');
}
sub appendChild
{
my $node = shift;
my $child = shift;
unless (ref $child)
{
$child = OpenOffice::OODoc::XPath::new_element($child, @_);
}
return $child->paste_last_child($node);
}
sub removeChildNodes
{
my $node = shift;
return $node->cut_children(@_);
}
sub replicateNode
{
my $node = shift;
my $number = shift || 1;
my $position = shift || 'after';
my $lastnode = $node;
while ($number > 0)
{
my $newnode = $node->copy;
$newnode->paste($position => $lastnode);
$last_node = $newnode;
$number--;
}
return $last_node;
}
sub getNodeValue
{
my $node = shift;
return $node->text;
}
sub getValue
{
my $node = shift;
return $node->text;
}
sub setNodeValue
{
my $node = shift;
return $node->set_text(@_);
}
sub appendTextChild
{
my $node = shift;
my $text = shift or return undef;
my $text_node = XML::Twig::Elt->new('#PCDATA' => $text);
return $text_node->paste(last_child => $node);
}
sub getAttribute
{
my $node = shift;
return $node->att(@_);
}
sub getAttributes
{
my $node = shift;
return %{$node->atts(@_)};
}
sub setAttribute
{
my $node = shift or return undef;
my $attribute = shift;
my $value = shift;
if (defined $value)
{
return $node->set_att($attribute, $value, @_);
}
else
{
if ($node->{$attribute})
{
return $node->del_att($attribute);
}
else
{
return undef;
}
}
}
sub removeAttribute
{
my $node = shift or return undef;
return $node->del_att(shift);
}
#------------------------------------------------------------------------------
package OpenOffice::OODoc::XPath;
#------------------------------------------------------------------------------
# basic conversion between internal & printable encodings (object version)
sub inputTextConversion
{
my $self = shift;
my $text = shift;
my $local_encoding = $self->{'local_encoding'} or return $text;
return Encode::decode($local_encoding, $text);
}
sub outputTextConversion
{
my $self = shift;
my $text = shift;
my $local_encoding = $self->{'local_encoding'} or return $text;
return Encode::encode($local_encoding, $text);
}
sub localEncoding
{
my $self = shift;
my $encoding = shift;
$self->{'local_encoding'} = $encoding if $encoding;
return $self->{'local_encoding'} || '';
}
sub noLocalEncoding
{
my $self = shift;
delete $self->{'local_encoding'};
return 1;
}
#------------------------------------------------------------------------------
# search/replace text processing routine
# if $replace is a user-provided routine, it's called back with
# the current argument stack, plus the substring found
sub _find_text
{
my $self = shift;
my $text = shift;
my $pattern = $self->inputTextConversion(shift);
my $replace = shift;
if (defined $pattern)
{
if (defined $replace)
{
if (ref $replace)
{
if ((ref $replace) eq 'CODE')
{
return undef
unless
(
$text =~
s/($pattern)/
{
my $found = $1;
Encode::_utf8_on($found)
if Encode::is_utf8($text);
my $result = &$replace(@_, $found);
$result = $found
unless (defined $result);
$result;
}
/eg
);
}
else
{
return undef unless ($text =~ /$pattern/);
}
}
else
{
my $r = $self->inputTextConversion($replace);
return undef unless ($text =~ s/$pattern/$r/g);
}
}
else
{
return undef unless ($text =~ /$pattern/);
}
}
return $text;
}
#------------------------------------------------------------------------------
# search/replace content in descendant nodes
sub _search_content
{
my $self = shift;
my $element = shift or return undef;
my $content = undef;
foreach my $child ($element->children)
{
my $text = undef;
if ($child->isTextNode)
{
$text = $self->_find_text($child->text, @_);
$child->set_text($text) if defined $text;
}
else
{
my $t = $self->_search_content($child, @_);
$text .= $t if defined $t;
}
$content .= $text if defined $text;
}
return $content;
}
#------------------------------------------------------------------------------
# document class check
sub isContent
{
my $self = shift;
return ($self->contentClass()) ? 1 : undef;
}
sub isCalcDocument
{
my $self = shift;
return ($self->contentClass() eq 'spreadsheet') ? 1 : undef;
}
sub isImpressDocument
{
my $self = shift;
return ($self->contentClass() eq 'impress') ? 1 : undef;
}
sub isDrawDocument
{
my $self = shift;
return ($self->contentClass() eq 'drawing') ? 1 : undef;
}
sub isWriterDocument
{
my $self = shift;
return ($self->contentClass() eq 'text') ? 1 : undef;
}
#------------------------------------------------------------------------------
# constructor; accepts one from 3 types of parameters to create an instance:
# file => a regular OpenOffice.org filename
# archive => an OODoc::File, previously created object
# xml => an XML string, representing an OO XML member
# if 'file' or 'archive' (not 'xml') is provided, another parameter 'member'
# must be provided in addition
# member => member of the zip archive (meta.xml, content.xml, ...)
sub new
{
my $caller = shift;
my $class = ref($caller) || $caller;
my $self =
{
body_path => '//office:body',
auto_style_path => '//office:automatic-styles',
master_style_path => '//office:master-styles',
named_style_path => '//office:styles',
local_encoding =>
$OpenOffice::OODoc::XPath::LOCAL_CHARSET,
@_
};
my $twig = undef;
if ($self->{'member'} && ! $self->{'element'})
{
my $m = lc $self->{'member'};
$m =~ /(^.*)\..*/;
$m = $1 if $1;
$self->{'element'} =
$OpenOffice::OODoc::XPath::XMLNAMES{$m};
}
# create the XML::Twig
if ($self->{'readable_XML'} && ($self->{'readable_XML'} eq 'on'))
{
$self->{'readable_XML'} = 'indented';
}
$self->{'element'} = $OpenOffice::OODoc::XPath::XMLNAMES{'content'}
unless $self->{'element'};
if ($self->{'element'})
{
$twig = XML::Twig->new
(
twig_roots =>
{
$self->{'element'} => 1
},
pretty_print => $self->{'readable_XML'},
%{$self->{'twig_options'}}
);
}
else
{
$twig = XML::Twig->new
(
pretty_print => $self->{'readable_XML'},
%{$self->{'twig_options'}}
);
}
if ($self->{'xml'}) # load from XML string
{
$self->{'xpath'} = $twig->safe_parse($self->{'xml'});
}
elsif ($self->{'archive'}) # load from existig OOFile
{
$self->{'member'} = 'content' unless $self->{'member'};
$self->{'xml'} = $self->{'archive'}->link($self);
$self->{'xpath'} = $twig->safe_parse($self->{'xml'});
}
elsif ($self->{'file'}) # look for a file
{
unless (
$self->{'flat_xml'}
||
(lc $self->{'file'}) =~ /\.xml$/
)
{ # create OOFile & extract from it
require OpenOffice::OODoc::File;
$self->{'archive'} = OpenOffice::OODoc::File->new
(
$self->{'file'},
create => $self->{'create'},
template_path => $self->{'template_path'}
);
$self->{'member'} = 'content'
unless $self->{'member'};
$self->{'xml'} = $self->{'archive'}->link($self);
$self->{'xpath'} = $twig->safe_parse($self->{'xml'});
}
else
{ # load from XML flat file
$self->{'xpath'} =
$twig->safe_parsefile($self->{'file'});
}
}
else
{
warn "[" . __PACKAGE__ . "] No XML content\n";
return undef;
}
delete $self->{'xml'};
unless ($self->{'xpath'})
{
warn "[" . __PACKAGE__ . "] No well formed content\n";
return undef;
}
$self->{'twig'} = $twig;
return bless $self, $class;
}
#------------------------------------------------------------------------------
# destructor
sub DESTROY
{
my $self = shift;
delete $self->{'file'};
delete $self->{'xpath'};
delete $self->{'xml'};
delete $self->{'twig'};
delete $self->{'archive'};
delete $self->{'twig_options'};
$self = {};
}
sub dispose
{
my $self = shift;
$self->DESTROY(@_);
}
#------------------------------------------------------------------------------
# get a reference to the embedded XML parser for share
sub getXMLParser
{
warn "[" . __PACKAGE__ . "::getXMLParser] No longer implemented\n";
return undef;
}
#------------------------------------------------------------------------------
# make the changes persistent in an OpenOffice.org file
sub save
{
my $self = shift;
my $target = shift;
my $filename = ($target) ? $target : $self->{'file'};
my $archive = $self->{'archive'};
unless ($archive)
{
if ($filename)
{
open my $fh, ">:utf8", $filename;
$self->exportXMLContent($fh);
close $fh;
return $filename;
}
else
{
warn "[" . __PACKAGE__ . "::save] No archive object\n";
return undef;
}
}
$filename = $archive->{'source_file'} unless $filename;
unless ($filename)
{
warn "[" . __PACKAGE__ . "::save] No target file\n";
return undef;
}
my $member = $self->{'member'};
unless ($member)
{
warn "[" . __PACKAGE__ . "::save] No member\n";
return undef;
}
my $result = $archive->save($filename);
return $result;
}
sub update
{
my $self = shift;
return $self->save(@_);
}
#------------------------------------------------------------------------------
# raw file import
sub raw_import
{
my $self = shift;
if ($self->{'archive'})
{
my $target = shift;
unless ($target)
{
warn "[" . __PACKAGE__ . "::raw_import] " .
"No target member for import\n";
return undef;
}
$target =~ s/^#//;
return $self->{'archive'}->raw_import($target, @_);
}
else
{
warn "[" . __PACKAGE__ . "::raw_import] " .
"No archive for file import\n";
return undef;
}
}
#------------------------------------------------------------------------------
# raw file export
sub raw_export
{
my $self = shift;
if ($self->{'archive'})
{
my $source = shift;
unless ($source)
{
warn "[" . __PACKAGE__ . "::raw_import] " .
"Missing source file name\n";
return undef;
}
$source =~ s/^#//;
return $self->{'archive'}->raw_export($source, @_);
}
else
{
warn "[" . __PACKAGE__ . "::raw_import] " .
"No archive for file export\n";
return undef;
}
}
#------------------------------------------------------------------------------
# exports the whole content of the document as an XML string
sub exportXMLContent
{
my $self = shift;
my $target = shift;
if ($target)
{
return $self->{'twig'}->print($target);
}
else
{
return $self->{'twig'}->sprint;
}
}
sub getXMLContent
{
my $self = shift;
return $self->exportXMLContent(@_);
}
sub getContent
{
my $self = shift;
return $self->exportXMLContent(@_);
}
#------------------------------------------------------------------------------
# brute force tree reorganization
sub reorganize
{
warn "[" . __PACKAGE__ . "::reorganize] No longer implemented\n";
return undef;
}
#------------------------------------------------------------------------------
# returns the root of the XML document
sub getRoot
{
my $self = shift;
return $self->{'xpath'}->root;
}
#------------------------------------------------------------------------------
# returns the root element of the XML document
sub getRootElement
{
my $self = shift;
my $root = $self->{'xpath'}->root;
my $rootname = $root->name() || '';
return ($rootname eq $self->{'element'}) ?
$root :
$root->first_child($self->{'element'});
}
#------------------------------------------------------------------------------
# returns the content class (text, spreadsheet, presentation, drawing)
sub contentClass
{
my $self = shift;
my $class = shift;
my $element = $self->getRootElement;
return undef unless $element;
$self->setAttribute($element, 'office:class', $class) if $class;
return $self->getAttribute($element, 'office:class') || '';
}
#------------------------------------------------------------------------------
# element name check
sub getRootName
{
my $self = shift;
return $self->getRootElement->name;
}
#------------------------------------------------------------------------------
# member type checks
sub isMeta
{
my $self = shift;
return ($self->getRootName() eq $XMLNAMES{'meta'}) ? 1 : undef;
}
sub isStyles
{
my $self = shift;
return ($self->getRootName() eq $XMLNAMES{'styles'}) ? 1 : undef;
}
sub isSettings
{
my $self = shift;
return ($self->getRootName() eq $XMLNAMES{'settings'}) ? 1 : undef;
}
#------------------------------------------------------------------------------
# returns the document body element (if defined)
sub getBody
{
my $self = shift;
return $self->getRootElement->selectChildElement
(
'office:(body|meta|master-styles|settings)'
);
}
#------------------------------------------------------------------------------
# makes the current OODoc::XPath object share the same content as another one
sub cloneContent
{
$self = shift;
$source = shift;
unless ($source && $source->{'xpath'})
{
warn "[" . __PACKAGE__ . "]::cloneContent - No valid source\n";
return undef;
}
$self->{'xpath'} = $source->{'xpath'};
$self->{'begin'} = $source->{'begin'};
$self->{'xml'} = $source->{'xml'};
$self->{'end'} = $source->{'end'};
return $self->getRoot;
}
#------------------------------------------------------------------------------
# exports an individual element as an XML string
sub exportXMLElement
{
my $self = shift;
my $path = shift;
my $element =
(ref $path) ? $path : $self->getElement($path, shift);
my $text = $element->sprint(@_);
return $text;
}
#------------------------------------------------------------------------------
# exports the document body (if defined) as an XML string
sub exportXMLBody
{
my $self = shift;
return $self->exportXMLElement($self->getBody, @_);
}
#------------------------------------------------------------------------------
# gets the reference of an XML element identified by path & position
# for subsequent processing
sub getElement
{
my $self = shift;
my $path = shift;
return undef unless $path;
if (ref $path)
{
return $path->isElementNode ? $path : undef;
}
my $pos = shift || 0;
if (defined $pos && (($pos =~ /^\d*$/) || ($pos =~ /^[\d+-]\d+$/)))
{
my $context = shift;
$context = $self->{'xpath'} unless ref $context;
my $node = $context->get_xpath($path, $pos);
return $node && $node->isElementNode ? $node : undef;
}
else
{
warn "[" . __PACKAGE__ . "::getElement] " .
"Invalid node position\n";
return undef;
}
}
#------------------------------------------------------------------------------
# get the list of children (or the first child unless wantarray) matching
# a given element name and belonging to a given element
sub selectChildElementsByName
{
my $self = shift;
my $path = shift;
my $element = ref $path ? $path : $self->getElement($path, shift);
return undef unless $element;
return $element->selectChildElements(@_);
}
#------------------------------------------------------------------------------
# get the first child belonging to a given element and matching a given name
sub selectChildElementByName
{
my $self = shift;
my $path = shift;
my $element = ref $path ? $path : $self->getElement($path, shift);
return undef unless $element;
return $element->selectChildElement(@_);
}
#------------------------------------------------------------------------------
# get the first child belonging to a given element with an exact given name
sub getChildElementByName
{
my $self = shift;
return $self->selectChildElementByName(@_);
}
#------------------------------------------------------------------------------
# replaces any previous content of an existing element by a given text
sub setText
{
my $self = shift;
my $path = shift;
my $pos = (ref $path) ? undef : shift;
my $text = shift;
return undef unless defined $text;
my $element = $self->OpenOffice::OODoc::XPath::getElement
($path, $pos);
return undef unless $element;
$element->set_text("");
my @lines = split "\n", $text;
while (@lines)
{
my $line = shift @lines;
my @columns = split "\t", $line;
while (@columns)
{
my $column =
$self->inputTextConversion(shift @columns);
$element->appendTextChild($column);
$element->appendChild('text:tab-stop') if (@columns);
}
$element->appendChild('text:line-break') if (@lines);
}
return $text;
}
#------------------------------------------------------------------------------
# extends the text of an existing element
sub extendText
{
my $self = shift;
my $path = shift;
my $pos = (ref $path) ? undef : shift;
my $text = shift;
return undef unless defined $text;
my $element = $self->getElement($path, $pos);
return undef unless $element;
my @lines = split "\n", $text;
while (@lines)
{
my $line = shift @lines;
my @columns = split "\t", $line;
while (@columns)
{
my $column =
$self->inputTextConversion(shift @columns);
$element->appendTextChild($column);
$element->appendChild('text:tab-stop') if (@columns);
}
$element->appendChild('text:line-break') if (@lines);
}
return $text;
}
#------------------------------------------------------------------------------
# creates a new encoded text node
sub createTextNode
{
my $self = shift;
my $text = shift or return undef;
my $content = $self->inputTextConversion($text);
return XML::Twig::Elt->new('#PCDATA' => $text);
}
#------------------------------------------------------------------------------
# replaces substring in an element
sub replaceText
{
my $self = shift;
my $path = shift;
my $element = (ref $path) ?
$path :
$self->getElement($path, shift);
return $self->_search_content($element, @_);
}
#------------------------------------------------------------------------------
# gets text in element by path (sub-element texts are concatenated)
sub getText
{
my $self = shift;
my $element = $self->OpenOffice::OODoc::XPath::getElement(@_);
return undef unless ($element && $element->isElementNode);
my $text = '';
my $name = $element->getName;
if ($name eq 'text:tab-stop') { return "\t"; }
if ($name eq 'text:line-break') { return "\n"; }
foreach my $node ($element->getChildNodes)
{
if ($node->isElementNode)
{
$text .=
(
$self->getText($node)
||
''
);
}
else
{
my $t = ($node->getValue() || '');
$text .= $self->outputTextConversion($t);
}
}
return $text;
}
#------------------------------------------------------------------------------
# returns the children of a given element
sub getElementList
{
my $self = shift;
my $path = shift;
my $context = shift;
if ($context)
{
$path = "./" unless $path;
}
else
{
$path = "/" unless $path;
$context = $self->{'xpath'};
}
return $context->findnodes($path);
}
#------------------------------------------------------------------------------
# brute XPath nodelist selection; allows any XML::XPath expression
sub selectNodesByXPath
{
my $self = shift;
my ($p1, $p2) = @_;
my $path = undef;
my $context = undef;
if (ref $p1) { $context = $p1; $path = $p2; }
else { $path = $p1; $context = $p2; }
$context = $self->{'xpath'} unless ref $context;
return $context->get_xpath($path);
}
#------------------------------------------------------------------------------
# brute XPath single node selection; allows any XML::XPath expression
sub selectNodeByXPath
{
my $self = shift;
my ($p1, $p2) = @_;
my $path = undef;
my $context = undef;
if (ref $p1) { $context = $p1; $path = $p2; }
else { $path = $p1; $context = $p2; }
$context = $self->{'xpath'} unless ref $context;
return $context->get_xpath($path, 0);
}
sub getNodeByXPath
{
my $self = shift;
return $self->selectNodeByXPath(@_);
}
#------------------------------------------------------------------------------
# brute XPath value extraction; allows any XML::XPath expression
sub getXPathValue
{
my $self = shift;
my ($p1, $p2) = @_;
my $path = undef;
my $context = undef;
if (ref $p1) { $context = $p1; $path = $p2; }
else { $path = $p1; $context = $p2; }
if (ref $context)
{
$path =~ s/^\/*//;
}
else
{
$context = $self->{'xpath'};
}
return $self->outputTextConversion($context->findvalue($path, @_));
}
#------------------------------------------------------------------------------
# create or update an xpath
sub makeXPath
{
my $self = shift;
my $path = shift;
my $root = undef;
if (ref $path)
{
$root = $path;
$path = shift;
}
else
{
$root = $self->getRoot;
}
$path =~ s/^[\/ ]*//; $path =~ s/[\/ ]*$//;
my @list = split '/', $path;
my $posnode = $root;
while (@list)
{
my $item = shift @list;
while (($item =~ /\[.*/) && !($item =~ /\[.*\]/))
{
my $cont = shift @list or last;
$item .= ('/' . $cont);
}
next unless $item;
my $node = undef;
my $name = undef;
my $param = undef;
$item =~ s/\[(.*)\] *//;
$param = $1;
$name = $item; $name =~ s/^ *//; $name =~ s/ *$//;
my %attributes = ();
my $text = undef;
my $indice = undef;
if ($param)
{
my @attrlist = [];
$indice = undef;
$param =~ s/^ *//; $param =~ s/ *$//;
$param =~ s/^@//;
@attrlist = split /@/, $param;
foreach my $a (@attrlist)
{
next unless $a;
$a =~ s/^ *//;
my $tmp = $a;
$tmp =~ s/ *$//;
if ($tmp =~ /^\d*$/)
{
$indice = $tmp;
next;
}
if ($a =~ s/^\"(.*)\".*/$1/)
{
$text = $1; next;
}
if ($a =~ /^=/)
{
$a =~ s/^=//;
$a =~ '^"(.*)"$';
$text = $1 ? $1 : $a;
next;
}
$a =~ s/^@//;
my ($attname, $attvalue) = split '=', $a;
next unless $attname;
if ($attvalue)
{
$attvalue =~ '"(.*)"';
$attvalue = $1 if $1;
}
$attname =~ s/^ *//; $attname =~ s/ *$//;
$attributes{$attname} = $attvalue;
}
}
if (defined $indice)
{
$node = $self->getNodeByXPath
($posnode, "$name\[$indice\]");
}
else
{
$node =
$self->getChildElementByName($posnode, $name);
}
if ($node)
{
$self->setAttributes($node, %attributes);
$self->setText($node, $text) if (defined $text);
}
else
{
$node = $self->appendElement
(
$posnode, $name,
text => $text,
attributes => {%attributes}
);
}
if ($node) { $posnode = $node; }
else { return undef; }
}
return $posnode;
}
#------------------------------------------------------------------------------
# selects element by path and attribute
sub selectElementByAttribute
{
my $self = shift;
my $path = shift;
my $key = shift;
my $value = shift || '';
my @candidates = $self->getElementList($path);
return @candidates unless $key;
for (@candidates)
{
if ($_->isElementNode)
{
my $v = $self->getAttribute($_, $key);
return $_ if (defined $v && ($v =~ /$value/));
}
}
return undef;
}
#------------------------------------------------------------------------------
# selects list of elements by path and attribute
sub selectElementsByAttribute
{
my $self = shift;
my $path = shift;
my $key = shift;
my $value = shift || '';
my @candidates = $self->getElementList($path);
return @candidates unless $key;
my @selection = ();
for (@candidates)
{
if ($_->isElementNode)
{
my $v = $self->getAttribute($_, $key);
push @selection, $_ if ($v && ($v =~ /$value/));
}
}
return wantarray ? @selection : $selection[0];
}
#------------------------------------------------------------------------------
# get a list of elements matching a given path and an optional content pattern
sub findElementList
{
my $self = shift;
my $path = shift;
my $pattern = shift;
my $replace = shift;
return undef unless $path;
my @result = ();
foreach my $n ($self->{'xpath'}->findnodes($path))
{
push @result,
[ $self->findDescendants($n, $pattern, $replace, @_) ];
}
return @result;
}
#------------------------------------------------------------------------------
# get a list of elements matching a given path and an optional content pattern
# without replacement operation, and from an optional context node
sub selectElements
{
my $self = shift;
my $path = shift;
my $context = $self->{'xpath'};
if (ref $path)
{
$context = $path;
$path = shift;
}
my $filter = shift;
my @candidates = $self->selectNodesByXPath($context, $path);
return @candidates unless $filter;
my @result = ();
while (@candidates)
{
my $node = shift @candidates;
push @result, $node
if $self->_search_content($node, $filter, @_, $node);
}
return @result;
}
#------------------------------------------------------------------------------
# get the 1st element matching a given path and on optional content pattern
sub selectElement
{
my $self = shift;
my $path = shift;
my $context = $self->{'xpath'};
if (ref $path)
{
$context = $path;
$path = shift;
}
return undef unless $path;
my $filter = shift;
my @candidates = $self->selectNodesByXPath($context, $path);
return $candidates[0] unless $filter;
while (@candidates)
{
my $node = shift @candidates;
return $node
if $self->_search_content($node, $filter, @_, $node);
}
return undef;
}
#------------------------------------------------------------------------------
# gets the descendants of a given node, with optional in fly search/replacement
sub findDescendants
{
my $self = shift;
my $node = shift;
my $pattern = shift;
my $replace = shift;
my @result = ();
my $n = $self->selectNodeByContent($node, $pattern, $replace, @_);
push @result, $n if $n;
foreach my $m ($node->getChildNodes)
{
push @result,
[ $self->findDescendants($m, $pattern, $replace, @_) ];
}
return @result;
}
#------------------------------------------------------------------------------
# search & replace text in an individual node
sub selectNodeByContent
{
my $self = shift;
my $node = shift;
my $pattern = shift;
my $replace = shift;
return $node unless $pattern;
my $l = $node->getNodeValue;
return undef unless $l;
unless (defined $replace)
{
return ($l =~ /$pattern/) ? $node : undef;
}
else
{
if (ref $replace)
{
unless
($l =~ s/($pattern)/&$replace(@_, $node, $1)/eg)
{
return undef;
}
}
else
{
unless ($l =~ s/$pattern/$replace/g)
{
return undef;
}
}
$node->setNodeValue($l);
return $node;
}
}
#------------------------------------------------------------------------------
# gets the text content of a nodelist
sub getTextList
{
my $self = shift;
my $path = shift;
my $pattern = shift;
return undef unless $path;
my @nodelist = $self->{'xpath'}->findnodes($path);
my @text = ();
foreach my $n (@nodelist)
{
my $l = $self->outputTextConversion($n->string_value);
push @text, $l if ((! defined $pattern) || ($l =~ /$pattern/));
}
return wantarray ? @text : join "\n", @text;
}
#------------------------------------------------------------------------------
# gets the attributes of an element in the key => value form
sub getAttributes
{
my $self = shift;
my $path = shift;
my $pos = (ref $path) ? undef : shift;
return undef unless $path;
$pos = 0 unless $pos;
my $node = $self->getElement($path, $pos);
return undef unless $path;
my %attributes = ();
my %atts = %{$node->atts(@_)};
foreach my $a (keys %atts)
{
$attributes{$a} = $self->outputTextConversion($atts{$a});
}
return %attributes;
}
#------------------------------------------------------------------------------
# gets the value of an attribute by path + name
sub getAttribute
{
my $self = shift;
my $path = shift;
my $pos = (ref $path) ? undef : shift;
my $name = shift;
return undef unless $path;
$pos = 0 unless $pos;
my $node = $self->getElement($path, $pos);
return $self->outputTextConversion($node->att($name));
}
#------------------------------------------------------------------------------
# set/replace a list of attributes in an element
sub setAttributes
{
my $self = shift;
my $path = shift;
my $pos = (ref $path) ? undef : shift;
my %attr = @_;
my $node = $self->getElement($path, $pos);
return undef unless $node;
foreach my $k (keys %attr)
{
if (defined $attr{$k})
{
$node->set_att
(
$k,
$self->inputTextConversion($attr{$k})
);
}
else
{
$node->del_att($a);
}
}
return %attr;
}
#------------------------------------------------------------------------------
# set/replace a single attribute in an element
sub setAttribute
{
my $self = shift;
my $path = shift;
my $pos = (ref $path) ? undef : shift;
my $node = $self->getElement($path, $pos) or return undef;
my $attribute = shift or return undef;
my $value = shift;
if (defined $value && ($value gt ' '))
{
$node->set_att
(
$attribute,
$self->inputTextConversion($value)
);
}
else
{
$node->del_att($a);
}
return $value;
}
#------------------------------------------------------------------------------
# removes an attribute in element
sub removeAttribute
{
my $self = shift;
my $path = shift;
my $pos = (ref $path) ? undef : shift;
my $name = shift;
my $node = $self->getElement($path, $pos);
return undef unless $node;
return $node->del_att($a);
}
#------------------------------------------------------------------------------
# replicates an existing element, provided as an XPath ref or an XML string
sub replicateElement
{
my $self = shift;
my $proto = shift;
my $position = shift;
my %options = @_;
unless ($proto && ref $proto && $proto->isElementNode)
{
warn "[" . __PACKAGE__ . "::replicateElement] No prototype\n";
return undef;
}
$position = 'end' unless $position;
my $element = $proto->copy;
$element->set_atts($options => 'attribute');
if (ref $position)
{
if (! $options{'position'})
{
$element->paste_last_child($position);
}
elsif ($options{'position'} eq 'before')
{
$element->paste_before($position);
}
elsif ($options{'position'} eq 'after')
{
$element->paste_after($position);
}
elsif ($options{'position'} ne 'free')
{
warn "[" . __PACKAGE__ . "::replicateElement] " .
"No valid attachment option\n";
}
}
elsif ($position eq 'end')
{
$element->paste_last_child($self->{'xpath'}->root);
}
elsif ($position eq 'body')
{
$element->paste_last_child($self->getBody);
}
return $element;
}
#------------------------------------------------------------------------------
# create an element, just with a mandatory name and an optional text
# the name can have the namespace:name form
# if the $name argument is a '<.*>' string, it's processed as XML and
# the new element is completely generated from it
sub createElement
{
my $self = shift;
my $name = shift;
my $text = shift;
my $element = OpenOffice::OODoc::XPath::new_element($name, @_);
unless ($element)
{
warn "[" . __PACKAGE__ . "::createElement] " .
"Element creation failure\n";
return undef;
}
$self->setText($element, $text) if ($text);
return $element;
}
#------------------------------------------------------------------------------
# replaces an element by another one
# the new element is inserted before the old one,
# then the old element is removed.
# the new element can be inserted by copy (default) or by reference
# return = new element if success, undef if failure
sub replaceElement
{
my $self = shift;
my $path = shift;
my $pos = (ref $path) ? undef : shift;
my $new_element = shift;
my %options =
(
mode => 'copy',
@_
);
unless ($new_element)
{
warn "[" . __PACKAGE__ . "::replaceElement] " .
"Missing new element\n";
return undef;
}
unless (ref $new_element)
{
$new_element = $self->createElement($new_element);
$options{'mode'} = 'reference';
}
unless ($new_element && $new_element->isElementNode)
{
warn "[" . __PACKAGE__ . "::replaceElement] " .
"No valid replacement\n";
return undef;
}
my $result = undef;
my $old_element = $self->getElement($path, $pos);
unless ($old_element)
{
warn "[" . __PACKAGE__ . "::replaceElement] " .
"Non existing element to be replaced\n";
return undef;
}
if (! $options{'mode'} || $options{'mode'} eq 'copy')
{
$result = $new_element->copy;
$result->replace($old_element);
return $result;
}
elsif ($options{'mode'} && $options{'mode'} eq 'reference')
{
$result = $self->insertElement
(
$old_element,
$new_element,
position => 'before'
);
$self->removeElement($old_element);
return $result;
}
else
{
warn "[" . __PACKAGE__ . "::replaceElement] " .
"Unknown option\n";
}
return undef;
}
#------------------------------------------------------------------------------
# adds a new or existing child element
sub appendElement
{
my $self = shift;
my $path = shift;
my $pos = (ref $path) ? undef : shift;
my $name = shift;
my %opt = @_;
$opt{'attribute'} = $opt{'attributes'} unless ($opt{'attribute'});
return undef unless $name;
my $element = undef;
unless (ref $name)
{
$element = $self->createElement($name, $opt{'text'});
}
else
{
$element = $name;
$self->setText($element, $opt{'text'}) if $opt{'text'};
}
return undef unless $element;
my $parent = $self->getElement($path, $pos);
unless ($parent)
{
warn "[" . __PACKAGE__ .
"::appendElement] Position not found\n";
return undef;
}
$element->paste_last_child($parent);
$self->setAttributes($element, %{$opt{'attribute'}});
return $element;
}
#------------------------------------------------------------------------------
# inserts a new element before or after a given node
# as appendElement, but the new element is a 'brother' (and not a child) of
# the first given element
sub insertElement
{
my $self = shift;
my $path = shift;
my $pos = (ref $path) ? undef : shift;
my $name = shift;
my %opt = @_;
return undef unless $name;
my $element = undef;
unless (ref $name)
{
$element = $self->createElement($name, $opt{'text'});
}
else
{
$element = $name;
$self->setText($element, $opt{'text'}) if $opt{'text'};
}
return undef unless $element;
my $posnode = $self->getElement($path, $pos);
unless ($posnode)
{
warn "[" . __PACKAGE__ . "::insertElement] Unknown position\n";
return undef;
}
if (($opt{'position'}) && ($opt{'position'} eq 'after'))
{
$element->paste_after($posnode);
}
else
{
$element->paste_before($posnode);
}
$self->setAttributes($element, %{$opt{'attribute'}});
return $element;
}
#------------------------------------------------------------------------------
# removes the given element & children
sub removeElement
{
my $self = shift;
my $e = $self->getElement(@_);
return undef unless $e;
return $e->delete;
}
#------------------------------------------------------------------------------
# cuts the given element & children (to be pasted elsewhere)
sub cutElement
{
my $self = shift;
my $e = $self->getElement(@_);
return undef unless $e;
$e->cut;
return $e;
}
#------------------------------------------------------------------------------
1;