#!/usr/bin/perl -w
#-----------------------------------------------------------------------------
# $Id : oo2pod 0.1 2004-03-12 JMG$
#
# Simple OpenOffice.org to POD conversion.
#
# Usage : oo2pod <oofilename>
#
# This demo script exports the content of a given OpenOffice.org file
# as POD on the standard output. In its present form, it is quite limited
# and inflexible, in order to remain easily readable. It should be
# considered as an example of text extraction using OpenOffice::OODoc
# and not as the 'definitive' oo2pod filter.
#
# Before extraction, some transformations are done in the document
# in order to make it more convenient for a POD presentation. Some
# pieces of metadata (title, subject, description), if defined, are
# reported in the beginning of the POD. The footnotes are removed from
# the content and reported in a special section at the end.
#
# This script needs Text::Wrapper (which is not necessarily required
# by the OpenOffice::OODoc installation). To implement more sophisticated
# presentation rules, you could use Text::Format instead.
#
#-----------------------------------------------------------------------------
use strict;
use warnings;
use OpenOffice::OODoc 1.104;
use Text::Wrapper;
#-----------------------------------------------------------------------------
# Document handles shared by the subroutines and the main program:
#   $meta - will be the metadata object
#   $doc  - will be the document content object
my ($meta, $doc);
#-----------------------------------------------------------------------------
# Text output utilities (using Text::Wrapper): one wrapper for ordinary
# paragraphs and one for item lists.
my ($paragraph_wrapper, $list_wrapper);

# Wrappers initialisation. A BEGIN block runs at compile time, so both
# wrappers exist before any run-time statement of the script is executed.
BEGIN
{
    # This is only an example; in a real application, the formatting
    # rules should be more flexible and vary according to the style of
    # each source text element.
    # Two kinds of wrappers are prepared here, so that item lists can be
    # given a larger left margin than ordinary paragraphs.
    # NOTE(review): as written, both wrappers receive exactly the same
    # settings, so lists and paragraphs are laid out identically -
    # confirm the intended margin strings.
    my %common = (columns => 76);

    $paragraph_wrapper = Text::Wrapper->new
        (
        %common,
        par_start  => ' ',
        body_start => ' '
        );

    $list_wrapper = Text::Wrapper->new
        (
        %common,
        par_start  => ' ',
        body_start => ' '
        );
}
# Print a POD heading command ("=head" followed by the level), then a
# blank line, on the currently selected output handle.
#   $level - heading level, appended as is to "=head"
#   $text  - heading text; nothing is printed when it is undefined or empty
# Returns the result of print, or nothing when the heading is skipped.
sub header_output
{
    my ($level, $text) = @_;
    # test the length rather than the truth of the text, so that a
    # heading which reads "0" is not silently dropped
    return unless defined $text && length $text;
    return print "=head$level\t$text\n\n";
}
# Print the text of one element, wrapped by the wrapper that matches
# the type of the element.
#   argument: an OOo text object (not a flat string)
sub content_output
{
    my ($item) = @_;
    my $text = $doc->getText($item);
    # item lists go through the list wrapper; the paragraph output rule
    # applies to any element that is not a list
    my $wrapper = $item->isItemList ? $list_wrapper : $paragraph_wrapper;
    print $wrapper->wrap($text) . "\n";
    # a more specialised script could select yet another wrapper
    # according to the style (using the getStyle() method of
    # OpenOffice::OODoc::Text)
}
#-----------------------------------------------------------------------------
# Main program: reads the OpenOffice.org file named on the command line
# and prints its metadata, its text content and its footnotes as POD on
# the standard output.

# a file name is required
@ARGV
    or die "Usage : oo2pod <oofilename>\n";

# initialise the OOo file object
my $ooarchive = ooFile($ARGV[0])
    or die "No regular OpenOffice.org file\n";

# extract the metadata; a failure is only reported, so $meta may remain
# undefined and must be checked before every use below
$meta = ooMeta(archive => $ooarchive)
    or warn "This file has not standard OOo properties. Looks strange.\n";

# extract the content
$doc = ooDocument(archive => $ooarchive, member => 'content')
    or die "No standard OOo content ! I give up !\n";

# attempt to use some metadata to begin the output
if ($meta)
{
    # each pair is (metadata accessor, POD section name); a section is
    # written only when the corresponding property is defined
    my @sections =
        (
        [ title       => "NAME" ],
        [ subject     => "SUBJECT" ],
        [ description => "DESCRIPTION" ],
        );
    foreach my $section (@sections)
    {
        my ($accessor, $heading) = @$section;
        my $value = $meta->$accessor;
        next unless $value;
        header_output(1, $heading);
        print $paragraph_wrapper->wrap($value) . "\n";
    }
    # we could dump other metadata here...
}

# the next 2 lines look strange; they prevent the getText() method of
# OpenOffice::OODoc::Text (see the corresponding man page) from using
# its default tags for spans and footnotes
delete $doc->{'delimiters'}->{'text:span'};
delete $doc->{'delimiters'}->{'text:footnote-body'};

# here we select the tab as field separator for table field output
# (the default is ";" as for CSV output)
$doc->{'field_separator'} = "\t";

# in the next sequence, we extract all the footnotes, store them for
# later processing and remove them from the content
my @notes = $doc->getFootnoteList;
$doc->removeElement($_) for @notes;

# get the full list of text objects (without the previously removed footnotes)
my @content = $doc->getTextElementList;

# if the first text element is not a header, we create a leading
# header here, using the title or an arbitrary name; a document without
# any text element gets no leading header, and the title is looked up
# only when the metadata object is available
if (@content && !$content[0]->isHeader)
{
    my $title = $meta && $meta->title;
    header_output(1, $title || "INTRODUCTION");
}

foreach my $element (@content)
{
    my $level = $doc->getLevel($element); # get the hierarchical level
    if ($level) # if an element has a 'level', it's a header
    {
        header_output($level, $doc->getText($element));
    }
    else
    {
        content_output($element);
    }
}

# all the document body is processed
if (@notes)
{
    # OK, we have some footnotes in store
    # create a special section and number the notes from 1
    header_output(1, "NOTES");
    my $count = 0;
    foreach my $note (@notes)
    {
        $count++;
        my $text = "[$count] " . $doc->getText($note);
        print $paragraph_wrapper->wrap($text) . "\n";
    }
}

# end of POD output
print "=cut\n";
exit;
#-----------------------------------------------------------------------------

=head1 NAME

oo2pod - POD generation from an OpenOffice.org document

=cut