#!/usr/bin/perl -w
#	$Id : oo2pod 0.1 2004-03-12 JMG$

=head1	NAME

oo2pod - POD generation from an OpenOffice.org document


=head1	SYNOPSIS

Usage : oo2pod <oofilename>

=head1	DESCRIPTION

This demo script exports the content of a given OpenOffice.org file
to POD on the standard output. In the present form, it's quite limited
and not flexible, in order to remain easily readable. It should be
considered as an example of text extraction using OpenOffice::OODoc
and not as the 'definitive' oo2pod filter.

Before extraction, some transformations are done in the document
in order to make it more convenient for a POD presentation. Some
pieces of metadata (title, subject, description), if defined, are
reported in the beginning of the POD. The footnotes are removed from
the content and reported in a special section at the end.

This script needs Text::Wrapper (that is not necessarily required
by the OpenOffice::OODoc installation). To implement more sophisticated
presentation rules, you could use Text::Format instead.

=cut

use OpenOffice::OODoc;
use Text::Wrapper;

# File-scoped objects shared by the subroutines and the main flow below.
my $meta;	# will be the metadata object
my $doc;	# will be the document content object
# text output utilities (using Text::Wrapper)
# Both wrappers are assigned inside the BEGIN block that follows, which is
# why they are declared here without an initial value.
# NOTE(review): adding an initialiser here (e.g. "= undef") would presumably
# overwrite at run time what BEGIN stored at compile time - confirm before
# changing these declarations.

my $paragraph_wrapper;	# wrapper used for ordinary paragraphs
my $list_wrapper;	# wrapper used for item lists (larger left margin)

# Wrappers initialisation.
#
# Runs at compile time and fills in the two file-scoped wrapper objects
# ($paragraph_wrapper and $list_wrapper) used by the output code.
# It's just an example; in a real application, the formatting rules
# should be more flexible and vary according to the style of each
# source text element.
BEGIN {
	# Here, we prepare two kinds of wrappers, in order to have
	# a larger left margin for item lists than for ordinary paragraphs

	# ordinary paragraphs: 8-space left margin
	$paragraph_wrapper = Text::Wrapper->new(
		columns		=> 76,
		par_start	=> '        ',
		body_start	=> '        ',
	);

	# item lists: 12-space left margin
	$list_wrapper = Text::Wrapper->new(
		columns		=> 76,
		par_start	=> '            ',
		body_start	=> '            ',
	);
}

# Print a POD heading on the standard output.
#   $level : heading depth, appended to "=head" (1 gives "=head1")
#   $text  : heading title; nothing is printed when it is undefined or empty
# Returns nothing.
sub	header_output {
	my ($level, $text) = @_;

	# test the length rather than the truth of $text, so that a
	# heading whose whole title is "0" is not silently dropped
	return unless defined $text && length $text;

	print "=head$level\t$text\n\n";
	return;
}

# Output the content of one text element, wrapped according to its type.
#   $element : an OOo text object (not a flat string)
# The text is extracted through the file-scoped $doc object, wrapped with
# one of the two file-scoped wrappers and printed on the standard output
# with a trailing "\n". Returns nothing.
sub	content_output {
	my $element = shift;
	my $text = $doc->getText($element);

	# choose an output format according to the type
	if ($element->isItemList) {
		print $list_wrapper->wrap($text) . "\n";
	}
	else {
		# we use the paragraph output rule for any element
		# that is not a list
		print $paragraph_wrapper->wrap($text) . "\n";
		# in a more specialised script, we could select another
		# alternative wrapper according to the style (using the
		# getStyle() method of OpenOffice::OODoc::Text)
	}
	return;
}


# the file name is mandatory; without it, report how to call the script
# instead of a misleading "not a regular file" message
@ARGV
	or die "Usage : oo2pod <oofilename>\n";

# initialise the OOo file object
my $ooarchive	= ooFile($ARGV[0])
	or die "No regular OpenOffice.org file\n";

# extract the metadata (a failure here is not fatal: $meta stays false
# and the metadata-based output is skipped later on)
$meta	= ooMeta(archive => $ooarchive)
	or warn "This file has not standard OOo properties. Looks strange.\n";

# extract the content (nothing can be produced without it)
$doc	= ooDocument(archive => $ooarchive, member => 'content')
	or die "No standard OOo content ! I give up !\n";

# attempt to use some metadata to begin the output
if ($meta) {
	# each piece of metadata that is defined gets its own level-1
	# section: [ POD heading, name of the metadata accessor ]
	my @sections = (
		[ "NAME",		'title'		],
		[ "SUBJECT",		'subject'	],
		[ "DESCRIPTION",	'description'	],
	);
	foreach my $section (@sections) {
		my ($heading, $accessor) = @$section;
		my $value = $meta->$accessor;
		next unless $value;	# skip missing or empty metadata
		header_output(1, $heading);
		print $paragraph_wrapper->wrap($value) . "\n";
	}
	# we could dump other metadata here...
}

# the loop below removes two entries from the delimiter table of the
# document object; this prevents the getText() method of
# OpenOffice::OODoc::Text (see the corresponding man page) from using
# its default tags for spans and footnotes
foreach my $tag ('text:span', 'text:footnote-body') {
	delete $doc->{'delimiters'}->{$tag};
}

# table fields are separated by a tab in the output
# (the default is ";" as for CSV output)
$doc->{'field_separator'} = "\t";

# collect all the footnotes and keep them for the final section,
# then take each of them out of the document content
my @notes = $doc->getFootnoteList;
foreach my $note (@notes) {
	$doc->removeElement($note);
}

# the full list of text objects, now free of the removed footnotes
my @content = $doc->getTextElementList;

# if the first text element is not a header, we create a leading
# header here, using the title or an arbitrary name
# (an empty document and missing metadata are both tolerated: neither
# $content[0] nor $meta is used as an object before being checked)
my $first_element = $content[0];
unless ($first_element && $first_element->isHeader) {
	header_output(1, ($meta && $meta->title) || "INTRODUCTION");
}

# walk through the document body, in document order
foreach my $element (@content) {
	my $level = $doc->getLevel($element); 	# get the hierarchical level
	if ($level) {
		# if an element has a 'level', it's a header
		header_output($level, $doc->getText($element));
	}
	else {
		# anything else is ordinary content (paragraph, list, ...)
		content_output($element);
	}
}

# all the document body is processed

if (@notes) {
	# OK, we have some footnotes in store
	# create a special section
	header_output(1, "NOTES");

	# each note is prefixed by its rank, counted from 1
	my $count = 0;
	foreach my $element (@notes) {
		$count++;
		my $text = "[$count] " . $doc->getText($element);
		print	$paragraph_wrapper->wrap($text) . "\n";
	}
}

# end of POD output
print "=cut\n";