#!/usr/bin/perl -w
#-----------------------------------------------------------------------------
#	$Id : oo2pod 0.1 2004-03-12 JMG$
#
#	Simple OpenOffice.org to POD conversion.
#
#	Usage : oo2pod <oofilename>
#
#	This demo script exports the content of a given OpenOffice.org file
#	as POD on the standard output. In the present form, it's quite limited
#	and not flexible, in order to remain easily readable. It should be
#	considered as an example of text extraction using OpenOffice::OODoc
#	and not as the 'definitive' oo2pod filter.
#
#	Before extraction, some transformations are done in the document
#	in order to make it more convenient for a POD presentation. Some
#	pieces of metadata (title, subject, description), if defined, are
#	reported in the beginning of the POD. The footnotes are removed from
#	the content and reported in a special section at the end.
#
#	This script needs Text::Wrapper (that is not necessarily required
#	by the OpenOffice::OODoc installation). To implement more sophisticated
#	presentation rules, you could use Text::Format instead.
#	
#-----------------------------------------------------------------------------

use OpenOffice::OODoc	1.104;
use Text::Wrapper;

#-----------------------------------------------------------------------------
my $meta;	# will be the metadata object
my $doc;	# will be the document content object
#-----------------------------------------------------------------------------
# text output utilities (using Text::Wrapper)

my $paragraph_wrapper;
my $list_wrapper;

BEGIN	# build the two shared Text::Wrapper objects
	{
	# This is only a demonstration: in a real application the formatting
	# rules should be more flexible and vary according to the style of
	# each source text element.
	# Two wrappers are prepared here, so that item lists get a larger
	# left margin than ordinary paragraphs.
	my $line_width		= 76;
	my $paragraph_margin	= '        ';
	my $list_margin		= '            ';

	# wrapper used for ordinary paragraphs
	$paragraph_wrapper = Text::Wrapper->new
			(
			columns		=> $line_width,
			par_start	=> $paragraph_margin,
			body_start	=> $paragraph_margin
			);

	# wrapper used for item lists
	$list_wrapper = Text::Wrapper->new
			(
			columns		=> $line_width,
			par_start	=> $list_margin,
			body_start	=> $list_margin
			);
	}

# Print a POD heading command paragraph on the standard output.
#	$level	heading depth, appended as is to "=head"
#	$text	heading title; nothing is printed when it is undefined
#		or empty
sub	header_output
	{
	my ($level, $text) = @_;
	# test definedness and length rather than truth, so that a heading
	# whose whole text is "0" is not silently dropped
	return unless defined $text && length $text;
	print "=head$level\t$text\n\n";
	}

# print one text element, wrapped according to the kind of element it is
sub	content_output
	{
	my ($text_object) = @_;	# an OOo text object (not a flat string)
	my $flat_text = $doc->getText($text_object);

	# item lists go through the list wrapper; the paragraph wrapper
	# is used for any element that is not a list
	my $wrapper = $text_object->isItemList
			? $list_wrapper
			: $paragraph_wrapper;
	print $wrapper->wrap($flat_text) . "\n";
	# a more specialised script could select among further wrappers
	# according to the style (using the getStyle() method of
	# OpenOffice::OODoc::Text)
	}

#-----------------------------------------------------------------------------

# open the OOo file named by the first command line argument
my $ooarchive	= ooFile($ARGV[0]);
die "No regular OpenOffice.org file\n" unless $ooarchive;

# load the metadata; a failure here is reported but is not fatal
$meta	= ooMeta(archive => $ooarchive);
warn "This file has not standard OOo properties. Looks strange.\n"
	unless $meta;

# load the content; nothing can be done without it
$doc	= ooDocument(archive => $ooarchive, member => 'content');
die "No standard OOo content ! I give up !\n" unless $doc;

# begin the output with whichever pieces of metadata are defined
if ($meta)
	{
	# POD section name => metadata accessor, in output order
	my @sections =
		(
		[ "NAME",		'title'		],
		[ "SUBJECT",		'subject'	],
		[ "DESCRIPTION",	'description'	],
		);
	foreach my $section (@sections)
		{
		my ($heading, $accessor) = @$section;
		my $value = $meta->$accessor;
		next unless $value;	# skip missing or empty metadata
		header_output(1, $heading);
		print $paragraph_wrapper->wrap($value) . "\n";
		}
	# other metadata could be dumped here...
	}

# the 2 next lines prevent the getText() method of
# OpenOffice::OODoc::Text (see the corresponding man page) from using
# its default tags for spans and footnotes
delete $doc->{'delimiters'}->{'text:span'};
delete $doc->{'delimiters'}->{'text:footnote-body'};

# here we select the tab as field separator for table field output
# (the default is ";" as for CSV output)
$doc->{'field_separator'} = "\t";

# extract all the footnotes, keep them in @notes for later processing
# and remove them from the content
my @notes = $doc->getFootnoteList;
foreach my $note (@notes)
	{
	$doc->removeElement($note);
	}

# get the full list of text objects (without the previously removed footnotes)
my @content = $doc->getTextElementList;

# if the first text element is not a header, we create a leading
# header here, using the title or an arbitrary name
# (nothing is done for a document without any text element, and the
# title is looked up only if the metadata could be loaded: calling a
# method on an undefined value would abort the script)
if (@content && !$content[0]->isHeader)
	{
	my $title = $meta ? $meta->title : undef;
	header_output(1, $title || "INTRODUCTION");
	}

foreach my $element (@content)
	{
	my $level = $doc->getLevel($element); 	# get the hierarchical level
	if ($level)	# if an element has a 'level', it's a header
		{
		header_output($level, $doc->getText($element));
		}
	else
		{
		content_output($element);
		}
	}

# the whole document body has been processed

# the footnotes kept in store, if any, are reported in a special
# section, each one prefixed by its number between brackets
if (@notes)
	{
	header_output(1, "NOTES");
	foreach my $index (0 .. $#notes)
		{
		my $number = $index + 1;	# notes are numbered from 1
		my $note_text = "[$number] " . $doc->getText($notes[$index]);
		print	$paragraph_wrapper->wrap($note_text) . "\n";
		}
	}

# end of POD output
print "=cut\n";

exit;
#-----------------------------------------------------------------------------
=head1	NAME

	oo2pod - POD generation from an OpenOffice.org document

=cut