#!/usr/bin/perl -w
#-----------------------------------------------------------------------------
#	$Id : oo2pod 0.1 2004-03-12 JMG$
#-----------------------------------------------------------------------------

=head1	NAME

oo2pod - POD generation from an OpenOffice.org document

=head1	SYNOPSIS

    oo2pod <filename>

=head1	DESCRIPTION

This demo script exports the content of a given OpenOffice.org file to
POD on the standard output.

In its present form, it is quite limited and not flexible, in order to
remain easily readable. It should be considered as an example of text
extraction using OpenOffice::OODoc and not as the 'definitive' oo2pod
filter.

Before extraction, some transformations are done in the document in
order to make it more convenient for a POD presentation. Some pieces of
metadata (title, subject, description), if defined, are reported at the
beginning of the POD. The footnotes are removed from the content and
reported in a special section at the end.

This script needs Text::Wrapper (which is not necessarily required by the
OpenOffice::OODoc installation). To implement more sophisticated
presentation rules, you could use Text::Format instead.

=cut

use strict;
use warnings;

use OpenOffice::OODoc;
use Text::Wrapper;

#-----------------------------------------------------------------------------
# file-scoped state shared by the output helpers below

my $meta;	# will be the metadata object (stays undef if unavailable)
my $doc;	# will be the document content object

#-----------------------------------------------------------------------------
# text output utilities (using Text::Wrapper)

# It's just an example; in a real application, the formatting rules
# should be more flexible and vary according to the style of each
# source text element.
# Here, we prepare two kinds of wrappers, in order to have a larger
# left margin for item lists than for ordinary paragraphs.
# (Paragraphs that start with whitespace are verbatim paragraphs in POD.)
my $PARAGRAPH_INDENT	= ' ';
my $LIST_INDENT		= '    ';	# arbitrary, only has to be wider

my $paragraph_wrapper	= Text::Wrapper->new
				(
				columns		=> 76,
				par_start	=> $PARAGRAPH_INDENT,
				body_start	=> $PARAGRAPH_INDENT
				);
my $list_wrapper	= Text::Wrapper->new
				(
				columns		=> 76,
				par_start	=> $LIST_INDENT,
				body_start	=> $LIST_INDENT
				);

# print a "=headN" POD command
#	$level	heading level, emitted as-is (NOTE: classic POD only
#		defines =head1 .. =head4; deeper levels are not remapped)
#	$text	heading text; nothing is printed if undefined or empty
sub	header_output
	{
	my ($level, $text)	= @_;
	return unless defined $text && length $text;
	print "=head$level\t$text\n\n";
	return;
	}

# print a flat string through the given wrapper, followed by an empty line
sub	wrapped_output
	{
	my ($wrapper, $text)	= @_;
	print $wrapper->wrap($text) . "\n";
	return;
	}

# print a level 1 section made of a heading and one wrapped paragraph;
# the whole section is skipped if there is no text to put in it
sub	section_output
	{
	my ($name, $text)	= @_;
	return unless defined $text && length $text;
	header_output(1, $name);
	wrapped_output($paragraph_wrapper, $text);
	return;
	}

# output the content according to the type of text object
#	$element	an OOo text object (not a flat string)
sub	content_output
	{
	my $element	= shift;
	my $text	= $doc->getText($element);

	# item lists get their own wrapper; the paragraph output rule
	# is used for any element that is not a list
	my $wrapper	= $element->isItemList ?
				$list_wrapper : $paragraph_wrapper;
	wrapped_output($wrapper, $text);

	# in a more specialised script, we could select another
	# alternative wrapper according to the style (using the
	# getStyle() method of OpenOffice::OODoc::Text)
	return;
	}

#-----------------------------------------------------------------------------
# main sequence

# the file name is mandatory
my $filename	= $ARGV[0];
die "Usage : oo2pod <filename>\n"
	unless defined $filename && length $filename;

# initialise the OOo file object
my $ooarchive	= ooFile($filename)
	or die "No regular OpenOffice.org file\n";

# extract the metadata (not fatal: we can work without them)
$meta		= ooMeta(archive => $ooarchive)
	or warn "This file has not standard OOo properties. Looks strange.\n";

# extract the content
$doc		= ooDocument(archive => $ooarchive, member => 'content')
	or die "No standard OOo content ! I give up !\n";

# attempt to use some metadata to begin the output; $title is kept
# because it may be needed again for the leading header below
my $title;
if ($meta)
	{
	$title	= $meta->title;
	section_output("NAME", $title);
	section_output("SUBJECT", scalar $meta->subject);
	section_output("DESCRIPTION", scalar $meta->description);
	# we could dump other metadata here...
	}

# the strange 2 next lines prevent the getText() method of
# OpenOffice::OODoc::Text (see the corresponding man page) from using
# its default tags for spans and footnotes
delete $doc->{'delimiters'}->{'text:span'};
delete $doc->{'delimiters'}->{'text:footnote-body'};

# here we select the tab as field separator for table field output
# (the default is ";" as for CSV output)
$doc->{'field_separator'} = "\t";

# in the next sequence, we will extract all the footnotes, store them for
# later processing and remove them from the content
my @notes	= $doc->getFootnoteList;
foreach my $note (@notes)
	{
	$doc->removeElement($note);
	}

# get the full list of text objects (without the previously removed footnotes)
my @content	= $doc->getTextElementList;

# if the first text element is not a header, we create a leading
# header here, using the title or an arbitrary name;
# nothing is done for an empty document, and the metadata object is
# not required (the title is simply undefined without it)
if (@content && ! $content[0]->isHeader)
	{
	my $leading	= (defined $title && length $title) ?
				$title : "INTRODUCTION";
	header_output(1, $leading);
	}

foreach my $element (@content)
	{
	my $level = $doc->getLevel($element);	# get the hierarchical level
	if ($level)	# if an element has a 'level', it's a header
		{
		header_output($level, $doc->getText($element));
		}
	else
		{
		content_output($element);
		}
	}

# all the document body is processed;
# if we have some footnotes in store, create a special section
if (@notes)
	{
	header_output(1, "NOTES");
	my $count = 0;
	foreach my $note (@notes)
		{
		$count++;
		wrapped_output
			(
			$paragraph_wrapper,
			"[$count] " . $doc->getText($note)
			);
		}
	}

# end of POD output
print "=cut\n";

exit;

#-----------------------------------------------------------------------------