package HTML::LinkAdd;
our $VERSION = 0.13;	# POD and link titles

use strict;
use warnings;
use HTML::TokeParser;

=head1 NAME

HTML::LinkAdd - Add hyperlinks to phrases in HTML documents

=head1 SYNOPSIS

	use HTML::LinkAdd;
	my $page = new HTML::LinkAdd(
		'testinput1.html', {
			'the clocks were striking thirteen'=>'footnotes.html#OrwellG-1',
			'updated' => ['updated.html', 'View the latest update],
	});
	warn $page -> hyperlinked;
	$page ->save ('output.html');

=head1 DESCRIPTION

A simple object that accepts a class reference, a path to a file, 
and a hash of text-phrase/link-URLs,
and supplies a method to obtain the HTML with supplied hyperlinks interpolated.

If the values of the supplied has are anonymous lists, the first value
should a URI, the second escaped text to place in the link's C<title> attribute.

The phrase to hyperlink will be skipped if it appears in a context that prevents
linking, as defined in C<%$HTML::LinkAdd::SKIP>. This is currently 
C<head>, C<script>, C<style>>, C<pre>, C<xmp>, C<textarea>, C<object>, and C<a>.

=head1 DEPENDENCIES

L<HTML::TokeParser>

=head1 CONSTRUCTOR (new)

Accepts class reference, followed by either a filename or reference to a scalar of HTML
(as L<HTML::TokeParser|HTML::TokeParser>, and a hash of phrases and hyperlinks.

Returns a scalar that is the updated HTML.

=cut

our $SKIP = { map {$_=>1} qw{
	head pre xmp textarea object a script style
} };

sub new { 
	my ($class,$input) = (shift,shift);
	
	# Lets HTML::TokeParser handle the input file/string checks:-
	warn "HTML::LinkAdd::new called without a class ref?" and return undef unless defined $class;
	warn "Useage: new $class (\$path_to_file or \\\$HTML)" and return undef if not defined $input;

	my $self = bless {
		INPUT => $input,
		HREFS => {},
		output => '',
		skipto => [],
	},$class;

	my %args = ref($_[0]) eq 'HASH'? %{$_[0]} : @_; 
	warn "new requires a hash (or ref to such) as parameter." and return undef if not scalar keys %args;
	
	foreach my $phrase (keys %args){
		my $clean = $phrase;
		$clean =~ s{\s}{ }; # Squash whitespace in the phrase
		$self->{HREFS}->{$clean} = $args{$phrase};
	}
	
	# Create new TokeParser and parse all text, comparing HTML against keys of our targets
	my $p = new HTML::TokeParser ( $self->{INPUT} )
		or warn "Counldn't instantiate HTML::TokeParser!\n$!" and return undef;
	my $token;

	while ($token = $p->get_token and not (@$token[1] eq 'html' and @$token[0] eq 'E') ){
		
		 # warn "@$token[0] @$token[1] - [",  (scalar @{ $self->{skipto} }? join(', ', @{ $self->{skipto} }) : ''), "]\n";

		if (@$token[0] eq 'T'				# Text token
			and not @{ $self->{skipto} }	# and not ignoreing head/pre, etc
		) {
		
			@$token[1] =~ s{\s+}{ };		# Squash whitespace in the text

			# If we got a text node, loop over every user-supplied phrase
			foreach my $key ( keys %{$self->{HREFS}} ) {
				if (@$token[1] =~ m/\Q$key\E/sg){
					my ($title, $href);
					if (ref $self->{HREFS}->{$key}){
						($href, $title) = @{ $self->{HREFS}->{$key} };
					}
					else {
						$href = $self->{HREFS}->{$key};	
					}
					my $subs = "<a href=\"$href\""
					. ($title? " title=\"$title\"" : '')
					. ">$key</a>";
					@$token[1] =~ s/\Q$key\E/$subs/sg;
				}
			}
		};

		my $literal;
		if (@$token[0] eq 'S') { 
			$literal = @$token[4]; 
			# Skip PRE and XMP and TEXTAREA and HEAD
			if (exists $SKIP->{ @$token[1] }){
				unshift @{$self->{skipto}}, @$token[1]; 
			}
		}
		elsif (@$token[0] eq 'E') { 
			$literal = @$token[2];
			if (@{ $self->{skipto} }
			and @$token[1] eq $self->{skipto}->[0]){
				shift @{$self->{skipto}};
			}
		}
		else {
			$literal = @$token[1];
		}
		
		$self->{output} .= $literal;
	} 
	
	return $self;
}


=head1 PUBLIC METHOD hyperlink

Returns the hyperlinked HTML docuemnt constructed by...the constructor.

=cut

sub hyperlinked { return $_[0]->{output} }


=head1 PUBLIC METHOD save

Convenience method to save the object's C<output> slot to filename passed as scalar.

Returns undef on failure, C<1> on success.

=cut

sub save { my ($self,$filename) = (shift,shift);
	warn "HTML::LinkAdd::save requires a filename as parameter 1" and return undef unless defined $filename;
	local *OUT;
	open OUT, ">$filename"
		or warn "HTML::LinkAdd::save could not open the file <$filename> for writing.\n$!" and return undef;
		print OUT $self->{output};
	close OUT;
	return 1;
}

1;	# Return cleanly


__END__;

=head1 SEE ALSO

L<HTML::TokeParse>.

=head1 TODO

Add support for linking images by source or C<ID>.

=head1 AUTHOR

Lee Goddard C<lgoddard@cpan.org>

=head1 COPYRIGHT

Copyright 2001 (C) Lee Goddard. All Rights Reserved.
This is free software and you may use, abuse, amend and distribute under the same
terms as Perl itself.