#!/usr/bin/perl

package Mail::Summary::Tools::Downloader::NNTP;
use Moose;

use Net::NNTP;
use Mail::Message;
use List::MoreUtils qw/any/;

use Mail::Summary::Tools::ThreadFilter::Util;
use DateTime;
use DateTime::Infinite;
use DateTime::Format::Mail;
use DateTime::Format::DateManip;

has cache => (
	isa => "Object",
	is  => "rw",
	required => 1,
);

has server => (
	isa => "Str",
	is  => "ro",
	required => 1,
);

has overviews => (
	isa => "HashRef",
	is  => "ro",
	default => sub { return {} },
);

has connection => (
	isa => "Net::NNTP",
	is  => "ro",
	lazy    => 1,
	default => sub { $_[0]->connect },
);

has fetch_recursively => (
	isa => "Bool",
	is  => "rw",
	default => 1,
);

has _downloaded => (
	isa => "HashRef",
	is  => "ro",
	default => sub { return {} },
);

sub connect {
	my $self = shift;
	Net::NNTP->new( $self->server, Debug => 1 ) || die "couldn't connect to " . $self->server;
}

sub overviews_for_group {
	my ( $self, %params ) = @_;
	my ( $group, $to, $from ) = @params{qw/group to from/};

	my $cache  = $self->cache;
	my $server = $self->server;

	# effiency hack
	unless ( $self->overviews->{$group} ) {
		if ( $cache->isa("Mail::Summary::Tools::YAMLCache") ) {
			my $all_overviews_key = join(":", "overviews", $server, $group);
			return $self->overviews->{$group} = $self->cache->get( $all_overviews_key ) || do {
				my %overviews;
				$self->cache->set( $all_overviews_key, \%overviews );
				\%overviews;
			}
		} else {
			my %overviews;
			$self->overviews->{$group} = \%overviews;

			foreach my $article ( $from .. $to ) {
				next if $overviews{$article};
				my $cache_key = join(":", "overviews", $server, $group, $article);
				if ( my $article_overviews = $cache->get( $cache_key ) ) {
					$overviews{$article} = $article_overviews;
				}
			}

			return \%overviews;
		}
	}
}

sub split_ranges {
	my ( $self, @ranges ) = @_;

	# split the ranges up into smaller chunks... If xover gets a too big number it barfs sometimes.
	return map {
		my $start = $_->[0];
		my $end   = $_->[1];
		my $count = $end - $start;

		my $magic = 1000;

		my $div = int( $count / $magic );

		( $div
			? ( (map { [ $start + ( ($_-1) * $magic ), ($start + ( $_ * $magic ))-1 ] } 1 .. $div), [ $start + ( $div * $magic ), $end ] )
			: ( $_ ) )
	} @ranges;
}

sub determine_missing_header_ranges {
	my ( $self, %params ) = @_;
	my ( $overviews, $to, $from ) = @params{qw/overviews to from/};

	if ( ( my @got = sort { $a <=> $b } keys %$overviews ) > 10 ) {
		my @ranges;

		warn "previous articles exist";
		my $prev = shift @got;
		push @ranges, [ $from, $prev-1 ] unless $from == $prev;

		foreach my $article ( @got ) {
			if ( ($article - 1) != $prev ) {
				warn "adding range: $prev+1 .. $article-1";
				push @ranges, [ $prev+1, $article-1 ];
			}

			$prev = $article;
		}

		push @ranges, [ $prev+1, $to ] unless $to == $prev;

		return @ranges;
	} else {
		warn "getting everything";
		return [ $from, $to ];
	}
}

sub fetch_overviews_in_ranges {
	my ( $self, %params ) = @_;
	my ( $overviews, $ranges, $group ) = @params{qw/overviews ranges group/};

	my @important_headers = qw/Date References Message-ID/;

	my $connection = $self->connection;
	my $cache  = $self->cache;
	my $server = $self->server;

	my @overview_headers = map { my $header = $_; $header =~ s/:$//; $header } @{ $connection->overview_fmt };
	my %header_indices; @header_indices{@overview_headers} = 0 .. $#overview_headers;
	my @keep_headers; @keep_headers[map { $header_indices{$_} } @important_headers] = ( (1) x scalar(@important_headers) );

	foreach my $range ( @$ranges ) {
		my $raw_overviews = $connection->xover($range);

		foreach my $overview ( values %$raw_overviews ) {
			$overview = {
				map { $overview_headers[$_] => $overview->[$_] }
				grep { $keep_headers[$_] } 0 .. $#overview_headers
			};
		}

		@{ $overviews }{ keys %$raw_overviews } = values %$raw_overviews;

		unless ( $cache->isa("Mail::Summary::Tools::YAMLCache") ) {
			# this is not necessary for the yaml cache, because of our hack.
			# the hash is shared that way and just gets updated in place
			foreach my $article ( keys %$overviews ) {
				my $cache_key = join(":", "overviews", $server, $group, $article);
				$cache->set( $cache_key, $overviews->{$article} );
			}
		}
	}
}

sub fetch_overviews_for_group {
	my ( $self, %params ) = @_;

	my $overviews = $self->overviews_for_group(%params);

	my @ranges = $self->split_ranges(
		$self->determine_missing_header_ranges(
			%params,
			overviews => $overviews,
		),
	);

	$self->fetch_overviews_in_ranges(
		ranges    => \@ranges,
		overviews => $overviews,
	);

	delete @{ $overviews }{ 1 .. $params{from}-1 }; # FIXME cache this in non YAML cache too

	return $overviews;
}

sub set_group {
	my ( $self, $group ) = @_;

	my ( $to, $from ) = $self->connection->group($group) or die "$group doesn't exist";

	my $overviews = $self->fetch_overviews_for_group(
		group => $group,
		from  => $from,
		to    => $to,
	);

	return (
		overviews => $overviews,
		from => $from,
		to   => $to,
	);
}

sub download {
	my ( $self, %params ) = @_;

	my ( $from_date, $to_date ) = delete @params{qw/from to/};

	my $range = Date::Range::Forgiving->new( $from_date, $to_date ); # ACKCKKK Fixme

	%params = ( $self->set_group($params{group}), %params );

	my $overviews = $params{overviews};

	$self->for_articles_in_date_range(
		sub {
			my $article = shift;
			
			$self->get_article(
				%params,
				article  => $article,
				overview => $overviews->{$article},
			);
		},
		%params,
		date_range => $range,
	);

}

sub for_articles_in_date_range {
	my ( $self, $body, %params ) = @_;
	my ( $overviews, $from, $to, $range ) = @params{qw/overviews from to date_range/};

	foreach my $article ( $from .. $to ) {
		next unless my $overview = $overviews->{$article};
		my $date_header = $overview->{Date};
		my $date;

		my @errors;
		$date = eval { DateTime::Format::Mail->new->loose->parse_datetime( $date_header ) };
		push @errors, $@ if $@;
		$date ||= eval { DateTime::Format::DateManip->parse_datetime( $date_header ) };
		push @errors, $@ if $@;

		warn "Error parsing date '$date_header': @errors" unless defined $date;

		$date ||= DateTime->now;

		$body->($article) if $range->includes( $date );
	}
}

sub get_article {
	my ( $self, %params ) = @_;
	my ( $overview, $article ) = delete @params{qw/overview article/};

	if ( $self->fetch_recursively ) {
		foreach my $message_id ( $overview->{'References'} =~ / ( < \S+ \@ \S+ > ) /gx ) {
			warn "additional thread root: $message_id";
			$self->get_message_if_needed(
				%params,
				message_id => $message_id,
			);
		}
	}

	$self->get_message_if_needed(
		%params,
		article => $article,
		message_id => $overview->{'Message-ID'},
	);
}

sub get_message_if_needed {
	my ( $self, %params ) = @_;	
	my ( $message_id, $mbox, $extra ) = @params{qw/message_id mailbox extra_mailboxes/};

	return if $self->_downloaded->{$message_id}++;

	return if any { $_->find($message_id) } $mbox, @$extra;

	$self->fetch_message_id( %params );
}

sub fetch_message_id {
	my ( $self, %params ) = @_;
	my ( $article, $message_id, $mbox ) = @params{qw/article message_id mailbox/};

	if ( my $article = $self->connection->article( $article || $message_id )  ) {
		my $message = Mail::Message->read( $article );
		$mbox->addMessage( $message );
	} else {
		warn "couldn't fetch article: " . ($article || $message_id);
	}
}

__PACKAGE__;

__END__

=pod

=head1 NAME

Mail::Summary::Tools::Downloader::NNTP - Get NNTP articles and their thread roots.

=head1 SYNOPSIS

	use Mail::Summary::Tools::Downloader::NNTP;

	my $downloader = Mail::Summary::Tools::Downloader::NNTP->new(
		server => "nntp.perl.org",
		cache  => $article_cache,
		fetch_recursively => 0,
	);

	my $mgr = Mail::Box::Manager->new;
	my $mbox = $mgr->open( "foo" );

	$downloader->download(
		group   => "perl.perl6.language",
		from    => 10000,
		to      => 11000,
		mailbox => $mbox,
		extra_mailboxes => \@extra,
	);

=head1 DESCRIPTION

This utility makes downloading mailing list archives from an nntp server
into a mailbox trivial.

Messages whose message ID is already in any of the mailboxes are not
downloaded.

Additionally, message IDs listed in the C<References> header will also be
fetched if C<fetch_recursively> is on (the default).

Since L<Mail::Box::Thread::Manager> can thread messages from multiple mailboxes
this one can download the next batch of articles with C<fetch_recursively>
enabled, and using a log-rotation like mechanism delete older mailboxes without
fear of breaking the threads, at the cost of some redundant downloads.

=cut