# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at:
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>

=head1 NAME

Mail::SpamAssassin::AsyncLoop - scanner asynchronous event loop

=head1 DESCRIPTION

An asynchronous event loop used for long-running operations, performed "in the
background" during the Mail::SpamAssassin::check() scan operation, such as DNS
blocklist lookups.

=head1 METHODS

=over 4

=cut

package Mail::SpamAssassin::AsyncLoop;

use strict;
use warnings;
# use bytes;
use re 'taint';

use Time::HiRes qw(time);

use Mail::SpamAssassin;
use Mail::SpamAssassin::Logger;

our @ISA = qw();

# obtain timer resolution if possible
our $timer_resolution;
BEGIN {
  eval {
    $timer_resolution = Time::HiRes->can('clock_getres')
      ? Time::HiRes::clock_getres(Time::HiRes::CLOCK_REALTIME())
      : 0.001;  # wild guess, assume resolution is better than 1s
    1;
  } or do {
    $timer_resolution = 1;  # Perl's builtin timer ticks at one second
  };
}

#############################################################################

sub new {
  # called from PerMsgStatus, a new AsyncLoop object is created
  # for each new message processing
  my $class = shift;
  $class = ref($class) || $class;

  my ($main) = @_;
  my $self = {
    main                => $main,
    queries_started     => 0,
    queries_completed   => 0,
    total_queries_started   => 0,
    total_queries_completed => 0,
    pending_lookups     => { },
    timing_by_query     => { },
    all_lookups         => { },  # keyed by "rr_type/domain"
  };

  bless ($self, $class);
  $self;
}

# Given a domain name, produces a listref of successively stripped down
# parent domains, e.g. a domain '2.10.Example.COM' would produce a list:
# '2.10.example.com', '10.example.com', 'example.com', 'com', ''
#
sub domain_to_search_list {
  my ($domain) = @_;
  $domain =~ s/^\.+//; $domain =~ s/\.+\z//;  # strip leading and trailing dots
  my @search_keys;
  if ($domain =~ /\[/) {  # don't split address literals
    @search_keys = ( $domain, '' );  # presumably an address literal
  } else {
    local $1;
    $domain = lc $domain;
    for (;;) {
      push(@search_keys, $domain);
      last  if $domain eq '';
      # strip one level
      $domain = ($domain =~ /^ (?: [^.]* ) \. (.*) \z/xs) ? $1 : '';
    }
    if (@search_keys > 20) {  # enforce some sanity limit
      @search_keys = @search_keys[$#search_keys-19 .. $#search_keys];
    }
  }
  return \@search_keys;
}

# ---------------------------------------------------------------------------

=item $ent = $async->start_lookup($ent, $master_deadline)

Register the start of a long-running asynchronous lookup operation.
C<$ent> is a hash reference containing the following items:

=over 4

=item key (required)

A key string, unique to this lookup.  This is what is reported in
debug messages, used as the key for C<get_lookup()>, etc.

=item id (required)

An ID string, also unique to this lookup.  Typically, this is the DNS packet ID
as returned by DnsResolver's C<bgsend> method.  Sadly, the Net::DNS
architecture forces us to keep a separate ID string for this task instead of
reusing C<key> -- if you are not using DNS lookups through DnsResolver, it
should be OK to just reuse C<key>.

=item type (required)

A string, typically one word, used to describe the type of lookup in log
messages, such as C<DNSBL>, C<MX>, C<TXT>.

=item zone (optional)

A zone specification (typically a DNS zone name - e.g. host, domain, or RBL)
which may be used as a key to look up per-zone settings. No semantics on this
parameter is imposed by this module. Currently used to fetch by-zone timeouts.

=item timeout_initial (optional)

An initial value of elapsed time for which we are willing to wait for a
response (time in seconds, floating point value is allowed). When elapsed
time since a query started exceeds the timeout value and there are no other
queries to wait for, the query is aborted. The actual timeout value ranges
from timeout_initial and gradually approaches timeout_min (see next parameter)
as the number of already completed queries approaches the number of all
queries started.

If a caller does not explicitly provide this parameter or its value is
undefined, a default initial timeout value is settable by a configuration
variable rbl_timeout.

If a value of the timeout_initial parameter is below timeout_min, the initial
timeout is set to timeout_min.

=item timeout_min (optional)

A lower bound (in seconds) to which the actual timeout approaches as the
number of queries completed approaches the number of all queries started.
Defaults to 0.2 * timeout_initial.

=back

C<$ent> is returned by this method, with its contents augmented by additional
information.

=cut

sub start_lookup {
  my ($self, $ent, $master_deadline) = @_;

  my $id  = $ent->{id};
  my $key = $ent->{key};
  defined $id && $id ne ''  or die "oops, no id";
  $key                      or die "oops, no key";
  $ent->{type}              or die "oops, no type";

  my $now = time;
  $ent->{start_time} = $now  if !defined $ent->{start_time};

  # are there any applicable per-zone settings?
  my $zone = $ent->{zone};
  my $settings;  # a ref to a by-zone or to global settings
  my $conf_by_zone = $self->{main}->{conf}->{by_zone};
  if (defined $zone && $conf_by_zone) {
  # dbg("async: searching for by_zone settings for $zone");
    $zone =~ s/^\.//;  $zone =~ s/\.\z//;  # strip leading and trailing dot
    for (;;) {  # 2.10.example.com, 10.example.com, example.com, com, ''
      if (exists $conf_by_zone->{$zone}) {
        $settings = $conf_by_zone->{$zone};
        last;
      } elsif ($zone eq '') {
        last;
      } else {  # strip one level, careful with address literals
        $zone = ($zone =~ /^( (?: [^.] | \[ (?: \\. | [^\]\\] )* \] )* )
                            \. (.*) \z/xs) ? $2 : '';
      }
    }
  }

  dbg("async: applying by_zone settings for %s", $zone)  if $settings;

  my $t_init = $ent->{timeout_initial};  # application-specified has precedence
  $t_init = $settings->{rbl_timeout}  if $settings && !defined $t_init;
  $t_init = $self->{main}->{conf}->{rbl_timeout}  if !defined $t_init;
  $t_init = 0  if !defined $t_init;      # last-resort default, just in case

  my $t_end = $ent->{timeout_min};       # application-specified has precedence
  $t_end = $settings->{rbl_timeout_min}  if $settings && !defined $t_end;
  $t_end = $self->{main}->{conf}->{rbl_timeout_min}  if !defined $t_end; # added for bug 7070
  $t_end = 0.2 * $t_init  if !defined $t_end;
  $t_end = 0  if $t_end < 0;  # just in case
  $t_init = $t_end  if $t_init < $t_end;

  my $clipped_by_master_deadline = 0;
  if (defined $master_deadline) {
    my $time_avail = $master_deadline - time;
    $time_avail = 0.5  if $time_avail < 0.5;  # give some slack
    if ($t_init > $time_avail) {
      $t_init = $time_avail; $clipped_by_master_deadline = 1;
      $t_end  = $time_avail  if $t_end > $time_avail;
    }
  }
  $ent->{timeout_initial} = $t_init;
  $ent->{timeout_min} = $t_end;

  $ent->{display_id} =  # identifies entry in debug logging and similar
    join(", ", grep { defined }
               map { ref $ent->{$_} ? @{$ent->{$_}} : $ent->{$_} }
               qw(sets rules rulename type key) );

  $self->{pending_lookups}->{$key} = $ent;

  $self->{queries_started}++;
  $self->{total_queries_started}++;
  dbg("async: starting: %s (timeout %.1fs, min %.1fs)%s",
      $ent->{display_id}, $ent->{timeout_initial}, $ent->{timeout_min},
      !$clipped_by_master_deadline ? '' : ', capped by time limit');

  $ent;
}

# ---------------------------------------------------------------------------

=item $ent = $async->bgsend_and_start_lookup($domain, $type, $class, $ent, $cb, %options)

A common idiom: calls C<bgsend>, followed by a call to C<start_lookup>,
returning the argument $ent object as modified by C<start_lookup> and
filled-in with a query ID.

=cut

sub bgsend_and_start_lookup {
  my($self, $domain, $type, $class, $ent, $cb, %options) = @_;
  $ent = {}  if !$ent;
  $domain =~ s/\.+\z//s;  # strip trailing dots, these sometimes still sneak in
  $ent->{id} = undef;
  $ent->{query_type} = $type;
  $ent->{query_domain} = $domain;
  $ent->{type} = $type  if !exists $ent->{type};
  $cb = $ent->{completed_callback}  if !$cb;  # compatibility with SA < 3.4

  my $key = $ent->{key} || '';

  my $dnskey = uc($type) . '/' . lc($domain);
  my $dns_query_info = $self->{all_lookups}{$dnskey};

  if ($dns_query_info) {  # DNS query already underway or completed
    my $id = $ent->{id} = $dns_query_info->{id};  # re-use existing query
    return if !defined $id;  # presumably blocked, or other fatal failure
    my $id_tail = $id; $id_tail =~ s{^\d+/IN/}{};
    lc($id_tail) eq lc($dnskey)
      or info("async: unmatched id %s, key=%s", $id, $dnskey);

    my $pkt = $dns_query_info->{pkt};
    if (!$pkt) {  # DNS query underway, still waiting for results
      # just add our query to the existing one
      push(@{$dns_query_info->{applicants}}, [$ent,$cb]);
      dbg("async: query %s already underway, adding no.%d %s",
          $id, scalar @{$dns_query_info->{applicants}},
          $ent->{rulename} || $key);

    } else {  # DNS query already completed, re-use results
      # answer already known, just do the callback and be done with it
      if (!$cb) {
        dbg("async: query %s already done, re-using for %s", $id, $key);
      } else {
        dbg("async: query %s already done, re-using for %s, callback",
            $id, $key);
        eval {
          $cb->($ent, $pkt); 1;
        } or do {
          chomp $@;
          # resignal if alarm went off
          die "async: (1) $@\n"  if $@ =~ /__alarm__ignore__\(.*\)/s;
          warn sprintf("query %s completed, callback %s failed: %s\n",
                       $id, $key, $@);
        };
      }
    }
  }

  else {  # no existing query, open a new DNS query
    $dns_query_info = $self->{all_lookups}{$dnskey} = {};  # new query needed
    my($id, $blocked);
    my $dns_query_blockages = $self->{main}->{conf}->{dns_query_blocked};
    if ($dns_query_blockages) {
      my $search_list = domain_to_search_list($domain);
      foreach my $parent_domain (@$search_list) {
        $blocked = $dns_query_blockages->{$parent_domain};
        last if defined $blocked; # stop at first defined, can be true or false
      }
    }
    if ($blocked) {
      dbg("async: blocked by dns_query_restriction: %s", $dnskey);
    } else {
      dbg("async: launching %s for %s", $dnskey, $key);
      $id = $self->{main}->{resolver}->bgsend($domain, $type, $class, sub {
          my($pkt, $pkt_id, $timestamp) = @_;
          # this callback sub is called from DnsResolver::poll_responses()
        # dbg("async: in a bgsend_and_start_lookup callback, id %s", $pkt_id);
          if ($pkt_id ne $id) {
            warn "async: mismatched dns id: got $pkt_id, expected $id\n";
            return;
          }
          $self->set_response_packet($pkt_id, $pkt, $ent->{key}, $timestamp);
          $dns_query_info->{pkt} = $pkt;
          my $cb_count = 0;
          foreach my $tuple (@{$dns_query_info->{applicants}}) {
            my($appl_ent, $appl_cb) = @$tuple;
            if ($appl_cb) {
              dbg("async: calling callback on key %s%s", $key,
                  !defined $appl_ent->{rulename} ? ''
                    : ", rule ".$appl_ent->{rulename});
              $cb_count++;
              eval {
                $appl_cb->($appl_ent, $pkt); 1;
              } or do {
                chomp $@;
                # resignal if alarm went off
                die "async: (2) $@\n"  if $@ =~ /__alarm__ignore__\(.*\)/s;
                warn sprintf("query %s completed, callback %s failed: %s\n",
                             $id, $appl_ent->{key}, $@);
              };
            }
          }
          delete $dns_query_info->{applicants};
          dbg("async: query $id completed, no callbacks run")  if !$cb_count;
        });
    }
    return if !defined $id;
    $dns_query_info->{id} = $ent->{id} = $id;
    push(@{$dns_query_info->{applicants}}, [$ent,$cb]);
    $self->start_lookup($ent, $options{master_deadline});
  }
  return $ent;
}

# ---------------------------------------------------------------------------

=item $ent = $async->get_lookup($key)

Retrieve the pending-lookup object for the given key C<$key>.

If the lookup is complete, this will return C<undef>.

Note that a lookup is still considered "pending" until C<complete_lookups()> is
called, even if it has been reported as complete via C<set_response_packet()>.

=cut

sub get_lookup {
  my ($self, $key) = @_;
  return $self->{pending_lookups}->{$key};
}

# ---------------------------------------------------------------------------

=item $async->log_lookups_timing()

Log sorted timing for all completed lookups.

=cut

sub log_lookups_timing {
  my ($self) = @_;
  my $timings = $self->{timing_by_query};
  for my $key (sort { $timings->{$a} <=> $timings->{$b} } keys %$timings) {
    dbg("async: timing: %.3f %s", $timings->{$key}, $key);
  }
}

# ---------------------------------------------------------------------------

=item $alldone = $async->complete_lookups()

Perform a poll of the pending lookups, to see if any are completed.
Callbacks on completed queries will be called from poll_responses().

If there are no lookups remaining, or if too much time has elapsed since
any results were returned, C<1> is returned, otherwise C<0>.

=cut

sub complete_lookups {
  my ($self, $timeout, $allow_aborting_of_expired) = @_;
  my $alldone = 0;
  my $anydone = 0;
  my $allexpired = 1;
  my %typecount;

  my $pending = $self->{pending_lookups};
  $self->{queries_started} = 0;
  $self->{queries_completed} = 0;

  my $now = time;

  if (defined $timeout && $timeout > 0 &&
      %$pending && $self->{total_queries_started} > 0)
  {
    # shrink a 'select' timeout if a caller specified unnecessarily long
    # value beyond the latest deadline of any outstanding request;
    # can save needless wait time (up to 1 second in harvest_dnsbl_queries)
    my $r = $self->{total_queries_completed} / $self->{total_queries_started};
    my $r2 = $r * $r;  # 0..1
    my $max_deadline;
    while (my($key,$ent) = each %$pending) {
      my $t_init = $ent->{timeout_initial};
      my $dt = $t_init - ($t_init - $ent->{timeout_min}) * $r2;
      my $deadline = $ent->{start_time} + $dt;
      $max_deadline = $deadline  if !defined $max_deadline ||
                                    $deadline > $max_deadline;
    }
    if (defined $max_deadline) {
      # adjust to timer resolution, only deals with 1s and with fine resolution
      $max_deadline = 1 + int $max_deadline
        if $timer_resolution == 1 && $max_deadline > int $max_deadline;
      my $sufficient_timeout = $max_deadline - $now;
      $sufficient_timeout = 0  if $sufficient_timeout < 0;
      if ($timeout > $sufficient_timeout) {
        dbg("async: reducing select timeout from %.1f to %.1f s",
            $timeout, $sufficient_timeout);
        $timeout = $sufficient_timeout;
      }
    }
  }

  # trap this loop in an eval { } block, as Net::DNS could throw
  # die()s our way; in particular, process_dnsbl_results() has
  # thrown die()s before (bug 3794).
  eval {

    if (%$pending) {  # any outstanding requests still?
      $self->{last_poll_responses_time} = $now;
      my $nfound = $self->{main}->{resolver}->poll_responses($timeout);
      dbg("async: select found %s responses ready (t.o.=%.1f)",
          !$nfound ? 'no' : $nfound,  $timeout);
    }
    $now = time;  # capture new timestamp, after possible sleep in 'select'

    # A callback routine may generate another DNS query, which may insert
    # an entry into the %$pending hash thus invalidating the each() context.
    # So, make sure that callbacks are not called while the each() context
    # is open. [Bug 6937]
    #
    while (my($key,$ent) = each %$pending) {
      my $id = $ent->{id};
      if (exists $self->{finished}->{$id}) {
        delete $self->{finished}->{$id};
        $anydone = 1;
        $ent->{finish_time} = $now  if !defined $ent->{finish_time};
        my $elapsed = $ent->{finish_time} - $ent->{start_time};
        dbg("async: completed in %.3f s: %s", $elapsed, $ent->{display_id});
        $self->{timing_by_query}->{". $key"} += $elapsed;
        $self->{queries_completed}++;
        $self->{total_queries_completed}++;
        delete $pending->{$key};
      }
    }

    if (%$pending) {  # still any requests outstanding? are they expired?
      my $r =
        !$allow_aborting_of_expired || !$self->{total_queries_started} ? 1.0
        : $self->{total_queries_completed} / $self->{total_queries_started};
      my $r2 = $r * $r;  # 0..1
      while (my($key,$ent) = each %$pending) {
        $typecount{$ent->{type}}++;
        my $t_init = $ent->{timeout_initial};
        my $dt = $t_init - ($t_init - $ent->{timeout_min}) * $r2;
        # adjust to timer resolution, only deals with 1s and fine resolution
        $dt = 1 + int $dt  if $timer_resolution == 1 && $dt > int $dt;
        $allexpired = 0  if $now <= $ent->{start_time} + $dt;
      }
      dbg("async: queries completed: %d, started: %d",
          $self->{queries_completed}, $self->{queries_started});
    }

    # ensure we don't get stuck if a request gets lost in the ether.
    if (! %$pending) {
      $alldone = 1;
    }
    elsif ($allexpired && $allow_aborting_of_expired) {
      # avoid looping forever if we haven't got all results.
      dbg("async: escaping: lost or timed out requests or responses");
      $self->abort_remaining_lookups();
      $alldone = 1;
    }
    else {
      dbg("async: queries active: %s%s at %s",
          join (' ', map { "$_=$typecount{$_}" } sort keys %typecount),
          $allexpired ? ', all expired' : '', scalar(localtime(time)));
      $alldone = 0;
    }
    1;

  } or do {
    my $eval_stat = $@ ne '' ? $@ : "errno=$!";  chomp $eval_stat;
    # resignal if alarm went off
    die "async: (3) $eval_stat\n"  if $eval_stat =~ /__alarm__ignore__\(.*\)/s;
    dbg("async: caught complete_lookups death, aborting: %s", $eval_stat);
    $alldone = 1;      # abort remaining
  };

  return wantarray ? ($alldone,$anydone) : $alldone;
}

# ---------------------------------------------------------------------------

=item $async->abort_remaining_lookups()

Abort any remaining lookups.

=cut

sub abort_remaining_lookups {
  my ($self) = @_;

  my $pending = $self->{pending_lookups};
  my $foundcnt = 0;
  my $now = time;

  while (my($key,$ent) = each %$pending) {
    dbg("async: aborting after %.3f s, %s: %s",
        $now - $ent->{start_time},
        (defined $ent->{timeout_initial} &&
         $now > $ent->{start_time} + $ent->{timeout_initial}
           ? 'past original deadline' : 'deadline shrunk'),
        $ent->{display_id} );
    $foundcnt++;
    $self->{timing_by_query}->{"X $key"} = $now - $ent->{start_time};
    $ent->{finish_time} = $now  if !defined $ent->{finish_time};
    delete $pending->{$key};
  }

  # call any remaining callbacks, indicating the query has been aborted
  #
  my $all_lookups_ref = $self->{all_lookups};
  foreach my $dnskey (keys %$all_lookups_ref) {
    my $dns_query_info = $all_lookups_ref->{$dnskey};
    my $cb_count = 0;
    foreach my $tuple (@{$dns_query_info->{applicants}}) {
      my($ent, $cb) = @$tuple;
      if ($cb) {
        dbg("async: calling callback/abort on key %s%s", $dnskey,
            !defined $ent->{rulename} ? '' : ", rule ".$ent->{rulename});
        $cb_count++;
        eval {
          $cb->($ent, undef); 1;
        } or do {
          chomp $@;
          # resignal if alarm went off
          die "async: (2) $@\n"  if $@ =~ /__alarm__ignore__\(.*\)/s;
          warn sprintf("query %s aborted, callback %s failed: %s\n",
                       $dnskey, $ent->{key}, $@);
        };
      }
      dbg("async: query $dnskey aborted, no callbacks run")  if !$cb_count;
    }
    delete $dns_query_info->{applicants};
  }

  dbg("async: aborted %d remaining lookups", $foundcnt)  if $foundcnt > 0;
  delete $self->{last_poll_responses_time};
  $self->{main}->{resolver}->bgabort();
  1;
}

# ---------------------------------------------------------------------------

=item $async->set_response_packet($id, $pkt, $key, $timestamp)

Register a "response packet" for a given query.  C<$id> is the ID for the
query, and must match the C<id> supplied in C<start_lookup()>. C<$pkt> is the
packet object for the response. A parameter C<$key> identifies an entry in a
hash %{$self->{pending_lookups}} where the object which spawned this query can
be found, and through which further information about the query is accessible.

C<$pkt> may be undef, indicating that no response packet is available, but a
query has completed (e.g. was aborted or dismissed) and is no longer "pending".

The DNS resolver's response packet C<$pkt> will be made available to a callback
subroutine through its argument as well as in C<$ent-<gt>{response_packet}>.

=cut

sub set_response_packet {
  my ($self, $id, $pkt, $key, $timestamp) = @_;
  $self->{finished}->{$id} = 1;  # only key existence matters, any value
  $timestamp = time  if !defined $timestamp;
  my $pending = $self->{pending_lookups};
  if (!defined $key) {  # backward compatibility with 3.2.3 and older plugins
    # a third-party plugin did not provide $key in a call, search for it:
    if ($id eq $pending->{$id}->{id}) {  # I feel lucky, key==id ?
      $key = $id;
    } else {  # then again, maybe not, be more systematic
      for my $tkey (keys %$pending) {
        if ($id eq $pending->{$tkey}->{id}) { $key = $tkey; last }
      }
    }
    dbg("async: got response on id $id, search found key $key");
  }
  if (!defined $key) {
    info("async: no key, response packet not remembered, id $id");
  } else {
    my $ent = $pending->{$key};
    my $ent_id = $ent->{id};
    if (!defined $ent_id) {
      # should not happen, troubleshooting
      info("async: ignoring response, id %s, ent_id is undef: %s",
           $id, join(', ', %$ent));
    } elsif ($id ne $ent_id) {
      info("async: ignoring response, mismatched id $id, expected $ent_id");
    } else {
      $ent->{finish_time} = $timestamp;
      $ent->{response_packet} = $pkt;
    }
  }
  1;
}

=item $async->report_id_complete($id,$key,$key,$timestamp)

Legacy. Equivalent to $self->set_response_packet($id,undef,$key,$timestamp),
i.e. providing undef as a response packet. Register that a query has
completed and is no longer "pending". C<$id> is the ID for the query,
and must match the C<id> supplied in C<start_lookup()>.

One or the other of C<set_response_packet()> or C<report_id_complete()>
should be called, but not both.

=cut

sub report_id_complete {
  my ($self, $id, $key, $timestamp) = @_;
  $self->set_response_packet($id, undef, $key, $timestamp);
}

# ---------------------------------------------------------------------------

=item $time = $async->last_poll_responses_time()

Get the time of the last call to C<poll_responses()> (which is called
from C<complete_lookups()>.  If C<poll_responses()> was never called or
C<abort_remaining_lookups()> has been called C<last_poll_responses_time()>
will return undef.

=cut

sub last_poll_responses_time {
  my ($self) = @_;
  return $self->{last_poll_responses_time};
}  

1;

=back

=cut