# $Id: Injection.pm 119 2013-01-27 15:07:06Z jo $
# Cindy::Injection - Injections are the elements of content injection 
# sheets.
#
# Copyright (c) 2008 Joachim Zobel <jz-2008@heute-morgen.de>. All rights reserved.
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#

package Cindy::Injection;

use strict;
use warnings;

use XML::LibXML;
use constant HAS_SELECTORS => eval {
    require HTML::Selector::XPath;
};

use Cindy::Log;
use Cindy::Profile;
use Cindy::XPathContext;
require Cindy::Action;

#
# Create an Injection.
# The first 4 parameters are passed as a 
# list without names.
#
# xdata - the data selector
# action - the name of the action
# xdoc - the document selector
# selector - use 'xpath' or 'css'
#
# atname - attribute name (only for attribute action)
# subsheet - the subsheet (only for repeat):w! 
# xfilter - the filter condition (only for repeat)
#
sub new
{
  my $class = shift;
  my ($xdata, $action, $xdoc, $selector) = @_; 
  my %parms = @_[4..$#_];

  #get_logger->level($DEBUG);

  $xdata = css_to_xpath($xdata) if ($selector eq 'css');
  $xdoc = css_to_xpath($xdoc) if ($selector eq 'css');

  my $self = {
    xdata  => $xdata,
    action  => $action,
    xdoc => $xdoc,
  };

  # The meaning of the optional argument differs
  # depending on the action. 
  $self->{atname} = $parms{atname} 
      if ( $action eq 'attribute' );
  $self->{subsheet} = $parms{sublist} 
      if ( $action eq 'repeat'
        or $action eq 'none' );
  $self->{xfilter} = $parms{xfilter} 
      if ( $action eq 'repeat' );

  return bless($self, $class); 
}

#
# Make a copy
#
sub clone($)
{
  my ($self) = @_;

  my %rtn = %{$self};

  return bless(\%rtn, ref($self));
}

my $prof = Cindy::Profile->new();
sub dump_profile()
{
  $prof = Cindy::Profile->new();
}
END {
  $prof = undef;
}

#
# Wrapper for find.
#
sub find_matches($$) {
  my ($data, $xpath) = @_;

  my @data_nodes = ();

  DEBUG "Matching '$xpath'.";

  # No xpath, no results
  return @data_nodes unless ($xpath);
  # No data, no results
  return @data_nodes unless (defined $data);

  my $cp = Cindy::Profile::before();   
  my $found = $data;
  # . matches happen very often and are quite expensive
  if ($xpath ne '.') {
    my $xpc = Cindy::XPathContext->new($data);
    $found = $xpc->find( $xpath );
  }
  $prof->after($cp, $xpath);
  if ($found->isa('XML::LibXML::NodeList')) {
    @data_nodes = $found->get_nodelist();
    DEBUG "Found "
            # toString is not called automagically
            .join('|', map {$_->toString();} @data_nodes).'.';
  } else {
    DEBUG "Matched '$xpath', found $found.";
    @data_nodes = ($found);
  }


  return @data_nodes;
}

#
# Helper for debugging
#
sub debugNode($)
{
  my ($nd) = @_;
  return $nd. '/' .$nd->nodeName." (".$nd->nodeType.")";
}

#
# Matches all doc nodes
#
sub matchDoc($$)
{
  my ($self, $doc) = @_;
  return $self->match($doc, 'doc');
}

#
# Matches all data nodes 
# Note that "no data found" is expressed by 
# returning an injection where data is undef.
# This leaves the decision what to do to the action.
# Note that it differs from the handling of doc.
# As a result a data node that is not found 
# generally triggers removal.
# 
sub matchData($$)
{
  my ($self, $data) = @_;
  my @matches = $self->match($data, 'data');
  if (scalar(@matches) == 0) {
    my $rtn = $self->clone();
    $rtn->{data} = undef;
    return ($rtn);
  } else {
    return @matches;
  }
}

#
# Does doc/data matching. The xpath from xdoc/xdata
# is used to match nodes that are then stored as doc/data
# properties of cloned nodes. A list of such nodes is 
# returned.
#
# self - This injection.
# $context - The context node for the match.
# $what - One of 'doc' or 'data'.
# return - A list of self clones holding the matches.
#
sub match($$$)
{
  my ($self, $context, $what) = @_;

  # Find the nodes matching the xpath
  my @nodes = find_matches($context, $self->{"x$what"}); 

  my $cnt = scalar(@nodes);
  DEBUG "Matched $cnt $what nodes for action "
    .$self->{action}.".";

  my @rtn = ();
  foreach my $node (@nodes) {
    # clone self
    my $clone = $self->clone();
    $clone->{$what} = $node;
    push(@rtn, $clone);
  } 
  
  return @rtn;
}

#
# Convert w3c css selectors to XPath.
# This is copied from Naoki Tomitas Template-Semantic.
# 
my $element_with_attr_regex = qr{
    ^
    \s*
    (
        \@[^@]+? |
        (?:
            (?: [^"]+? | .+?"[^"]+".+? )
            (?: \@[^@]+? )?
        )
    )
    \s*
    (?: , | $ )
}x;

sub css_to_xpath {
    my ($inp) = @_;
   
    # The dependency is optional 
    if (!HAS_SELECTORS) {
        ERROR "Tried to use css selctor $inp, but HTML::Selector::XPath is not installed.";
        return $inp;
    }

    my $exp = $inp;
    my $xpath;
    {
        # css selector extends @attr syntax
        my @x;
        while ($exp =~ s/$element_with_attr_regex//) {
            my $e = $1;
            my ($elem, $attr) = $e =~ m{(.*?)/?(@[^/@]+)?$};
            my $x;
            if ($elem) {
                my $x = HTML::Selector::XPath::selector_to_xpath($elem);
                   $x .= "/$attr" if $attr;
                push @x, $x;
            } elsif ($attr) {
                push @x, "//$attr";
            }
        }
        $xpath = join " | ", @x;
    }

    # We use "" as a way to express . 
    $xpath =~ s{^$}{.};
    my @xpaths = split (/\|/, $xpath);
    # We need expressions relative to the context node
    $xpath = join('|', map {my $r = $_; 
                            $r =~ s{/}{./}; 
                            $r;}            @xpaths);

    INFO "Translated $inp to $xpath.";

    return $xpath;
}

#
# Check if the injection matches a filter expression.
# return ($self) in case of a match, () otherwise. 
#
sub filter 
{
  my ($self) = @_;
  my $xfilter = $self->{xfilter};
  if ( not $xfilter 
       # avoid filtering the remove action
    or $self->{action} ne 'repeat') {
    return ($self); 
  }

  #INFO "Filtering with $xfilter.";  

  my $fragment = XML::LibXML::DocumentFragment->new();
  my $context = XML::LibXML::Element->new( 'ROOT' );
  my $doc = XML::LibXML::Element->new( 'DOC' );
  my $data = XML::LibXML::Element->new( 'DATA' );

  $fragment->appendChild($context);
  $context->appendChild($doc);
  $context->appendChild($data);

  $doc->appendChild($self->{doc}->cloneNode(1)); # if ($self->{doc}->toString());
  $data->appendChild($self->{data}->cloneNode(1)); # if ($self->{data}->toString());

  my @found = find_matches($context, 
                  "self::node()[boolean($xfilter)]");
  if ( scalar(@found) >= 1 ) { 
      DEBUG "Match. Kept.";
      return ($self);
  } else {
    DEBUG "No match. Removed.";
    return;
  }
}

#
# Execute a member function on all subsheet elements
# and replace the subsheet with the concatenated returns
# of the calls.
#
sub subsheetsDo($$)
{
  my ($self, $do) = @_;
  DEBUG "Entered subsheetsDo.";

  # Without a subsheet, nothing is done.
  if ($self->{subsheet}) {
    DEBUG "Found subsheet.";

    my @subsheets = ();
    foreach my $inj (@{$self->{subsheet}}) {
      push(@subsheets, &{$do}($inj));
    }
    { # Check for removals
      my ($cnt_bef, $cnt_aft) =
          (scalar(@{$self->{subsheet}}), scalar(@subsheets));
      DEBUG "Length of subsheet reduced from $cnt_bef to $cnt_aft."
          if ($cnt_bef != $cnt_aft);
    }
    $self->{subsheet} = \@subsheets;
  }
}

#
# Returns an additional remove action to remove the original 
# of the target doc node after a sequence of replace actions.
#
sub appendRemoveToRepeat()
{
  my ($self) = @_;

  if ($self->{'action'} eq 'repeat') {
    DEBUG "Appending remove.";

    # rmv has the same doc node as inj.
    my $rmv = $self->clone();

    # We need a cheap match, since matchData
    # will be done. The result of the match will 
    # be ignored anyway.
    $rmv->{xdata} = '.';
    $rmv->{action} = 'remove';

    return ($self, $rmv); 
  }
  
  return ($self);
}
  
#
# Executes nodes where doc and data have been matched 
# before. Execution directly changes the doc.
#
sub execute()
{
  my ($self) = @_;

  DEBUG "Will execute $self->{action}.";

  if ($self->{action} eq 'repeat') {
    my $newdoc = 
    action($self->{action},
           $self->{data},
           $self->{doc},
           $self->{atname});
    if (defined($newdoc)) {
      $self->{doc} = $newdoc;
    }
  } else {
    action($self->{action},
           $self->{data},
           $self->{doc},
           $self->{atname});
  }

  return ($self);
}

#
# This does all the work on the subsheet.
# The subsheet is a list of injections. It
# may get longer during the steps of run.
# The doc side is matched first because the 
# in case of repeat the matched doc fragments
# are copied.
sub run($;$$)
{
  my ($self, $dataroot, $docroot) = @_;
  $dataroot ||= $self->{data}; 
  $docroot  ||= $self->{doc};

  return ($self) unless $self->{subsheet};

  # Match all doc nodes.
  DEBUG "WILL MATCH DOC";
  $self->subsheetsDo(sub {$_[0]->matchDoc($docroot)});
  # Append remove to all repeat nodes
  DEBUG "WILL APPEND REMOVE";
  $self->subsheetsDo(sub {$_[0]->appendRemoveToRepeat();});
  # Match all data nodes
  DEBUG "WILL MATCH DATA";
  $self->subsheetsDo(sub {$_[0]->matchData($dataroot)});
  # Filter all subsheets
  DEBUG "WILL FILTER";
  $self->subsheetsDo(sub {$_[0]->filter($self->{xfilter})});
  # Execute the actions.
  DEBUG "WILL EXECUTE"; 
  $self->subsheetsDo(sub {$_[0]->execute();});  

  # Recursion into the subsheets subsheets.
  DEBUG ">>>>> WILL RUN";
  $self->subsheetsDo(sub {$_[0]->run();});  
  DEBUG ">>>>> DID RUN";  

  return ($self);
}

#
# Stringifies a node.
#
sub dbg_dump($)
{
  my ($x) = @_; 
  return 'undef' if (!defined($x));
  return $x->toString() if ($x->can('toString'));
  return $x;
}

#
# A funtion to execute the named action by calling the
# Action::<action> function.
#
sub action($$$;$)
{
  my ($action, $data, $node, $opt) = @_;

  DEBUG "Doing $action on ".dbg_dump($node)." with ".
            dbg_dump($data).":";

  $action =~ s/-/_/g;
  # This is possibel with strict refs
  my $call = \&{"Cindy::Action::$action"};
  my $rtn = &$call($node, $data, $opt);

  DEBUG $node->toString()."\n\n";  

  return $rtn;
}

1;