=head1 NAME

Rcs::Parser - Parse and analyze RCS files.

=head1 SYNOPSIS

A basic RCS parser. This file does not rely upon any external utilities to
parse RCS files. Currently it functions with RCS files generated by GNU 
CVS and as documented by the rcsfile(5) man page. 

=head1 EXAMPLE USAGE

Retrieve the most recent file from an archive:

  my $rcs = new Rcs::Parser;
  my $ret = $rcs->load($filename);
  
  my $current = $rcs->recent_version;

To retrieve a specific version:

  my $rcs = new Rcs::Parser;
  my $ret = $rcs->load($filename);
  
  my $specific_version = $rcs->get('1.2');

=cut

# shorthand document of memory structure
#
# - version
# - body
#   - line #
#     - line
#     - origin
#     - new_lines
# - line_map

#*************************************************************************

package Rcs::Parser;
$Rcs::Parser::VERSION = '0.07';

use 5.006;
use Sort::Versions;
use warnings;
use strict;

sub new {
  bless {}, $_[0]
}

=head1 METHODS:

=head2 author($version)

my $author = $rcs->author($version);

The author method returns the author name of the given version. If
no version is given it returns the author of the current loaded version.

=cut 

sub author {
  my $self = shift @_;
  my $ver  = shift @_ || $self->version;
  return $self->{rcs}->{$ver}->{author} || undef;
}

=head2 date($version)

my $date = $rcs->date($version);

The date method returns the revision date of the given version. If
no version is given it returns the date of the current version.

=cut

sub date {
  my $self = shift @_;
  my $ver  = shift @_ || $self->recent_version;
  return $self->{rcs}->{$ver}->{date} || undef;
}

=head2 get($version)

my $ret = $rcs->get($version);

This method causes the given version to be retrieved from the archive

=cut

sub get {
  my $self = shift @_;
  my $ver  = shift @_ || $self->recent_version;
  
  return undef unless $self->{rcs}->{$ver};

  my @chain = $self->_revision_path($self->{current_document}->{version},$ver);

  for my $version ( @chain ) {
    $self->_debug("--> loading delta $version");
    $self->_apply_delta($version);
    $self->_sort;
  }

  return $self->_dump;
}

=head2 load($filename)

my $ret = $rcs->load($filename);

The load command reads in and parses the given filename. If the
file does not exist or is unreadable by the script, undef is 
returned. Otherwise, 1 is returned upon success.

=cut

sub load {
  my $self      = shift @_;
  $self->{file} = shift @_;
  return undef unless -f $self->{file};

  my $doc_header;
  $self->{rcs} = {};

  open RCSFILE, '<', $self->{file};
  $self->{rawfile} = join('',<RCSFILE>);
  close RCSFILE;

  $self->_parse_in_rcs($self->{rcs});

  # populate the current doc
  
  $self->{current_document}->{version} = $self->recent_version;

  for my $line ( split /\n/, $self->{rcs}->{$self->recent_version}->{text} ) {
    push @{ $self->{current_document}->{body}->{1}->{new_lines} }, $self->_unquote($line) . "\n";
  }

  # resort it

  $self->_sort;

  return 1;
}

=head2 load_scalar($data)

my $ret = $rcs->load_scalar($data);

The load command reads in and parses the provided RCS file in scalar 
form. 1 is returned upon success.

=cut

sub load_scalar {
  my $self  = shift @_;
  my $data  = shift @_;

  my $doc_header;
  $self->{rcs} = {};

  my @raw = map { "$_\n" } split "\n", $data;

  $self->{rawfile} = \@raw; # The rawfile is steadily deleted as it is parsed
  $self->_parse_in_rcs($self->{rcs});

  # populate the current doc
  
  $self->{current_document}->{version} = $self->recent_version;

  for my $line ( split /\n/, $self->{rcs}->{$self->recent_version}->{text} ) {
    push @{ $self->{current_document}->{body}->{1}->{new_lines} }, $self->_unquote($line) . "\n";
  }

  # resort it

  $self->_sort;

  return 1;
}

=head2 notate()

my $ret = $rcs->notate()

This method builds and assembled statistical information for verions.

=cut

sub notate {
  my $self = shift @_;
  my $ver  = $self->recent_version;

  my $note = {};
  
  return undef unless $self->{rcs}->{$ver};

  $self->_grab_note($note);

  
  my @chain = $self->_revision_path($self->{current_document}->{version});

  for my $version ( @chain ) {
    $self->_debug("--> loading delta $version");
    $self->_apply_delta($version);
    $self->_sort;
    $self->_grab_note($note);
  }

  for my $version ( reverse @chain, $self->recent_version ) {
    if ( $version eq $chain[$#chain] ) {
      $self->_debug("Mapping $version pro facia ...");
      map { $note->{$version}->{body}->{$_}->{origin} = $version; } keys %{$note->{$version}->{body}};
    } else {
      $self->_debug("Mapping $version via line count ...");
      my $error;
      for my $line ( keys %{$note->{$version}->{body}} ) {
        
        my $author_ver = $version;
        my $test_line  = $line;
        my $test_ver   = $self->previous_version($version);
        
        $error++ unless defined $test_ver;
        
        while ( $note->{$test_ver}->{line_map}->{$test_line} ) {
          $author_ver = $test_ver;
          $test_line  = $note->{$test_ver}->{line_map}->{$test_line};
          $test_ver   = $self->previous_version($author_ver) || last; #??? break if we bottom on version?
        } 
      
        $note->{$version}->{body}->{$line}->{origin} = $author_ver;
      }
      warn "WARN: $error lines didn't map well for $self->{file} $version"
            if $error > 0;
    }
  }

  return $note;
}

sub _grab_note {
  my $self = shift @_;
  my $ref  = shift @_;  
  my $ver = $self->{current_document}->{version};
  for my $line ( keys %{ $self->{current_document}->{body} } ) {
    $ref->{$ver}->{body}->{$line}->{line}   = length $self->{current_document}->{body}->{$line}->{line};
    $ref->{$ver}->{body}->{$line}->{origin} = $self->{current_document}->{body}->{$line}->{origin};  
  }
  for my $map ( keys %{ $self->{current_document}->{line_map} } ) {
    $ref->{$ver}->{line_map}->{$map} = $self->{current_document}->{line_map}->{$map};
  }

  return 1;
}

=head2 all_versions()

my @versions = $rcs->all_versions();

This method returns an array or arrayref of all versions stored in the RCS file.

=cut

sub all_versions {
  my $self = shift @_;
  unless ( defined $self->{all_versions} ) {
    $self->{all_versions} = [ sort { versioncmp($b,$a) } grep !/(header|desc)/, keys %{$self->{rcs}} ];
  }
  return wantarray ? @{$self->{all_versions}} : $self->{all_versions};
}

=head2 previous_version()

my $ver = $rcs->previous_version();

This method returns the version previous to the currently instanced version.

=cut

sub previous_version {
  my $self = shift @_;
  my $ver  = shift @_ || $self->recent_version;
  return $self->{rcs}->{revision_path}->{$ver};
}

=head2 recent_version()

my $ver = $rcs->recent_version();

This method returns the most current revision of the file.

=cut

sub recent_version {
  my $self = shift @_;
  return $self->{rcs}->{header}->{head};
}

=head2 version()

my $ver = $rcs->version();

This method returns the currently instanced version.

=cut

sub version {
  my $self = shift @_;
  return $self->{current_document}->{version};
}

#### Private methods

sub _apply_delta {
  my $self = shift @_;
  my $ver  = shift @_;
  my $doc  = shift @_ || $self->{current_document};

  my $raw_delta = $self->{rcs}->{$ver}->{text};
  $doc->{version} = $ver;

  my @deltas = split /\n/, $raw_delta;

  while ( @deltas ) {
    my $delta = shift @deltas;
    if ( $delta =~ /^a(\d+) (\d+)$/ ) { 
      my $line  = $1;
      my $count = $2;
      my $test  = 0;
      
      for ( my $c = 0; $c < $count; $c++ ) {
        my $new_line = $self->_unquote( shift @deltas ) . "\n";
        push @{ $doc->{body}->{$line}->{new_lines} }, $new_line;
        $test++;
      }
      $self->_debug("added $test lines at $line ($count directed)"); 
 
    } elsif ( $delta =~ /^d(\d+) (\d+)$/ ) { 

      my $first_line = $1;
      my $last_line  = $1 + $2 - 1;
      for my $line ( $first_line .. $last_line ) {
        $doc->{body}->{$line}->{line} = undef;
      }
      $self->_debug("deleting lines $first_line through $last_line ($2 directed)"); 
    } else { 
      warn "ORPHAN DELTA COMMAND! $delta\n"; 
    }
  }
  return 1;
}

sub _create_full_revision_path {
  my $self = shift @_;
  $self->_debug('Building full revision path for reference...');

  $self->{rcs}->{revision_path} = {};
  $self->{rcs}->{reverse_revision_path} = {};
  
  my $version = $self->recent_version;
  while ( my $next = $self->{rcs}->{$version}->{next} ) {
    $self->{rcs}->{revision_path}->{$version} = $next;
    $self->_debug("  $version -> $next");
    $version = $next;
  }

  #map { $self->{rcs}->{reverse_revision_path}->{$self->{rcs}->{revision_path}->{$_}} = $_ } keys %{$self->{rcs}->{revision_path}};

  $self->_debug('Built a revision path of ' . scalar( keys %{$self->{rcs}->{revision_path}} ) . ' jumps.');
}

sub _debug {
  my $self = shift @_;
  my $mesg = shift @_;
  chomp $mesg;
  print "DEBUG: $mesg\n" if $self->{debug};
}

sub _dump {
  my $self = shift @_;
  my $doc  = shift @_ || $self->{current_document};
  return join '', map { $doc->{body}->{$_}->{line} } sort { $a <=> $b } keys %{ $doc->{body} };
}

sub _parse_in_rcs {
  my $self = shift @_;
  my $rcs  = shift @_;

  ### Parse in the RCS file header

  my $rcs_header;

  while ( $self->{rawfile} =~ /\G(.+)\n/gcm ) {
    $rcs_header .= $1;
    $self->_debug("Adding line to the raw header.");
  }

  for my $chunk ( split /;/, $rcs_header ) {
    $chunk =~ s/\n//g;
    $chunk =~ /^\s*(\w+)\s*(.*)$/;
    $rcs->{header}->{$1} = $2 if $1;
  }

  $rcs_header = undef;

  ### Blank lines
  
  1 while ( $self->{rawfile} =~ /\G\n/gcm );
  
  ### Parse in the individual version headers

  while ( $self->{rawfile} =~ /\G(\d(\.|\d)+)(.+?)\n\n/gcs ) {
    $rcs->{$1}->{header} = $2;    
    $self->_debug("vheader $1 is ".length($1)." chars in size") if $self->{debug};
  }

  ### Blank lines
  
  1 while ( $self->{rawfile} =~ /\G\n/gcm );

  ### Parse in the desc

  if ( $self->{rawfile} =~ /\Gdesc\n\@\@\n/gcs || $self->{rawfile} =~ /\Gdesc\n\@(.+?)(?<!\@)\@\n/gcs ) {
    $rcs->{desc} = $1;
  }

  ### Blank lines
  
  1 while ( $self->{rawfile} =~ /\G\n/gcm );

  ### Change directives

  while ( $self->{rawfile} =~ /\G(\d(\.|\d)+)\n/gcm ) {
    my $version = $1;

    while ( $self->{rawfile} =~ /\G(\w+)\n\@\@\n/gcs || $self->{rawfile} =~ /\G(\w+)\n\@(.+?)(?<!\@)\@\n/gcs ) {
      $rcs->{$version}->{$1} = $2;
      $self->_debug("directive '$1' for ver $version is ".length($2)." chars in size") if $self->{debug};
    }

    1 while ( $self->{rawfile} =~ /\G\n/gcm ); # Blank lines
  }

  ### blank lines

  1 while ( $self->{rawfile} =~ /\G\n/gcm );

  $self->{finalpos} = pos($self->{rawfile}); # Done parsing? Remember position.

  ### disassemble header

  for my $version ( keys %$rcs ) {
    my @commands;
    @commands = split ';', $rcs->{$version}->{header} if defined $rcs->{$version}->{header};
    for my $command ( @commands ) {
      chomp $command;
      $rcs->{$version}->{$1} = $2 if $command =~ /\s*(.+)\s+(.+)$/;
    }
  }

  return 1;
}

sub _revision_path {
  my $self = shift @_;
  my $ver  = shift @_ || $self->recent_version;
  my $stop = shift @_ || undef;
  
  return undef unless $self->{rcs}->{$ver};
  return () if $ver eq $stop;  
  
  $self->_debug('Checking revision path...');
  $self->_create_full_revision_path() unless $self->{rcs}->{revision_path};

  my @chain;
  while ( my $next = $self->{rcs}->{revision_path}->{$ver} ) { 
    push @chain, $next;
    last if $next eq $stop;
    $ver = $next;
  }  

   $self->_debug("CHAIN: " . (join ' -> ', @chain));
  return @chain;
}

sub _sort {
  my $self = shift @_;
  my %copy = %{ $self->{current_document} };

  $self->{current_document} = {};
  $self->{current_document}->{version} = $copy{version};

  my $count = 1;
  for my $line_num ( sort { $a <=> $b } keys %{ $copy{body} } ) {
    # add basic line
    if ( $copy{body}{$line_num}{line} ) {
      $self->{current_document}->{body}->{$count}->{line} = $copy{body}{$line_num}{line};
      $self->{current_document}->{line_map}->{$count} = $line_num;
      $count++;
    }
    # add new lines if existant
    if ( $copy{body}{$line_num}{new_lines} ) {
      for my $line ( @{ $copy{body}{$line_num}{new_lines} } ) {
        $self->{current_document}->{body}->{$count}->{line} = $line;
        $count++;
      }
    }
  }

  %copy = ();

  $self->_debug( "($self->{current_document}->{version}) " . --$count . ' lines sorted...');

  return 1;
}

sub _unquote {
  my $self = shift @_;
  my $in   = shift @_;
  $in =~ s/\@\@/\@/g;
  return $in;
}

1;

=head1 KNOWN ISSUES

Beta Code:

This code is beta. It has yet to fully understand binary formats stored
in RCS and will treat them as text. Consquently, you'll see warnings. That
being said, there shouldn't be any large scale bugs that will cause
segfaulting or crashing. Only warnings.

The RCS file format:

There is an astounding lack of good documentation of the RCS format. About
the only thing that can be found is the rcsfile(5) man page. The layout is
mostly reverse engineered in this module. I have yet to have the time, or
the skill and patience to disassemble the RCS portions of the code for GNU
CVS.

=head1 ERATTA

  Q: Why 'Rcs' and not 'RCS'

  A: Because the any directory named 'RCS' is usually ignored by most 
     versioning software and some developer tools. 

=head1 BUGS AND SOURCE

	Bug tracking for this module: https://rt.cpan.org/Dist/Display.html?Name=Rcs-Parser

	Source hosting: http://www.github.com/bennie/perl-Rcs-Parser

=head1 VERSION

    Rcs::Parser v0.07 (2014/02/22)

=head1 COPYRIGHT

    (c) 2001-2014, Phillip Pollard <bennie@cpan.org>

=head1 LICENSE

This source code is released under the "Perl Artistic License 2.0," the text of 
which is included in the LICENSE file of this distribution. It may also be 
reviewed here: http://opensource.org/licenses/artistic-license-2.0