# Please direct questions and support issues to <bioperl-l@bioperl.org> 
#
# Cared for by Shawn Hoon
#
# Copyright Shawn Hoon
#
# You may distribute this module under the same terms as perl itself

# POD documentation - main docs before the code

=head1 NAME

Bio::Tools::Run::Vista - Wrapper for Vista

=head1 SYNOPSIS

  use Bio::Tools::Run::Vista;
  use Bio::Tools::Run::Alignment::Lagan;
  use Bio::AlignIO;
  use Bio::SeqIO;

  my $sio = Bio::SeqIO->new(-file=>$ARGV[0],-format=>'genbank');
  my @seq;
  my $reference = $sio->next_seq;
  push @seq, $reference;
  while(my $seq = $sio->next_seq){
    push @seq,$seq;
  }
  my @features = grep{$_->primary_tag eq 'CDS'} $reference->get_SeqFeatures;

  my $lagan = Bio::Tools::Run::Alignment::Lagan->new;

  my $aln = $lagan->mlagan(\@seq,'(fugu (mouse human))');


  my $vis = Bio::Tools::Run::Vista->new('outfile'=>"outfile.pdf",
                                        'title' => "My Vista Plot",
                                        'annotation'=>\@features,
                                        'annotation_format'=>'GFF',
                                        'min_perc_id'=>75,
                                        'min_length'=>100,
                                        'plotmin'   => 50,
                                        'tickdist' => 2000,
                                        'window'=>40,
                                        'numwindows'=>4,
                                        'start'=>50,
                                        'end'=>1500,
                                        'tickdist'=>100,
                                        'bases'=>1000,
                                        'java_param'=>"-Xmx128m",
                                        'num_pages'=>1,
                                        'color'=> {'EXON'=>'100 0 0',
                                                   'CNS'=>'0 0 100'},
                                        'quiet'=>1);

  my $referenceid= 'human';
  $vis->run($aln,$referenceid); 

  #alternatively, one can choose pairwise alignments to plot by passing
  #an array ref of pairs, where the second id in each pair is the
  #reference sequence
  $vis->run($aln,[['mouse','human'],['fugu','human'],['mouse','fugu']]);

=head1 DESCRIPTION

Please see the Vista documentation for plotfile options

Wrapper for Vista :

C. Mayor, M. Brudno, J. R. Schwartz, A. Poliakov, E. M. Rubin, K. A.  Frazer, 
L. S. Pachter, I. Dubchak. 
VISTA: Visualizing global DNA  sequence alignments of arbitrary length.
Bioinformatics, 2000  Nov;16(11):1046-1047.
Get it here:
http://www-gsd.lbl.gov/vista/VISTAdownload2.html

On the command line, it is assumed that this can be executed:

java Vista plotfile

Some of the code was adapted from MLAGAN toolkit

M. Brudno,  C.B. Do,  G. Cooper,  M.F. Kim,  E. Davydov,  NISC Sequencing Consortium, 
E.D. Green,  A. Sidow and S. Batzoglou 
LAGAN and Multi-LAGAN: Efficient Tools for Large-Scale Multiple  Alignment of Genomic 
DNA, Genome Research, in press

get lagan here:

http://lagan.stanford.edu/

=head1 FEEDBACK

=head2 Mailing Lists

User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to one
of the Bioperl mailing lists.  Your participation is much appreciated.

  bioperl-l@bioperl.org                  - General discussion
  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists

=head2 Support 

Please direct usage questions or support issues to the mailing list:

I<bioperl-l@bioperl.org>

rather than to the module maintainer directly. Many experienced and 
responsive experts will be able to look at the problem and quickly 
address it. Please include a thorough description of the problem 
with code and data examples if at all possible.

=head2 Reporting Bugs

Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution.  Bug reports can be submitted via the
web:

  http://redmine.open-bio.org/projects/bioperl/

=head1 AUTHOR

Shawn Hoon
Email shawnh@fugu-sg.org

=head1 APPENDIX

The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _

=cut

package Bio::Tools::Run::Vista;

use vars qw($AUTOLOAD @ISA %DEFAULT_VALUES %EPONINE_PARAMS
       	   @VISTA_PARAMS  $EPOJAR $JAVA $PROGRAMDIR $PROGRAMNAME $PROGRAM
            %OK_FIELD);
use strict;

use Bio::Root::Root;
use Bio::Seq;
use Bio::Root::IO;
use Bio::Tools::Run::WrapperBase;
use File::Copy;
@ISA = qw(Bio::Root::Root Bio::Tools::Run::WrapperBase);

# Compile-time initialisation of the package globals: the java program
# name/location and the tables of default and permitted parameters.
BEGIN {
    $PROGRAMNAME = 'java';

    # Take the java directory from the environment unless $PROGRAMDIR
    # was already defined.
    $PROGRAMDIR = $ENV{'JAVA_HOME'} || $ENV{'JAVA_DIR'}
        unless defined $PROGRAMDIR;

    if ( defined $PROGRAMDIR ) {
        # Probe <dir>/java, then <dir>/bin/java; the first path that
        # exists and is executable becomes $PROGRAM.
        foreach my $parts ( ['java'], ['bin', 'java'] ) {
            my $candidate = Bio::Root::IO->catfile($PROGRAMDIR, @{$parts});
            next unless -e $candidate && -x $candidate;
            $PROGRAM = $candidate;
            last;
        }
    }

    # Values applied by new() before the caller's own arguments.
    %DEFAULT_VALUES = (
        'java'        => 'java',
        'min_perc_id' => 75,
        'min_length'  => 100,
        'plotmin'     => 50,
        'bases'       => 10000,
        'tickdist'    => 2000,
        'resolution'  => 25,
        'window'      => 40,
        'title'       => 'VISTA PLOT',
        'numwindows'  => 4,
    );

    # Upper-cased names of every parameter AUTOLOAD will accept.
    @VISTA_PARAMS = qw(
        JAVA JAVA_PARAM OUTFILE MIN_PERC_ID QUIET VERBOSE ANNOTATION_FORMAT
        REGION_FILE REGION_FILE_DIR SCORE_FILE SCORE_FILE_DIR ALIGNMENT_FILE_DIR
        ALIGNMENT_FILE CONTIGS_FILE DIFFS PLOTFILE
        MIN_LENGTH PLOTMIN ANNOTATION BASES TICKDIST RESOLUTION TITLE PAPER
        WINDOW NUMWINDOWS START END NUM_PLOT_LINES LEGEND FILENAME NUM_PAGES
        AXIS_LABEL TICKS_FILE COLOR USE_ORDER GAPS SNPS_FILE REPEATS_FILE
        FILTER_REPEATS
    );

    # Lookup table used by AUTOLOAD to validate accessor names.
    $OK_FIELD{$_}++ for @VISTA_PARAMS;
}

# Generic get/set accessor for the Vista parameters.  The method name is
# upper-cased and must be a key of %OK_FIELD, otherwise an exception is
# thrown.  With an argument the value is stored under that upper-cased
# key; the stored value is always returned.
sub AUTOLOAD {
    my ($self, @values) = @_;

    # Copy the global first so later calls cannot change what we see.
    my $requested = $AUTOLOAD;
    $self->debug( "************ attr:  $requested\n");

    # Drop the package qualifier, then normalise to the upper-case form
    # used as both the %OK_FIELD key and the object hash key.
    (my $field = $requested) =~ s/.*:://;
    $field = uc $field;

    $OK_FIELD{$field}
        or $self->throw("Unallowed parameter: $field !");

    $self->{$field} = $values[0] if @values;
    return $self->{$field};
}

=head2 new

    Title   :   new
    Usage   :   my $vis = Bio::Tools::Run::Vista->new('outfile'=>$out,
                                        'title' => "My Vista Plot",
                                        'annotation'=>\@features,
                                        'annotation_format'=>'GFF',
                                        'min_perc_id'=>75,
                                        'min_length'=>100,
                                        'plotmin'   => 50,
                                        'tickdist' => 2000,
                                        'window'=>40,
                                        'numwindows'=>4,
                                        'start'=>50,
                                        'end'=>1500,
                                        'tickdist'=>100,
                                        'bases'=>1000,
                                        'color'=> {'EXON'=>'100 0 0',
                                                   'CNS'=>'0 0 100'},
                                        'quiet'=>1);
    Function:   Constructor for Vista wrapper
    Args    :   outfile - location of the pdf generated
                annotation - either a file or an array ref of Bio::SeqFeatureI
                             indicating the exons 
                regmin     -region min

=cut

sub new {
    my ($caller, @args) = @_;

    # Chained constructor: the parent sees the full, unmodified list.
    my $self = $caller->SUPER::new(@args);

    # Apply every default through its accessor.
    # NOTE(review): 'java' is one of the defaults, so java('java') is
    # always called here and the path is stored before java() would
    # consult $PROGRAM - confirm that this is intended.
    foreach my $field (keys %DEFAULT_VALUES) {
        $self->$field( $DEFAULT_VALUES{$field} );
    }

    # Consume the arguments pairwise; keys starting with '-' are named
    # parameters and are not dispatched to an accessor.
    while ( my ($attr, $value) = splice(@args, 0, 2) ) {
        $self->$attr($value) unless $attr =~ /^-/;
    }

    return $self;
}

=head2 java

    Title   :   java
    Usage   :   $obj->java('/usr/opt/java130/bin/java');
    Function:   Get/set method for the location of java VM
    Args    :   File path (optional)

=cut

# Alias for java(): forwards every argument and returns its result.
sub executable {
    my ($self, @args) = @_;
    return $self->java(@args);
}

sub java {
    my ($self, $exe, $warn) = @_;

    # An explicitly supplied path always replaces the stored one.
    $self->{'_pathtojava'} = $exe if defined $exe;

    # Nothing to look up when a path is already known.
    return $self->{'_pathtojava'} if defined $self->{'_pathtojava'};

    # First choice: the binary located at compile time.
    if ( $PROGRAM && -e $PROGRAM && -x $PROGRAM ) {
        $self->{'_pathtojava'} = $PROGRAM;
        return $self->{'_pathtojava'};
    }

    # Otherwise ask the IO helper to find the program by name.
    my $found = $self->io->exists_exe($PROGRAMNAME);
    if ( $found && -x $found ) {
        $self->{'_pathtojava'} = $found;
    }
    else {
        # Only complain when the caller asked for a warning.
        $self->warn("Cannot find executable for $PROGRAMNAME") if $warn;
        $self->{'_pathtojava'} = undef;
    }

    return $self->{'_pathtojava'};
}


=head2 run

 Title   : run
 Usage   : $vis->run($aln, $reference)
 Function: runs Vista 
 Returns : A boolean 1 if no errors 
 Args    : Argument 1: Bio::Align::AlignI object (required)
           Argument 2: a string or number, which is the sequence id of the
                       reference sequence or the rank of the sequence
                       in the alignment (defaults to 1), or an array ref
                       of [id, reference_id] pairs for pairwise plots

=cut

sub run {
    my ($self, $align, $ref) = @_;

    # Without a usable reference, fall back to the first sequence (rank 1).
    $ref = 1 unless $ref;

    # Build the input files, then hand the resulting plotfile to Vista.
    my $plotfile = $self->_setinput($align, $ref);
    return $self->_run_Vista($plotfile);
}

=head2 _setinput

 Title   : _setinput
 Usage   : Internal function, not to be called directly
 Function: writes the binary alignment files and the plotfile, and
           returns the plotfile name
 Example :
 Returns : string
 Args    :

=cut

sub _setinput {
    my $self    = shift;
    my $sim_aln = shift;
    my $ref     = shift;

    # Step 1: one binary alignment file per sequence pair.
    my ($pairs, $files) = $self->_mf2bin($sim_aln, $ref);

    # Step 2: the plotfile that refers to those binary files.
    my $plotfile = $self->_make_plotfile($sim_aln, $pairs, $files);

    return $plotfile;
}

# Reads a multi-FASTA file and returns one Bio::Seq object per record.
#
# Args    : $file - path of the FASTA file
# Returns : list of Bio::Seq objects, in the order in which the records
#           first appear in the file (the order matters because callers
#           may select the reference sequence by rank)
# Throws  : if the file cannot be opened, if a header line contains no
#           identifier, or if sequence data precedes the first header
#
# The identifier of a record is the first run of word characters after
# the '>' of its header line.  When an identifier occurs twice, the later
# record replaces the earlier one but keeps its original position.
sub _parse_multi_fasta {
  my ($self,$file) = @_;

  # Three-argument open with a lexical handle: the file name is never
  # interpreted as a mode or a pipe, and the handle cannot leak.
  open(my $fasta_fh, '<', $file) || $self->throw("Couldn't open $file");

  my %residues_of;   # identifier => concatenated sequence lines
  my @id_order;      # identifiers in order of first appearance
  my $current_id;

  while (my $line = <$fasta_fh>) {
    chomp $line;
    $line =~ s/\r$//;          # tolerate DOS line endings
    next if $line =~ /^$/;

    if (substr($line, 0, 1) eq ">") {
      # Capture explicitly instead of relying on $&, which would keep
      # the value of an earlier match when this header has no id.
      my ($id) = substr($line, 1) =~ /(\w+)/;
      defined $id
        || $self->throw("No sequence identifier in FASTA header '$line' of $file");
      push @id_order, $id unless exists $residues_of{$id};
      $residues_of{$id} = "";
      $current_id = $id;
    }
    else {
      defined $current_id
        || $self->throw("Sequence data before the first FASTA header in $file");
      $residues_of{$current_id} .= $line;
    }
  }
  close($fasta_fh);

  my @seq;
  foreach my $id (@id_order) {
    push @seq, Bio::Seq->new(-id => $id, -seq => $residues_of{$id});
  }
  return @seq;
}

#adapted from mlagan utils  mf2bin.pl 
# Converts an alignment into the binary pairwise files read by Vista.
#
# Args    : $sim - a Bio::Align::AlignI object, or the name of a
#                  multi-FASTA alignment file
#           $ref - one of
#                  * a rank (1-based position of the reference sequence),
#                  * a sequence id (an exact match is preferred; otherwise
#                    the first id containing the string is used),
#                  * an array ref of [id, reference_id] pairs
# Returns : (\@pairs, \@files); each pair is [reference_id, other_id] and
#           the file at the same index holds that pair's binary alignment
# Throws  : if $sim is not an alignment, if it holds no sequences, or if
#           a requested sequence cannot be found
# Side effect: stores the reference id(s) through _coordinate().
sub _mf2bin {
  my ($self,$sim,$ref)= @_;

  my @seq;
  if(!ref $sim){
    @seq = $self->_parse_multi_fasta($sim);
  }
  else {
    # eval guards against unblessed references, on which ->isa would die.
    eval { $sim->isa("Bio::Align::AlignI") }
      || $self->throw("Expecting a Bio::Align::AlignI");
    @seq = $sim->each_seq;
  }
  @seq || $self->throw("The alignment contains no sequences");

  my @files;
  my @pairs;

  # Explicit list of pairs: each one names its own reference sequence.
  if(ref($ref) eq 'ARRAY'){
    my @reference_ids;
    foreach my $set (@$ref){
      (ref($set) eq 'ARRAY' && @$set >= 2)
        || $self->throw("Each pair must be an array ref of [id, reference_id]");
      my ($other_id,$reference_id) = @$set;
      my ($reference) = grep { $_->id eq $reference_id } @seq;
      my ($other)     = grep { $_->id eq $other_id } @seq;
      defined $reference
        || $self->throw("Reference sequence '$reference_id' not found in the alignment");
      defined $other
        || $self->throw("Sequence '$other_id' not found in the alignment");
      my ($pair,$file) = $self->_pack_bin($reference,$other);
      push @pairs, @$pair;
      push @files, @$file;
      push @reference_ids, $reference_id;
    }
    $self->_coordinate(\@reference_ids);
    return \@pairs,\@files;
  }

  # Single reference: work out its position in @seq.
  my $ref_index;
  if(defined $ref && $ref =~ /^\d+$/ && $ref >= 1 && $ref <= @seq){
    $ref_index = $ref - 1;                       # it is a rank
  }
  elsif(defined $ref && length $ref){
    # It is an id.  Compare literally: ids such as "gi|123" contain
    # regex metacharacters and must not be interpreted as patterns.
    ($ref_index) = grep { $seq[$_]->id eq $ref } 0..$#seq;
    unless(defined $ref_index){
      ($ref_index) = grep { index($seq[$_]->id, $ref) >= 0 } 0..$#seq;
    }
  }
  unless(defined $ref_index){
    my $wanted = defined $ref ? $ref : '';
    $self->throw("Reference sequence '$wanted' not found in the alignment");
  }

  # Take the reference out of the list; every remaining sequence is
  # paired with it.
  my $reference = splice(@seq, $ref_index, 1);
  $self->_coordinate([$reference->id]);

  foreach my $seq2 (@seq){
    my ($pair,$file) = $self->_pack_bin($reference,$seq2);
    push @pairs, @$pair;
    push @files, @$file;
  }
  return \@pairs,\@files;
}

# Writes the binary alignment of two aligned sequences to a temporary
# file, one byte per alignment column.
#
# Args    : $first - reference sequence object (must provide seq and id)
#           $sec   - second sequence object (must provide seq and id)
# Returns : (\@pairs, \@files) with exactly one entry each:
#           [[first_id, second_id]] and [temporary file name]
# Throws  : if the temporary file cannot be written completely
#
# Each column is written as one byte whose two hex digits are the codes
# of the first and the second residue.  Columns in which both sequences
# have a gap are skipped.  Residues without an entry in the code table
# (for example IUPAC ambiguity codes) are written as N, and positions
# past the end of the second sequence as gaps, so that every byte always
# carries two digits.
sub _pack_bin {
  my ($self,$first,$sec) = @_;
  my @first = split('',$first->seq);
  my @sec   = split('',$sec->seq);

  # pack bin
  # format from Alex Poliakov's glass2bin.pl script
  my %base_code = ('-' => 0, 'A' => 1, 'C' => 2, 'T' => 3, 'G' => 4, 'N' => 5,
                   'a' => 1, 'c' => 2, 't' => 3, 'g' => 4, 'n' => 5);
  my $unknown_code = $base_code{'N'};

  my ($tfh1,$outfile) = $self->io->tempfile(-dir=>$self->tempdir);
  binmode($tfh1);   # the file holds raw bytes, not text

  foreach my $index (0..$#first){
    my $first_base = $first[$index];
    my $sec_base   = defined $sec[$index] ? $sec[$index] : '-';
    next if $first_base eq '-' && $sec_base eq '-';

    my $first_code = exists $base_code{$first_base} ? $base_code{$first_base}
                                                    : $unknown_code;
    my $sec_code   = exists $base_code{$sec_base}   ? $base_code{$sec_base}
                                                    : $unknown_code;
    print $tfh1 pack("H2",$first_code.$sec_code);
  }

  # Buffered write errors only surface when the handle is closed.
  close($tfh1) || $self->throw("Could not write $outfile: $!");

  return ([[$first->id,$sec->id]], [$outfile]);
}

# Writes the Vista plotfile: one ALIGN section per sequence pair followed
# by the global plot options taken from the object's parameters.
#
# Args    : $sim_aln - alignment object (its each_seq supplies the ids for
#                      the SEQUENCES line) or the name of a multi-FASTA
#                      alignment file
#           $pairs   - array ref of [id, id] pairs, one per ALIGN section
#           $files   - array ref of binary alignment files, parallel to
#                      $pairs
# Returns : the value of $self->plotfile.  When plotfile was already set
#           the generated file is copied there; otherwise plotfile is set
#           to the temporary file that was written.
# Throws  : if the plotfile cannot be written or copied
sub _make_plotfile {
  my ($self,$sim_aln,$pairs,$files) = @_;
  my ($tfh1,$plotfile) = $self->io->tempfile(-dir=>$self->tempdir);

  # The alignment may be an object or, as accepted by _mf2bin, a file
  # name; calling each_seq on a plain string would die.
  my @seqs = ref($sim_aln) ? $sim_aln->each_seq
                           : $self->_parse_multi_fasta($sim_aln);
  my @ids = map { $_->id } @seqs;

  print $tfh1 "TITLE ".$self->title."\n\n";
  print $tfh1 "OUTPUT ".$self->outfile."\n\n" ;
  print $tfh1 "SEQUENCES ";
  print $tfh1 join(" ",@ids)."\n\n";

  foreach my $index (0..$#$pairs){
    my ($id1,$id2) = @{$pairs->[$index]}[0,1];
    my $pair_name  = $id1."_".$id2;   # stem of the per-pair output files

    print $tfh1 "ALIGN ".$files->[$index]." BINARY\n";
    print $tfh1 " SEQUENCES ".$id1." ".$id2."\n";
    print $tfh1 " REGIONS ".$self->min_perc_id." ".$self->min_length."\n";
    print $tfh1 " MIN ".$self->plotmin."\n";
    print $tfh1 " DIFFS ". $self->diffs ."\n\n" if $self->diffs;
    if($self->region_file||$self->region_file_dir){
      my $file = " REGION_FILE ";
      $file.=$self->region_file_dir."/" if $self->region_file_dir;
      $file.=$pair_name.".region\n\n";
      print $tfh1 $file;
    }
    if($self->score_file || $self->score_file_dir){
      my $file = " SCORE_FILE ";
      $file.=$self->score_file_dir."/" if $self->score_file_dir;
      $file.=$pair_name.".score\n\n";
      print $tfh1 $file;
    }
    if($self->alignment_file || $self->alignment_file_dir){
      my $file = " ALIGNMENT_FILE ";
      $file.=$self->alignment_file_dir."/" if $self->alignment_file_dir;
      $file.=$pair_name.".alignment\n\n";
      print $tfh1 $file;
    }

    print $tfh1 " CONTIGS_FILE ". $self->contigs_file ."\n\n" if $self->contigs_file;
    print $tfh1 " USE_ORDER ". $self->use_order."\n\n" if $self->use_order;
    print $tfh1 "END \n\n";
  }

  # Annotation: either a list of features, dumped to a temporary GFF
  # file, or the name of an existing annotation file.
  my $annotation = $self->annotation;
  my $annotation_file;
  if(ref($annotation) eq 'ARRAY'){
    if(@$annotation && eval { $annotation->[0]->isa("Bio::SeqFeatureI") }){
      $annotation_file = $self->_dump2gff($annotation);
      $self->annotation_format('GFF');
    }
    elsif(@$annotation){
      $self->warn("annotation must be a file name or an array ref of Bio::SeqFeatureI; ignoring it");
    }
  }
  elsif($annotation){
    $annotation_file = $annotation;
  }
  # Only emit a GENES line when there really is an annotation file; a
  # format on its own must not produce "GENES  GFF".
  if(defined $annotation_file && length $annotation_file){
    my $format = $self->annotation_format;
    $annotation_file .= " GFF" if defined $format && $format =~ /GFF/i;
    print $tfh1 "GENES ".$annotation_file." \n\n";
  }

  print $tfh1 "LEGEND on\n\n";
  print $tfh1 "COORDINATE ".join(" ",@{$self->_coordinate || []})."\n\n";
  print $tfh1 "PAPER letter\n\n";
  print $tfh1 "BASES ".$self->bases."\n\n";
  print $tfh1 "TICK_DIST ".$self->tickdist."\n\n";
  print $tfh1 "RESOLUTION ".$self->resolution."\n\n";
  print $tfh1 "WINDOW ".$self->window."\n\n";
  print $tfh1 "NUM_WINDOWS ".$self->numwindows."\n\n";
  print $tfh1 "AXIS_LABEL ".$self->axis_label ."\n\n" if $self->axis_label;
  print $tfh1 "TICKS_FILE ".$self->ticks_file ."\n\n" if $self->ticks_file;
  print $tfh1 "SNPS_FILE"." ".$self->snps_file."\n\n" if $self->snps_file;
  print $tfh1 "GAPS ".$self->gaps ."\n\n"if $self->gaps;
  print $tfh1 "REPEATS_FILE ".$self->repeats_file ."\n\n" if $self->repeats_file;
  print $tfh1 "FILTER_REPEATS ".$self->filter_repeats ."\n\n" if $self->filter_repeats;
  print $tfh1 "NUM_PAGES ".$self->num_pages ."\n\n" if $self->num_pages;
  print $tfh1 "START ".$self->start ."\n\n" if $self->start;
  print $tfh1 "END ".$self->end ."\n\n" if $self->end;
  my $color = $self->color;
  if(ref $color eq 'HASH'){
    foreach my $region_type (keys %$color){
      print $tfh1 "COLOR ".$region_type." ".$color->{$region_type}."\n\n";
    }
  }

  # Buffered write errors only surface when the handle is closed.
  close($tfh1) || $self->throw("Could not write plotfile $plotfile: $!");
  undef $tfh1;

  if($self->plotfile) {#saving plotfile
    copy($plotfile,$self->plotfile)
      || $self->throw("Could not copy plotfile to ".$self->plotfile.": $!");
  }
  else {
    $self->plotfile($plotfile);
  }
  return $self->plotfile;
}

# Writes the gff_string of every feature in $feat (an array ref), one
# per line, to a new temporary file and returns that file's name.
sub _dump2gff {
  my ($self,$feat) = @_;
  my ($gff_fh,$gff_path) = $self->io->tempfile(-dir=>$self->tempdir);

  print $gff_fh $_->gff_string."\n" for @$feat;

  close ($gff_fh);
  return $gff_path;
}

# Runs "java [java_param] Vista [-q] [-d] <plotfile>" through the shell.
#
# Args    : $infile - name of the plotfile to pass to Vista
# Returns : 1 on success
# Throws  : if no java executable is known or if the command fails
sub _run_Vista {
    my ($self,$infile) = @_;

    #run Vista
    $self->debug( "Running Vista\n");
    my $java = $self->java;
    $self->throw("Cannot find a java executable to run Vista")
        unless defined $java && length $java;
    my $param = $self->java_param || '';

    # Decide on -d from the verbosity level itself.  debug() is used
    # elsewhere in this file as a message logger, so calling it without a
    # message is not a reliable flag test.
    # NOTE(review): assumes debug mode corresponds to verbose > 0 - confirm
    # against Bio::Root::RootI.
    my $verbosity = $self->verbose || 0;

    my $cmd  =   $java." ".$param.' Vista ';
    $cmd .= " -q " if $self->quiet || $verbosity < 0;
    $cmd .= " -d " if $verbosity > 0;
    $cmd .= $infile;
    $self->debug($cmd);

    # NOTE(review): the command is a single string run through the shell,
    # so a plotfile path containing spaces or shell metacharacters would
    # be misparsed; the list form of system() cannot be used as long as
    # java_param may hold several space-separated options.
    my $status = system ($cmd);

    if ($status != 0) {
        # Decode $? so the message says what actually went wrong.
        my $reason = $? == -1   ? "failed to execute: $!"
                   : ($? & 127) ? "died with signal ".($? & 127)
                   :              "exit status ".($? >> 8);
        $self->throw("Problem running Vista: $reason (command: $cmd)\n");
    }

    return 1;
}
# Get/set the value stored under '_coordinate' (the callers in this file
# pass an array ref of reference sequence ids).  A false argument leaves
# the stored value untouched.
sub _coordinate {
  my $self = shift;
  my $val  = shift;

  $self->{'_coordinate'} = $val if $val;
  return $self->{'_coordinate'};
}

=head2 outfile

  Title    : outfile
  Usage    : $obj->outfile
  Function : Get/Set method outfile
  Args     : 

=cut

=head2 min_perc_id

  Title    : min_perc_id
  Usage    : $obj->min_perc_id
  Function : Get/Set method min_perc_id
  Args     : 

=cut

=head2 quiet

  Title    : quiet
  Usage    : $obj->quiet
  Function : Get/Set method quiet
  Args     : 

=cut

=head2 verbose

  Title    : verbose
  Usage    : $obj->verbose
  Function : Get/Set method verbose
  Args     : 

=cut

=head2 annotation_format

  Title    : annotation_format
  Usage    : $obj->annotation_format
  Function : Get/Set method annotation_format
  Args     : 

=cut

=head2 region_file

  Title    : region_file
  Usage    : $obj->region_file
  Function : Get/Set method region_file
  Args     : 

=cut

=head2 score_file

  Title    : score_file
  Usage    : $obj->score_file
  Function : Get/Set method score_file
  Args     : 

=cut

=head2 alignment_file

  Title    : alignment_file
  Usage    : $obj->alignment_file
  Function : Get/Set method alignment_file
  Args     : 

=cut

=head2 contigs_file

  Title    : contigs_file
  Usage    : $obj->contigs_file
  Function : Get/Set method contigs_file
  Args     : 

=cut

=head2 diffs

  Title    : diffs
  Usage    : $obj->diffs
  Function : Get/Set method diffs
  Args     : 

=cut

=head2 plotfile

  Title    : plotfile
  Usage    : $obj->plotfile
  Function : Get/Set method plotfile
  Args     : 

=cut

=head2 min_length

  Title    : min_length
  Usage    : $obj->min_length
  Function : Get/Set method min_length
  Args     : 

=cut

=head2 plotmin

  Title    : plotmin
  Usage    : $obj->plotmin
  Function : Get/Set method plotmin
  Args     : 

=cut

=head2 annotation

  Title    : annotation
  Usage    : $obj->annotation
  Function : Get/Set method annotation
  Args     : 

=cut


=head2 bases

  Title    : bases
  Usage    : $obj->bases
  Function : Get/Set method bases
  Args     : 

=cut

=head2 tickdist

  Title    : tickdist
  Usage    : $obj->tickdist
  Function : Get/Set method tickdist
  Args     : 

=cut

=head2 resolution

  Title    : resolution
  Usage    : $obj->resolution
  Function : Get/Set method resolution
  Args     : 

=cut

=head2 title

  Title    : title
  Usage    : $obj->title
  Function : Get/Set method title
  Args     : 

=cut

=head2 window

  Title    : window
  Usage    : $obj->window
  Function : Get/Set method window
  Args     : 

=cut

=head2 numwindows

  Title    : numwindows
  Usage    : $obj->numwindows
  Function : Get/Set method numwindows
  Args     : 

=cut

=head2 start

  Title    : start
  Usage    : $obj->start
  Function : Get/Set method start
  Args     : 

=cut

=head2 end

  Title    : end
  Usage    : $obj->end
  Function : Get/Set method end
  Args     : 

=cut

=head2 num_plot_lines

  Title    : num_plot_lines
  Usage    : $obj->num_plot_lines
  Function : Get/Set method num_plot_lines
  Args     : 

=cut

=head2 legend

  Title    : legend
  Usage    : $obj->legend
  Function : Get/Set method legend
  Args     : 

=cut

=head2 filename

  Title    : filename
  Usage    : $obj->filename
  Function : Get/Set method filename
  Args     : 

=cut

=head2 axis_label

  Title    : axis_label
  Usage    : $obj->axis_label
  Function : Get/Set method axis_label
  Args     : 

=cut

=head2 ticks_file

  Title    : ticks_file
  Usage    : $obj->ticks_file
  Function : Get/Set method ticks_file
  Args     : 

=cut

=head2 color

  Title    : color
  Usage    : $obj->color
  Function : Get/Set method color
  Args     : 

=cut

=head2 use_order

  Title    : use_order
  Usage    : $obj->use_order
  Function : Get/Set method use_order
  Args     : 

=cut

=head2 gaps

  Title    : gaps
  Usage    : $obj->gaps
  Function : Get/Set method gaps
  Args     : 

=cut

=head2 snps_file

  Title    : snps_file
  Usage    : $obj->snps_file
  Function : Get/Set method snps_file
  Args     : 

=cut

=head2 repeats_file

  Title    : repeats_file
  Usage    : $obj->repeats_file
  Function : Get/Set method repeats_file
  Args     : 

=cut

=head2 filter_repeats

  Title    : filter_repeats
  Usage    : $obj->filter_repeats
  Function : Get/Set method filter_repeats
  Args     : 

=cut

1;
__END__