# BioPerl module for Bio::Tools::Run::Phylo::Phylip::SeqBoot
# Created by
# Shawn Hoon 
# You may distribute this module under the same terms as perl itself

# POD documentation - main docs before the code

=head1 NAME 

Bio::Tools::Run::Phylo::Phylip::SeqBoot - Wrapper for the phylip
program SeqBoot


  #Create a SimpleAlign object
  @params = ('ktuple' => 2, 'matrix' => 'BLOSUM');
  $factory = Bio::Tools::Run::Alignment::Clustalw->new(@params);
  $inputfilename = 't/data/cysprot.fa';
  $aln = $factory->align($inputfilename); # $aln is a SimpleAlign object.

  # Use seqboot to generate bootstap alignments
  my @params = ('datatype'=>'SEQUENCE','replicates'=>100);
  my $seq = Bio::Tools::Run::Phylo::Phylip::SeqBoot->new(@params);

  my $aln_ref = $seq->run($aln);

  my $aio = Bio::AlignIO->new(-file=>">alignment.bootstrap",-format=>"phylip");
  foreach my $ai(@{$aln_ref}){

  # To prevent PHYLIP from truncating sequence names:
  # Step 1. Shelf the original names:
  my ($aln_safe, $ref_name)=                  #  $aln_safe has serial names
             $aln->set_displayname_safe();    #  $ref_name holds orginal names 
  # Step 2. Run PHYLIP programs:
  $aln_ref = $seq->run($aln_safe);            #  Use $aln_safe instead of $aln
  # Step 3. Retrieve orgininal names
  $aio = Bio::AlignIO->new(
             -format=>"fasta");               #  FASTA output to view full names
  foreach my $ai(@{$aln_ref}){
         my $new_aln=$ai->restore_displayname($ref_name); #  Restore names


Wrapper for seqboot from the phylip package by Joseph Felsentein.

Taken from phylip doc...

"SEQBOOT is a general boostrapping tool.  It is intended to  allow  you  to
generate  multiple data sets that are resampled versions of the input data set.
SEQBOOT  can  handle  molecular   sequences,   binary   characters, 
restriction sites, or gene frequencies."

More documentation on using seqboot and setting parameters may be found
in the phylip package.

This wrapper currently supports v3.5 of phylip. There is also support for v3.6 although
this is still experimental as v3.6 is still under alpha release and not all functionalities maybe supported.


=head2 MODEL

Description	: (optional)

                  This program supports 3 different datatypes
                  SEQUENCE: Molecular Sequences
                  MORPH   : Discrete  Morphological  Characters
                  REST    : Restriction Sites
                  GENEFREQ: Gene  Frequencies

             Defaults to SEQUENCE

=head2 PERMUTE

Description: (optional)

             3 different resampling methods are available:

             BOOTSTRAP : creating a new data set by sampling N
                         characters randomly with replacement The
                         resulting data set has the same size as the
                         original, but some characters have been left
                         out and others are duplicated

             JACKKNIFE : Delete-half-jackknifing. It involves sampling
                         a random half of the characters, and
                         including them in the data but dropping the
                         others The resulting data sets are half the
                         size of the original, and no characters are

             PERMUTE : Permuting species within characters. It
                       involves permuting the columns of the data
                       matrix separately.  This produces data matrices
                       that have the same number and kinds of
                       characters but no taxonomic structure.

             Defaults to BOOTSTRAP


  Description	: (optional)

                This options allows the user to set the number of
                replicate data sets. Most statisticians would be
                happiest with 1000 to 10,000 replicates in a
                bootstrap, but 100 gives a good rough picture

                Defaults to 100

=head2 ALLELES 

Title		:  ALLELES
Description : (optional)

            This option is to be used with gene frequencies datatype
            option to specify that all alleles at each locus are in
            the input file.

		  Defaults to NULL 


=head2 Mailing Lists

User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to one
of the Bioperl mailing lists.  Your participation is much appreciated.

  bioperl-l@bioperl.org                  - General discussion
  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists

=head2 Support 

Please direct usage questions or support issues to the mailing list:


rather than to the module maintainer directly. Many experienced and 
reponsive experts will be able look at the problem and quickly 
address it. Please include a thorough description of the problem 
with code and data examples if at all possible.

=head2 Reporting Bugs

Report bugs to the Bioperl bug tracking system to help us keep track
the bugs and their resolution.  Bug reports can be submitted via the


=head1 AUTHOR - Shawn Hoon 

Email shawnh@fugu-sg.org 


The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _



package Bio::Tools::Run::Phylo::Phylip::SeqBoot;

use strict;
use Bio::SimpleAlign;
use Bio::AlignIO;
use Bio::TreeIO;
use Bio::Tools::Run::Phylo::Phylip::Base;
use Bio::Tools::Run::Phylo::Phylip::PhylipConf qw(%Menu);
use Bio::Matrix::PhylipDist;
use Cwd;

# inherit from Phylip::Base which has some methods for dealing with
# Phylip specifics
@ISA = qw(Bio::Tools::Run::Phylo::Phylip::Base);

# You will need to enable the SeqBoot program. This
# can be done in (at least) 3 ways:
# 1. define an environmental variable PHYLIPDIR:
# export PHYLIPDIR=/home/shawnh/PHYLIP/bin
# 2. include a definition of an environmental variable CLUSTALDIR in
# every script that will use Clustal.pm.
# $ENV{PHYLIPDIR} = '/home/shawnh/PHYLIP/bin';
# 3. You can set the path to the program through doing:
# my @params('executable'=>'/usr/local/bin/seqboot');
# my $SeqBoot_factory = Bio::Tools::Run::Phylo::Phylip::SeqBoot->new(@params);

	foreach my $attr(@SEQBOOT_PARAMS,@OTHER_SWITCHES) {

=head2 program_name

 Title   : program_name
 Usage   : >program_name()
 Function: holds the program name
 Returns:  string
 Args    : None


sub program_name {
  return 'seqboot';

=head2 program_dir

 Title   : program_dir
 Usage   : ->program_dir()
 Function: returns the program directory, obtained from ENV variable.
 Returns:  string
 Args    :


sub program_dir {
  return Bio::Root::IO->catfile($ENV{PHYLIPDIR}) if $ENV{PHYLIPDIR};

sub new {
    my ($class,@args) = @_;
    my $self = $class->SUPER::new(@args);
    my ($attr, $value);
    while (@args)  {
	$attr =   shift @args;
	$value =  shift @args;
	next if( $attr =~ /^-/ ); # don't want named parameters
	if ($attr =~/PROGRAM/i) {
	if ($attr =~ /IDLENGTH/i){
    return $self;

    my $self = shift;
    my $attr = $AUTOLOAD;
    $attr =~ s/.*:://;
    $attr = uc $attr;
    $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
    $self->{$attr} = shift if @_;
    return $self->{$attr};

=head2 idlength 

 Title   : idlength 
 Usage   : $obj->idlength ($newval)
 Returns : value of idlength 
 Args    : newvalue (optional)


sub idlength{
   my $self = shift;
   if( @_ ) {
      my $value = shift;
      $self->{'idlength'} = $value;
    return $self->{'idlength'};


=head2  run 

 Title   : run 
 Usage   :
	$inputfilename = 't/data/prot.phy';
	$matrix= $seqboot_factory->run($inputfilename);
	$seq_array_ref = \@seq_array; @seq_array is array of Seq objs
	$aln = $clustalw_factory->align($seq_array_ref);
	$aln_ref = $SeqBootfactory->run($aln);

 Function: Create bootstrap sets of alignments
 Example :
 Returns : an array ref of L<Bio::SimpleAlign>
 Args    : Name of a file containing a multiple alignment in Phylip format
           or an SimpleAlign object 

 Throws an exception if argument is not either a string (eg a
 filename) or a Bio::SimpleAlign object. If
 argument is string, throws exception if file corresponding to string
 name can not be found. 


sub run{

    my ($self,$input) = @_;
    my ($infilename);

# Create input file pointer
  	$infilename = $self->_setinput($input);
    if (!$infilename) {$self->throw("Problems setting up for seqboot. Probably bad input data in $input !");}

# Create parameter string to pass to SeqBoot program
    my $param_string = $self->_setparams();
# run SeqBoot
    my $aln = $self->_run($infilename,$param_string);
    return $aln;


=head2  _run

 Title   :  _run
 Usage   :  Internal function, not to be called directly	
 Function:  makes actual system call to SeqBoot program
 Example :
 Returns : an array ref of <Bio::SimpleAlign> 
 Args    : Name of a file containing a set of multiple alignments in Phylip format 
           and a parameter string to be passed to SeqBoot


sub _run {
    my ($self,$infile,$param_string) = @_;
    my $instring;
    my $curpath = cwd;    
    unless( File::Spec->file_name_is_absolute($infile) ) {
	$infile = $self->io->catfile($curpath,$infile);
    #odd random seed
    my $rand = (2 * int(rand(10000)) + 1);
    if ($self->version == 3.5){
      $instring =  $infile."\n$rand\n$param_string";
    else {
      $instring =  $infile."\n$param_string$rand\n";
    $self->debug( "Program ".$self->executable." $instring\n");
    #open a pipe to run SeqBoot to bypass interactive menus
    if ($self->quiet() || $self->verbose() < 0) {
	my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
    	open(SeqBoot,"|".$self->executable .">$null");
    else {
    print SeqBoot $instring;
    # get the results
    my $outfile = $self->io->catfile($self->tempdir,$self->outfile);
    $self->throw("SeqBoot did not create files correctly ($outfile)")
  	unless (-e $outfile);
    #parse the alignments
    my @aln;
    my @parse_params;

    push @parse_params, ('-interleaved' => 1) if $self->version == 3.6;
    my $aio = Bio::AlignIO->new(-file=>$outfile,-format=>"phylip",
    while (my $aln = $aio->next_aln){
        push @aln, $aln;

    # Clean up the temporary files created along the way...
    unlink $outfile unless $self->save_tempfiles;
    return \@aln;

=head2  _setinput()

 Title   :  _setinput
 Usage   :  Internal function, not to be called directly	
 Function:   Create input file for SeqBoot program
 Example :
 Returns : name of file containing a multiple alignment in Phylip format 
 Args    : SimpleAlign object reference or input file name


sub _setinput {
    my ($self, $input) = @_;
    my ($alnfilename,$tfh);
    #  a phy formatted alignment file 
  	unless (ref $input) {
        # check that file exists or throw
        $alnfilename= $input;
        unless (-e $input) {return 0;}
		   return $alnfilename;
    my @input = ref($input) eq 'ARRAY' ? @{$input}: ($input);

    ($tfh,$alnfilename) = $self->io->tempfile(-dir=>$self->tempdir);
    my $alnIO = Bio::AlignIO->new(-fh => $tfh, 
    foreach my $input(@input){
    #  $input should be a Bio::Align::AlignI 
      $input->isa("Bio::Align::AlignI") || $self->throw("Expecting a Bio::Align::AlignI object");
      #  Open temporary file for both reading & writing of BioSeq array
    return $alnfilename;		

=head2  _setparams()

 Title   :  _setparams
 Usage   :  Internal function, not to be called directly	
 Function:   Create parameter inputs for SeqBoot program
 Example :
 Returns : parameter string to be passed to SeqBoot
 Args    : name of calling object


sub _setparams {
    my ($attr, $value, $self);

    #do nothing for now
    $self = shift;
    my $param_string = "";
    my $cat = 0;
    my $gene_freq = 0;
    my %menu = %{$Menu{$self->version}->{'SEQBOOT'}};

    foreach  my $attr ( @SEQBOOT_PARAMS) {
    	$value = $self->$attr();
    	next unless (defined $value);
    	if ($attr =~/REPLICATES/i){
        if( $value !~ /(\d+(\.\d+)?)/ ) {
            $self->warn("Expected a number in  $attr\n");
		   $param_string .= $menu{'REPLICATES'}."$value\n";
          $gene_freq = 1 if $value =~/GENEFREQ/i;
          $param_string .= $menu{'DATATYPE'}{uc $value};
      else {
       if($attr =~/ALLELES/i){
             $self->warn("Alleles options only be used with alleles option");
           $param_string .=$menu{uc $attr};
    $param_string .= $menu{'SUBMIT'};

    return $param_string;

=head1 Bio::Tools::Run::Wrapper methods


=head2 no_param_checks

 Title   : no_param_checks
 Usage   : $obj->no_param_checks($newval)
 Function: Boolean flag as to whether or not we should
           trust the sanity checks for parameter values  
 Returns : value of no_param_checks
 Args    : newvalue (optional)


=head2 save_tempfiles

 Title   : save_tempfiles
 Usage   : $obj->save_tempfiles($newval)
 Returns : value of save_tempfiles
 Args    : newvalue (optional)


=head2 outfile_name

 Title   : outfile_name
 Usage   : my $outfile = $SeqBoot->outfile_name();
 Function: Get/Set the name of the output file for this run
           (if you wanted to do something special)
 Returns : string
 Args    : [optional] string to set value to


=head2 tempdir

 Title   : tempdir
 Usage   : my $tmpdir = $self->tempdir();
 Function: Retrieve a temporary directory name (which is created)
 Returns : string which is the name of the temporary directory
 Args    : none


=head2 cleanup

 Title   : cleanup
 Usage   : $codeml->cleanup();
 Function: Will cleanup the tempdir directory after a SeqBoot run
 Returns : none
 Args    : none


=head2 io

 Title   : io
 Usage   : $obj->io($newval)
 Function:  Gets a L<Bio::Root::IO> object
 Returns : L<Bio::Root::IO>
 Args    : none


1; # Needed to keep compiler happy