# Please direct questions and support issues to # # Cared for by Shawn Hoon # # Copyright Shawn Hoon # # You may distribute this module under the same terms as perl itself # POD documentation - main docs before the code =head1 NAME Bio::Tools::Run::Vista Wrapper for Vista =head1 SYNOPSIS use Bio::Tools::Run::Vista; use Bio::Tools::Run::Alignment::Lagan; use Bio::AlignIO; my $sio = Bio::SeqIO->new(-file=>$ARGV[0],-format=>'genbank'); my @seq; my $reference = $sio->next_seq; push @seq, $reference; while(my $seq = $sio->next_seq){ push @seq,$seq; } my @features = grep{$_->primary_tag eq 'CDS'} $reference->get_SeqFeatures; my $lagan = Bio::Tools::Run::Alignment::Lagan->new; my $aln = $lagan->mlagan(\@seq,'(fugu (mouse human))'); my $vis = Bio::Tools::Run::Vista->new('outfile'=>"outfile.pdf", 'title' => "My Vista Plot", 'annotation'=>\@features, 'annotation_format'=>'GFF', 'min_perc_id'=>75, 'min_length'=>100, 'plotmin' => 50, 'tickdist' => 2000, 'window'=>40, 'numwindows'=>4, 'start'=>50, 'end'=>1500, 'tickdist'=>100, 'bases'=>1000, 'java_param'=>"-Xmx128m", 'num_pages'=>1, 'color'=> {'EXON'=>'100 0 0', 'CNS'=>'0 0 100'}, 'quiet'=>1); my $referenceid= 'human'; $vis->run($aln,$referenceid); #alternative one can choose pairwise alignments to plot #where the second id in each pair is the reference sequence $vis->run($aln,([mouse,human],[fugu,human],[mouse,fugu])); =head1 DESCRIPTION Pls see Vista documentation for plotfile options Wrapper for Vista : C. Mayor, M. Brudno, J. R. Schwartz, A. Poliakov, E. M. Rubin, K. A. Frazer, L. S. Pachter, I. Dubchak. VISTA: Visualizing global DNA sequence alignments of arbitrary length. Bioinformatics, 2000 Nov;16(11):1046-1047. Get it here: http://www-gsd.lbl.gov/vista/VISTAdownload2.html On the command line, it is assumed that this can be executed: java Vista plotfile Some of the code was adapted from MLAGAN toolkit M. Brudno, C.B. Do, G. Cooper, M.F. Kim, E. Davydov, NISC Sequencing Consortium, E.D. Green, A. Sidow and S. Batzoglou LAGAN and Multi-LAGAN: Efficient Tools for Large-Scale Multiple Alignment of Genomic DNA, Genome Research, in press get lagan here: http://lagan.stanford.edu/ =head1 FEEDBACK =head2 Mailing Lists User feedback is an integral part of the evolution of this and other Bioperl modules. Send your comments and suggestions preferably to one of the Bioperl mailing lists. Your participation is much appreciated. bioperl-l@bioperl.org - General discussion http://bioperl.org/wiki/Mailing_lists - About the mailing lists =head2 Support Please direct usage questions or support issues to the mailing list: I rather than to the module maintainer directly. Many experienced and reponsive experts will be able look at the problem and quickly address it. Please include a thorough description of the problem with code and data examples if at all possible. =head2 Reporting Bugs Report bugs to the Bioperl bug tracking system to help us keep track the bugs and their resolution. Bug reports can be submitted via the web: http://redmine.open-bio.org/projects/bioperl/ =head1 AUTHOR Shawn Hoon Email shawnh@fugu-sg.org =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut package Bio::Tools::Run::Vista; use vars qw($AUTOLOAD @ISA %DEFAULT_VALUES %EPONINE_PARAMS @VISTA_PARAMS $EPOJAR $JAVA $PROGRAMDIR $PROGRAMNAME $PROGRAM %OK_FIELD); use strict; use Bio::Root::Root; use Bio::Seq; use Bio::Root::IO; use Bio::Tools::Run::WrapperBase; use File::Copy; @ISA = qw(Bio::Root::Root Bio::Tools::Run::WrapperBase); BEGIN { $PROGRAMNAME = 'java'; if( ! defined $PROGRAMDIR ) { $PROGRAMDIR = $ENV{'JAVA_HOME'} || $ENV{'JAVA_DIR'}; } if (defined $PROGRAMDIR) { foreach my $progname ( [qw(java)],[qw(bin java)] ) { my $f = Bio::Root::IO->catfile($PROGRAMDIR, @$progname); if( -e $f && -x $f ) { $PROGRAM = $f; last; } } } %DEFAULT_VALUES= ('java' => 'java', 'min_perc_id' => 75, 'min_length' => 100, 'plotmin' => 50, 'bases' => 10000, 'tickdist' => 2000, 'resolution'=> 25, 'window' => 40, 'title' => 'VISTA PLOT', 'numwindows'=>4); @VISTA_PARAMS=qw(JAVA JAVA_PARAM OUTFILE MIN_PERC_ID QUIET VERBOSE ANNOTATION_FORMAT REGION_FILE REGION_FILE_DIR SCORE_FILE SCORE_FILE_DIR ALIGNMENT_FILE_DIR ALIGNMENT_FILE CONTIGS_FILE DIFFS PLOTFILE MIN_LENGTH PLOTMIN ANNOTATION BASES TICKDIST RESOLUTION TITLE PAPER WINDOW NUMWINDOWS START END NUM_PLOT_LINES LEGEND FILENAME NUM_PAGES AXIS_LABEL TICKS_FILE COLOR USE_ORDER GAPS SNPS_FILE REPEATS_FILE FILTER_REPEATS); foreach my $attr ( @VISTA_PARAMS) { $OK_FIELD{$attr}++; } } sub AUTOLOAD { my $self = shift; my $attr = $AUTOLOAD; $self->debug( "************ attr: $attr\n"); $attr =~ s/.*:://; $attr = uc $attr; $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr}; $self->{$attr} = shift if @_; return $self->{$attr}; } =head2 new Title : new Usage : my $vis = Bio::Tools::Run::Vista->new('outfile'=>$out, 'title' => "My Vista Plot", 'annotation'=>\@features, 'annotation_format'=>'GFF', 'min_perc_id'=>75, 'min_length'=>100, 'plotmin' => 50, 'tickdist' => 2000, 'window'=>40, 'numwindows'=>4, 'start'=>50, 'end'=>1500, 'tickdist'=>100, 'bases'=>1000, 'color'=> {'EXON'=>'100 0 0', 'CNS'=>'0 0 100'}, 'quiet'=>1); Function: Construtor for Vista wrapper Args : outfile - location of the pdf generated annotation - either a file or and array ref of Bio::SeqFeatureI indicating the exons regmin -region min =cut sub new { my ($caller, @args) = @_; # chained new my $self = $caller->SUPER::new(@args); # so that tempfiles are cleaned up foreach my $key(keys %DEFAULT_VALUES){ $self->$key($DEFAULT_VALUES{$key}); } while (@args) { my $attr = shift @args; my $value = shift @args; next if( $attr =~ /^-/ ); # don't want named parameters $self->$attr($value); } return $self; } =head2 java Title : java Usage : $obj->java('/usr/opt/java130/bin/java'); Function: Get/set method for the location of java VM Args : File path (optional) =cut sub executable { shift->java(@_); } sub java { my ($self, $exe,$warn) = @_; if( defined $exe ) { $self->{'_pathtojava'} = $exe; } unless( defined $self->{'_pathtojava'} ) { if( $PROGRAM && -e $PROGRAM && -x $PROGRAM ) { $self->{'_pathtojava'} = $PROGRAM; } else { my $exe; if( ( $exe = $self->io->exists_exe($PROGRAMNAME) ) && -x $exe ) { $self->{'_pathtojava'} = $exe; } else { $self->warn("Cannot find executable for $PROGRAMNAME") if $warn; $self->{'_pathtojava'} = undef; } } } $self->{'_pathtojava'}; } =head2 run Title : run Usage : my @genes = $self->run($seq) Function: runs Vista Returns : A boolean 1 if no errors Args : Argument 1: Bio::Align::Align required Argument 2: a string or number, which is the sequence id of the reference sequence or the rank of the sequence in the alignment =cut sub run{ my ($self,$align,$ref) = @_; $ref ||=1; my $infile = $self->_setinput($align,$ref); return $self->_run_Vista($infile); } =head2 _setinput Title : _setinput Usage : Internal function, not to be called directly Function: writes input sequence to file and return the file name Example : Returns : string Args : =cut sub _setinput { my ($self,$sim_aln,$ref) = @_; my($pairs,$files) = $self->_mf2bin($sim_aln,$ref); my $plotfile = $self->_make_plotfile($sim_aln,$pairs,$files); return $plotfile; } sub _parse_multi_fasta { my ($self,$file) = @_; my %seq; open(FASTA, $file) || $self->throw("Couldn't open $file"); my $last; my $count = 0; while (my $line = ) { chomp $line; next if $line=~/^$/; if (substr($line, 0, 1) eq ">") { $_ = substr($line, 1); /\w+/g; $seq{$&} = ""; $last = $&; } else { $seq{$last}.=$line; } print STDERR $count."\n"; $count++; } my @seq; foreach my $key(keys %seq){ my $seq = Bio::Seq->new(-id=>$key,-seq=>$seq{$key}); push @seq,$seq; } return @seq; } #adapted from mlagan utils mf2bin.pl sub _mf2bin { my ($self,$sim,$ref)= @_; my @seq; if(!ref $sim){ @seq = $self->_parse_multi_fasta($sim); } else { ($sim && $sim->isa("Bio::Align::AlignI")) || $self->throw("Expecting a Bio::Align::AlignI"); @seq = $sim->each_seq; } my $reference; my @files; my @pairs; if(ref($ref) eq 'ARRAY'){ my @ref; foreach my $set(@$ref){ my ($reference) = grep{$_->id eq $set->[1]}@seq; my ($other) = grep{$_->id eq $set->[0]}@seq; my ($pair,$file) = $self->_pack_bin($reference,$other); push @pairs, @$pair; push @files, @$file; push @ref,$set->[1]; } $self->_coordinate(\@ref); return \@pairs,\@files; } #figure out the reference sequence elsif($ref =~/^\d+$/){ #its a rank index $reference = $seq[$ref-1]; my $tmp = $ref; $ref = $reference->id; splice @seq,($tmp-1),1; } else { #its an id foreach my $i(0..$#seq){ if($seq[$i]->id =~/$ref/){ $reference = $seq[$i]; splice @seq,($i),1; last; } } } $self->_coordinate([$ref]); # pack bin # format from Alex Poliakov's glass2bin.pl script my %base_code = ('-' => 0, 'A' => 1, 'C' => 2, 'T' => 3, 'G' => 4, 'N' => 5, 'a' => 1, 'c' => 2, 't' => 3, 'g' => 4, 'n' => 5); my @ref= (split ('',$reference->seq)); foreach my $seq2(@seq){ my ($tfh1,$outfile) = $self->io->tempfile(-dir=>$self->tempdir); my @seq2= (split('', $seq2->seq)); foreach my $index(0..$#ref){ unless($ref[$index] eq '-' && $seq2[$index] eq '-'){ print $tfh1 pack("H2",$base_code{$ref[$index]}.$base_code{$seq2[$index]}); } } close ($tfh1); undef ($tfh1); push @files, $outfile; push @pairs,[$reference->id,$seq2->id]; } return \@pairs,\@files; } sub _pack_bin { my ($self,$first,$sec) = @_; my @first = (split('',$first->seq)); my @sec = (split('',$sec->seq)); # pack bin # format from Alex Poliakov's glass2bin.pl script my %base_code = ('-' => 0, 'A' => 1, 'C' => 2, 'T' => 3, 'G' => 4, 'N' => 5, 'a' => 1, 'c' => 2, 't' => 3, 'g' => 4, 'n' => 5); my @files; my @pairs; my ($tfh1,$outfile) = $self->io->tempfile(-dir=>$self->tempdir); foreach my $index(0..$#first){ unless($first[$index] eq '-' && $sec[$index] eq '-'){ print $tfh1 pack("H2",$base_code{$first[$index]}.$base_code{$sec[$index]}); } } close ($tfh1); undef ($tfh1); push @files, $outfile; push @pairs,[$first->id,$sec->id]; return \@pairs,\@files; } sub _make_plotfile { my ($self,$sim_aln,$pairs,$files) = @_; my ($tfh1,$plotfile) = $self->io->tempfile(-dir=>$self->tempdir); my @ids = map{$_->id}$sim_aln->each_seq; print $tfh1 "TITLE ".$self->title."\n\n"; print $tfh1 "OUTPUT ".$self->outfile."\n\n" ; print $tfh1 "SEQUENCES "; print $tfh1 join(" ",@ids)."\n\n"; foreach my $index(0..$#$pairs){ print $tfh1 "ALIGN ".$files->[$index]." BINARY\n"; print $tfh1 " SEQUENCES ".$pairs->[$index]->[0]." ".$pairs->[$index]->[1]."\n"; print $tfh1 " REGIONS ".$self->min_perc_id." ".$self->min_length."\n"; print $tfh1 " MIN ".$self->plotmin."\n"; print $tfh1 " DIFFS ". $self->diffs ."\n\n" if $self->diffs; if($self->region_file||$self->region_file_dir){ my $file = " REGION_FILE "; $file.=$self->region_file_dir."/" if $self->region_file_dir; $file.=$pairs->[$index]->[0]."_".$pairs->[$index]->[1].".region\n\n"; print $tfh1 $file; } if($self->score_file || $self->score_file_dir){ my $file = " SCORE_FILE "; $file.=$self->score_file_dir."/" if $self->score_file_dir; $file.=$pairs->[$index]->[0]."_".$pairs->[$index]->[1].".score\n\n"; print $tfh1 $file; } if($self->alignment_file || $self->alignment_file_dir){ my $file = " ALIGNMENT_FILE "; $file.=$self->alignment_file_dir."/" if $self->alignment_file_dir; $file.=$pairs->[$index]->[0]."_".$pairs->[$index]->[1].".alignment\n\n"; print $tfh1 $file; } print $tfh1 " CONTIGS_FILE ". $self->contigs_file ."\n\n" if $self->contigs_file; print $tfh1 " USE_ORDER ". $self->use_order."\n\n" if $self->use_order; print $tfh1 "END \n\n"; } my $annotation_file; if((ref $self->annotation eq 'ARRAY')&& $self->annotation->[0]->isa("Bio::SeqFeatureI")){ $annotation_file = $self->_dump2gff($self->annotation); $self->annotation_format('GFF'); } elsif($self->annotation){ $annotation_file = $self->annotation; } $annotation_file .= " GFF" if $self->annotation_format=~/GFF/i; print $tfh1 "GENES ".$annotation_file." \n\n" if $annotation_file; print $tfh1 "LEGEND on\n\n"; print $tfh1 "COORDINATE ".join(" ",@{$self->_coordinate})."\n\n"; print $tfh1 "PAPER letter\n\n"; print $tfh1 "BASES ".$self->bases."\n\n"; print $tfh1 "TICK_DIST ".$self->tickdist."\n\n"; print $tfh1 "RESOLUTION ".$self->resolution."\n\n"; print $tfh1 "WINDOW ".$self->window."\n\n"; print $tfh1 "NUM_WINDOWS ".$self->numwindows."\n\n"; print $tfh1 "AXIS_LABEL ".$self->axis_label ."\n\n" if $self->axis_label; print $tfh1 "TICKS_FILE ".$self->ticks_file ."\n\n" if $self->ticks_file; print $tfh1 "SNPS_FILE"." ".$self->snps_file."\n\n" if $self->snps_file; print $tfh1 "GAPS ".$self->gaps ."\n\n"if $self->gaps; print $tfh1 "REPEATS_FILE ".$self->repeats_file ."\n\n" if $self->repeats_file; print $tfh1 "FILTER_REPEATS ".$self->filter_repeats ."\n\n" if $self->filter_repeats; print $tfh1 "NUM_PAGES ".$self->num_pages ."\n\n" if $self->num_pages; print $tfh1 "START ".$self->start ."\n\n" if $self->start; print $tfh1 "END ".$self->end ."\n\n" if $self->end; my $color = $self->color; if(ref $color eq 'HASH'){ foreach my $region_type (keys %$color){ print $tfh1 "COLOR ".$region_type." ".$color->{$region_type}."\n\n"; } } close ($tfh1); undef $tfh1; if($self->plotfile) {#saving plotfile copy($plotfile,$self->plotfile); } else { $self->plotfile($plotfile); } return $self->plotfile; } sub _dump2gff { my ($self,$feat) = @_; my ($tfh1,$file) = $self->io->tempfile(-dir=>$self->tempdir); foreach my $f(@$feat){ print $tfh1 $f->gff_string."\n"; } close ($tfh1); undef $tfh1; return $file; } sub _run_Vista { my ($self,$infile) = @_; #run Vista $self->debug( "Running Vista\n"); my $java = $self->java; my $param = $self->java_param || ''; my $cmd = $java." ".$param.' Vista '; $cmd .= " -q " if $self->quiet || $self->verbose < 0; $cmd .= " -d " if $self->debug; $cmd .= $infile; $self->debug($cmd); my $status = system ($cmd); $self->throw("Problem running Vista: $? \n") if $status != 0; return 1; } sub _coordinate { my ($self,$val) = @_; if($val){ $self->{'_coordinate'} = $val; } return $self->{'_coordinate'}; } =head2 outfile Title : outfile Usage : $obj->outfile Function : Get/Set method outfile Args : =cut =head2 min_perc_id Title : min_perc_id Usage : $obj->min_perc_id Function : Get/Set method min_perc_id Args : =cut =head2 quiet Title : quiet Usage : $obj->quiet Function : Get/Set method quiet Args : =cut =head2 verbose Title : verbose Usage : $obj->verbose Function : Get/Set method verbose Args : =cut =head2 annotation_format Title : annotation_format Usage : $obj->annotation_format Function : Get/Set method annotation_format Args : =cut =head2 region_file Title : region_file Usage : $obj->region_file Function : Get/Set method region_file Args : =cut =head2 score_file Title : score_file Usage : $obj->score_file Function : Get/Set method score_file Args : =cut =head2 alignment_file Title : alignment_file Usage : $obj->alignment_file Function : Get/Set method alignment_file Args : =cut =head2 contigs_file Title : contigs_file Usage : $obj->contigs_file Function : Get/Set method contigs_file Args : =cut =head2 diffs Title : diffs Usage : $obj->diffs Function : Get/Set method diffs Args : =cut =head2 plotfile Title : plotfile Usage : $obj->plotfile Function : Get/Set method plotfile Args : =cut =head2 min_length Title : min_length Usage : $obj->min_length Function : Get/Set method min_length Args : =cut =head2 plotmin Title : plotmin Usage : $obj->plotmin Function : Get/Set method plotmin Args : =cut =head2 annotation Title : annotation Usage : $obj->annotation Function : Get/Set method annotation Args : =cut =head2 bases Title : bases Usage : $obj->bases Function : Get/Set method bases Args : =cut =head2 tickdist Title : tickdist Usage : $obj->tickdist Function : Get/Set method tickdist Args : =cut =head2 resolution Title : resolution Usage : $obj->resolution Function : Get/Set method resolution Args : =cut =head2 title Title : title Usage : $obj->title Function : Get/Set method title Args : =cut =head2 window Title : window Usage : $obj->window Function : Get/Set method window Args : =cut =head2 numwindows Title : numwindows Usage : $obj->numwindows Function : Get/Set method numwindows Args : =cut =head2 start Title : start Usage : $obj->start Function : Get/Set method start Args : =cut =head2 end Title : end Usage : $obj->end Function : Get/Set method end Args : =cut =head2 num_plot_lines Title : num_plot_lines Usage : $obj->num_plot_lines Function : Get/Set method num_plot_lines Args : =cut =head2 legend Title : legend Usage : $obj->legend Function : Get/Set method legend Args : =cut =head2 filename Title : filename Usage : $obj->filename Function : Get/Set method filename Args : =cut =head2 axis_label Title : axis_label Usage : $obj->axis_label Function : Get/Set method axis_label Args : =cut =head2 ticks_file Title : ticks_file Usage : $obj->ticks_file Function : Get/Set method ticks_file Args : =cut =head2 color Title : color Usage : $obj->color Function : Get/Set method color Args : =cut =head2 use_order Title : use_order Usage : $obj->use_order Function : Get/Set method use_order Args : =cut =head2 gaps Title : gaps Usage : $obj->gaps Function : Get/Set method gaps Args : =cut =head2 snps_file Title : snps_file Usage : $obj->snps_file Function : Get/Set method snps_file Args : =cut =head2 repeats_file Title : repeats_file Usage : $obj->repeats_file Function : Get/Set method repeats_file Args : =cut =head2 filter_repeats Title : filter_repeats Usage : $obj->filter_repeats Function : Get/Set method filter_repeats Args : =cut 1; __END__