# BioPerl module for Bio::Tools::Run::Primate
#
# Please direct questions and support issues to <bioperl-l@bioperl.org>
#
# Cared for by
#
# Copyright Shawn Hoon
#
# You may distribute this module under the same terms as perl itself
# POD documentation - main docs before the code
=head1 NAME
Wrapper for Primate, Guy Slater's near exact match finder for short sequence
tags.
=head1 SYNOPSIS
use Bio::Tools::Run::Primate;
use Bio::SeqIO;
my $query = "primer.fa";
my $target = "contig.fa";
my @params = ("query" => $query,"target" => $target,"m"=>0);
my $fact = Bio::Tools::Run::Primate->new(@params);
my @feat = $fact->run;
foreach my $feat(@feat) {
print $feat->seqname."\t".$feat->primary_tag."\t".$feat->start.
"\t".$feat->end."\t".$feat->strand."\t".$feat->seq->seq."\n";
}
=head1 DESCRIPTION
Primate is available under to ensembl-nci package at
http://cvsweb.sanger.ac.uk/cgi-bin/cvsweb.cgi/ensembl-nci/?cvsroot=Ensembl
=head1 FEEDBACK
=head2 Mailing Lists
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to one
of the Bioperl mailing lists. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
=head2 Support
Please direct usage questions or support issues to the mailing list:
I<bioperl-l@bioperl.org>
rather than to the module maintainer directly. Many experienced and
reponsive experts will be able look at the problem and quickly
address it. Please include a thorough description of the problem
with code and data examples if at all possible.
=head2 Reporting Bugs
Report bugs to the Bioperl bug tracking system to help us keep track
the bugs and their resolution. Bug reports can be submitted via the
web:
http://redmine.open-bio.org/projects/bioperl/
=head1 AUTHOR - Shawn Hoon
Email shawnh@fugu-sg.org
=head1 APPENDIX
The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _
=cut
package Bio::Tools::Run::Primate;
use vars qw($AUTOLOAD @ISA $PROGRAM $PROGRAMDIR @PRIMATE_PARAMS $PROGRAMNAME
@OTHER_SWITCHES %OK_FIELD);
use strict;
use Bio::Root::Root;
use Bio::Root::IO;
use Bio::Factory::ApplicationFactoryI;
use Bio::SeqIO;
use Bio::SeqFeature::Generic;
use Bio::Tools::Run::WrapperBase;
@ISA = qw(Bio::Root::Root Bio::Tools::Run::WrapperBase);
BEGIN {
@PRIMATE_PARAMS = qw(V Q T M B QUERY TARGET OUTFILE PROGRAM EXECUTABLE);
@OTHER_SWITCHES = qw(QUIET VERBOSE);
# Authorize attribute fields
foreach my $attr ( @PRIMATE_PARAMS,@OTHER_SWITCHES) { $OK_FIELD{$attr}++; }
}
=head2 program_name
Title : program_name
Usage : $factory>program_name()
Function: holds the program name
Returns: string
Args : None
=cut
sub program_name {
return 'primate';
}
=head2 program_dir
Title : program_dir
Usage : $factory->program_dir(@params)
Function: returns the program directory, obtained from ENV variable.
Returns: string
Args :
=cut
sub program_dir {
return Bio::Root::IO->catfile($ENV{PRIMATEDIR}) if $ENV{PRIMATEDIR};
}
=head2 new
Title : new
Usage : my $obj = Bio::Tools::Run::Primate->new()
Function: Builds a new Bio::Tools::Run::Primate objet
Returns : Bio::Tools::Run::Primate
Args : query => the L<Bio::PrimarySeqI> object or a file path
target => the L<Bio::PrimarySeqI> object or a file path
m => the number of mismatches allowed, default 1(integer)
b => [TRUE|FALSE] find best match, default FALSE
executable=>where the program sits
=cut
sub new {
my ($class, @args) = @_;
my $self = $class->SUPER::new(@args);
my ($attr, $value);
while (@args) {
$attr = shift @args;
$value = shift @args;
next if( $attr =~ /^-/ ); # don't want named parameters
if($attr =~/^q$/i){
$self->query($value);
}
if($attr =~/^t$/i){
$self->target($value);
}
$self->$attr($value);
}
return $self;
}
sub AUTOLOAD {
my $self = shift;
my $attr = $AUTOLOAD;
$attr =~ s/.*:://;
$attr = uc $attr;
$self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
$self->{$attr} = shift if @_;
return $self->{$attr};
}
=head2 version
Title : version
Usage : $primate->version
Function: Determine the version number of the program
Returns : float or undef
Args : none
=cut
sub version {
my ($self) = @_;
my $exe = $self->executable();
return undef unless defined $exe;
my $string = `$exe -v ` ;
$string =~ /\(([\d.]+)\)/;
return $1 || undef;
}
=head2 search
Title : search
Usage : DEPRECATED. Use $factory->run() instead
Function: Perform a primate search
Returns : Array of L<Bio::SeqFeature::Generic>
Args :
=cut
sub search {
return shift->run(@_);
}
=head2 run
Title : run
Usage : @feat = $factory->run();
Function: Perform a primate search
Returns : Array of L<Bio::SeqFeature::Generic>
Args :
=cut
sub run{
my ($self,$target) = @_;
$target = $target ||$self->target;
$target || $self->throw("Need a target sequence");
$self->query || $self->throw("Need a query sequence");
# Create input file pointer
my ($query_file,$target_file)= $self->_setinput($self->query,$target);
if (!($query_file && $target_file)) {$self->throw("Unable to create temp files for query and target !");}
# Create parameter string to pass to primate program
my $param_string = $self->_setparams();
# run primate
my @feats= $self->_run($query_file,$target_file,$param_string);
return @feats;
}
#################################################
#INTERNAL METHODS
=head2 _run
Title : _run
Usage : Internal function, not to be called directly
Function: makes actual system call to dba program
Returns : array of L<Bio::SeqFeature::Generic>
Args : path to query and target file and parameter string
=cut
sub _run {
my ($self,$query_file,$target_file,$param_string) = @_;
my $instring;
$self->debug( "Program ".$self->executable."\n");
my ($tfh,$outfile) = $self->io->tempfile(-dir=>$self->tempdir);
close($tfh); # this is to make sure we don't have
# open filehandles
undef $tfh;
my $commandstring = $self->executable.
" $param_string -q $query_file -t $target_file > $outfile";
$self->debug( "primate command = $commandstring");
my $status = system($commandstring);
$self->throw( "primate call ($commandstring) crashed: $? \n") unless $status==0;
#parse pff format and return a Bio::Search::HSP::GenericHSP array
my @feats = $self->_parse_results($outfile);
return @feats;
}
=head2 _parse_results
Title : _parse_results
Usage : Internal function, not to be called directly
Function: Passes primate output
Returns : array of L<Bio::SeqFeature::Generic>
Args : the name of the output file
=cut
sub _parse_results {
my ($self,$outfile) = @_;
$outfile||$self->throw("No outfile specified");
my @feats;
my %query = $self->_query_seq();
open(OUT,$outfile);
while(my $entry = <OUT>){
chomp($entry);
if($entry =~ /primate/ ) {
my ($dummy,$tagname, $seqname, $strand,$seq_end,$mismatch) = split(" " , $entry );
#map primate coordinates to Seq coordinates
my $seq_start = $seq_end- length($query{$tagname})+2;
$seq_end++;
my $feature = Bio::SeqFeature::Generic->new( -seq_id => $seqname,
-strand => $strand,
-score => $mismatch,
-start => $seq_start,
-end => $seq_end,
-frame => 1,
-source => 'primate',
-primary => $tagname);
$feature->attach_seq($self->_target_seq);
push @feats,$feature;
}
}
return @feats;
}
=head2 _setinput()
Title : _setinput
Usage : Internal function, not to be called directly
Function: Create input files for primate
Returns : name of file containing query and target
Args : query and target (either a filename or a L<Bio::PrimarySeqI>
=cut
sub _setinput {
my ($self, $query,$target) = @_;
my ($query_file,$target_file,$tfh1,$tfh2);
my @query = ref ($query) eq "ARRAY" ? @{$query} : ($query);
foreach my $query(@query){
if(ref($query)&& $query->isa("Bio::PrimarySeqI")){
($tfh1,$query_file) = $self->io->tempfile(-dir=>$self->tempdir);
my $out1 = Bio::SeqIO->new(-fh=> $tfh1 , '-format' => 'fasta');
my %query;
$query{$query->primary_id} = $query->seq;
$self->_query_seq(\%query);
$out1->write_seq($query) || return 0;
close ($tfh1);
undef $tfh1;
}
elsif (-e $query){
my $in = Bio::SeqIO->new(-file => $query , '-format' => 'fasta');
($tfh1,$query_file) = $self->io->tempfile(-dir=>$self->tempdir);
my $out1 = Bio::SeqIO->new(-fh=> $tfh1 , '-format' => 'fasta');
my %query;
while(my $seq1 = $in->next_seq()){
$out1->write_seq($seq1) || return 0;
$query{$seq1->primary_id} = $seq1->seq;
}
close($tfh1);
undef $tfh1;
$self->_query_seq(\%query);
}
else {
return 0;
}
}
if(ref($target) && $target->isa("Bio::PrimarySeqI")){
($tfh2,$target_file) = $self->io->tempfile(-dir=>$self->tempdir);
my $out1 = Bio::SeqIO->new(-fh=> $tfh2 , '-format' => 'Fasta');
$out1->write_seq($target)|| return 0;
$self->_target_seq($target);
close($tfh2);
undef $tfh2;
}
elsif (-e $target){
my $in = Bio::SeqIO->new(-file => $target , '-format' => 'fasta');
($tfh2,$target_file) = $self->io->tempfile(-dir=>$self->tempdir);
my $out = Bio::SeqIO->new(-fh=> $tfh2 , '-format' => 'fasta');
my $seq1 = $in->next_seq() || return 0;
$out->write_seq($seq1);
close($tfh2);
undef $tfh2;
$self->_target_seq($seq1);
}
else {
return 0;
}
return $query_file,$target_file;
}
=head2 _setparams()
Title : _setparams
Usage : Internal function, not to be called directly
Function: Create parameter inputs for primate program
Returns : parameter string to be passed to primate
Args : the param array
=cut
sub _setparams {
my ($attr, $value, $self);
$self = shift;
my $param_string = "";
for $attr ( @PRIMATE_PARAMS ) {
$value = $self->$attr();
next unless (defined $value);
my $attr_key = lc $attr; #put params in format expected by dba
$attr_key = ' -'.$attr_key;
if(($attr_key !~/QUERY/i) && ($attr_key !~/TARGET/i)){
$param_string .= $attr_key.' '.$value;
}
}
if ($self->quiet() || $self->verbose() < 0) {
my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
$param_string .= " >$null ";
}
return $param_string;
}
=head2 _query_seq()
Title : _query_seq
Usage : Internal function, not to be called directly
Function: get/set for the query sequence
Returns : a hash of seq with key the query tag
Args : optional
=cut
sub _query_seq {
my ($self,$seq) = @_;
if(defined $seq){
$self->{'_query_seq'} = $seq;
}
return %{$self->{'_query_seq'}};
}
=head2 _target_seq()
Title : _target_seq
Usage : Internal function, not to be called directly
Function: get/set for the target sequence
Returns : L<Bio::PrimarySeqI>
Args : optional
=cut
sub _target_seq {
my ($self,$seq) = @_;
if(defined $seq){
$self->{'_target_seq'} = $seq;
}
return $self->{'_target_seq'};
}
1; # Needed to keep compiler happy