#
# BioPerl module for Bio::Tools::Run::Match
#
# Please direct questions and support issues to <bioperl-l@bioperl.org>
#
# Cared for by Sendu Bala <bix@sendu.me.uk>
#
# Copyright Sendu Bala
#
# You may distribute this module under the same terms as perl itself
# POD documentation - main docs before the code
=head1 NAME
Bio::Tools::Run::Match - Wrapper for Transfac's match(TM)
=head1 SYNOPSIS
use Bio::Tools::Run::Match;
# Make a Match factory
$factory = Bio::Tools::Run::Match->new(-mxlib => '/path/to/matrix.dat');
# Run Match on an sequence object
my @results = $factory->run($bio_seq);
# look at the results
foreach my $feat (@results) {
my $seq_id = $feat->seq_id;
my $start = $feat->start;
my $end = $feat->end;
my $score = $feat->score;
my ($pvalue) = $feat->get_tag_values('pvalue');
}
=head1 DESCRIPTION
This is a wrapper for running the match(TM) program supplied with Transfac Pro
distributions.
You can try supplying normal match command-line arguments to new(), eg.
new(-b => 1) or calling arg-named methods (excluding the initial
hyphens, eg. $factory->b(1) to set the -b option to true).
Histogram output isn't supported. -p is supported by using -mxprf, see the
docs of new() for details.
You will need to enable this match wrapper to find the match executable.
This can be done in (at least) three ways:
1. Make sure match is in your path.
2. Define an environmental variable MATCHDIR which is a
directory which contains the match executable:
In bash:
export MATCHDIR=/home/username/match/
In csh/tcsh:
setenv MATCHDIR /home/username/match
3. Include a definition of an environmental variable MATCHDIR in
every script that will use this match wrapper module, e.g.:
BEGIN { $ENV{MATCHDIR} = '/home/username/match/' }
use Bio::Tools::Run::Match;
=head1 FEEDBACK
=head2 Mailing Lists
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to
the Bioperl mailing list. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
=head2 Support
Please direct usage questions or support issues to the mailing list:
I<bioperl-l@bioperl.org>
rather than to the module maintainer directly. Many experienced and
reponsive experts will be able look at the problem and quickly
address it. Please include a thorough description of the problem
with code and data examples if at all possible.
=head2 Reporting Bugs
Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via
the web:
http://redmine.open-bio.org/projects/bioperl/
=head1 AUTHOR - Sendu Bala
Email bix@sendu.me.uk
=head1 APPENDIX
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _
=cut
package Bio::Tools::Run::Match;
use strict;
use Cwd;
use File::Spec;
use Bio::SeqIO;
use Bio::FeatureIO;
use Bio::Annotation::SimpleValue;
use Bio::Tools::Match;
use base qw(Bio::Tools::Run::WrapperBase);
our $PROGRAM_NAME = 'match';
our $PROGRAM_DIR = $ENV{'MATCHDIR'};
# methods for the match args we support
our @PARAMS = qw(mxlib mxprf imcut); # these aren't actually match args, but
# are methods we use internally
our @SWITCHES = qw(b u);
# just to be explicit, args we don't support (yet) or we handle ourselves
our @UNSUPPORTED = qw(H HH pp ppg pn png pr jkn i p);
=head2 program_name
Title : program_name
Usage : $factory>program_name()
Function: holds the program name
Returns : string
Args : None
=cut
sub program_name {
return $PROGRAM_NAME;
}
=head2 program_dir
Title : program_dir
Usage : $factory->program_dir(@params)
Function: returns the program directory, obtained from ENV variable.
Returns : string
Args : None
=cut
sub program_dir {
return $PROGRAM_DIR;
}
=head2 new
Title : new
Usage : $factory = Bio::Tools::Run::Match->new()
Function: creates a new MCS factory
Returns : Bio::Tools::Run::MCS
Args : The following args can either be supplied here or set by calling
arg-named methods (eg. $factory->imcut(2) ).
-mxlib => path to the matrix.dat file containing Transfac matricies
-mxprf => path to a profile file | [core_thresh, [matrix_thresh]]
(defaults to a standard one based on the mxlib provided if
file not supplied, using core_thresh and matrix_thresh
values if those are supplied instead)
-imcut => floating point number, the importance cutoff
-b | -u => boolean, mutually exclusive
=cut
sub new {
my ($class, @args) = @_;
my $self = $class->SUPER::new(@args);
$self->_set_from_args(\@args, -methods => [@PARAMS, @SWITCHES, 'quiet'],
-create => 1);
return $self;
}
=head2 run
Title : run
Usage : $result = $factory->run($bio_seqi_object);
Function: Runs match on a sequence.
Returns : list of Bio::SeqFeatureI feature objects
Args : Bio::SeqI compliant object
NB: mxlib has to have been set prior to calling run(), either as an
argument to new() or by calling mxlib().
=cut
sub run {
my ($self, $seq) = @_;
$self->mxlib || $self->throw("mxlib has to have been set first");
return $self->_run($seq);
}
sub _run {
my ($self, $seq) = @_;
my $exe = $self->executable || return;
my $mxlib = File::Spec->rel2abs($self->mxlib());
my $mxprf_file = $self->mxprf();
if ($mxprf_file && -e $mxprf_file) {
$mxprf_file = File::Spec->rel2abs($mxprf_file);
}
# cd to a temp dir
my $temp_dir = $self->tempdir;
my $cwd = Cwd->cwd();
chdir($temp_dir) || $self->throw("Couldn't change to temp dir '$temp_dir'");
# make the profile file if necessary
if (! $mxprf_file || ! -e $mxprf_file) {
my @thresh;
if ($mxprf_file && ref($mxprf_file) eq 'ARRAY') {
@thresh = @{$mxprf_file};
}
$mxprf_file = 'mxprf';
system("$exe $mxlib ignored ignored $mxprf_file -p @thresh") && $self->throw("Something went wrong whist creating profile: $! | $?");
}
# output the sequence to a fasta file
my $seq_file = 'sequence.fa';
my $so = Bio::SeqIO->new(-file => ">$seq_file", -format => 'fasta');
$so->write_seq($seq);
$so->close();
# run match
my $result_file = 'out';
my $param_str = $self->_setparams();
my $cmd_line = "$exe $mxlib $seq_file $result_file $mxprf_file".$param_str;
system($cmd_line) && $self->throw("Something went wrong whist running '$cmd_line': $! | $?");
# parse the results
my $parser = Bio::Tools::Match->new(-file => $result_file);
# correct the coords
my @feats;
while (my $feat = $parser->next_result) {
push(@feats, $feat);
}
# cd back again
chdir($cwd) || $self->throw("Couldn't change back to working directory '$cwd'");
return @feats;
}
=head2 _setparams
Title : _setparams
Usage : Internal function, not to be called directly
Function: Creates a string of params to be used in the command string
Returns : string of params
Args : none
=cut
sub _setparams {
my $self = shift;
my $param_string = $self->SUPER::_setparams(-switches => \@SWITCHES,
-dash => 1);
my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
$param_string .= " 1>$null" if $self->quiet;
return $param_string;
}
1;