# IO::Zlib.pm
#
# Copyright (c) 1998-2004 Tom Hughes <tom@compton.nu>.
# All rights reserved. This program is free software; you can redistribute
# it and/or modify it under the same terms as Perl itself.

package IO::Zlib;

=head1 NAME

IO::Zlib - IO:: style interface to L<Compress::Zlib>

=head1 SYNOPSIS

With any version of Perl 5 you can use the basic OO interface:

    use IO::Zlib;

    $fh = new IO::Zlib;
    if ($fh->open("file.gz", "rb")) {
        print <$fh>;
        $fh->close;
    }

    $fh = IO::Zlib->new("file.gz", "wb9");
    if (defined $fh) {
        print $fh "bar\n";
        $fh->close;
    }

    $fh = IO::Zlib->new("file.gz", "rb");
    if (defined $fh) {
        print <$fh>;
        undef $fh;       # automatically closes the file
    }

With Perl 5.004 you can also use the TIEHANDLE interface to access
compressed files just like ordinary files:

    use IO::Zlib;

    tie *FILE, 'IO::Zlib', "file.gz", "wb";
    print FILE "line 1\nline2\n";

    tie *FILE, 'IO::Zlib', "file.gz", "rb";
    while (<FILE>) { print "LINE: ", $_ };

=head1 DESCRIPTION

C<IO::Zlib> provides an IO:: style interface to L<Compress::Zlib> and
hence to gzip/zlib compressed files. It provides many of the same methods
as the L<IO::Handle> interface.

Starting from IO::Zlib version 1.02, IO::Zlib can also use an
external F<gzip> command.  The default behaviour is to try to use
an external F<gzip> if no C<Compress::Zlib> can be loaded, unless
explicitly disabled by

    use IO::Zlib qw(:gzip_external 0);

If explicitly enabled by

    use IO::Zlib qw(:gzip_external 1);

then the external F<gzip> is used B<instead> of C<Compress::Zlib>.

=head1 CONSTRUCTOR

=over 4

=item new ( [ARGS] )

Creates an C<IO::Zlib> object. If it receives any parameters, they are
passed to the method C<open>; if the open fails, the object is destroyed.
Otherwise, it is returned to the caller.

=back

=head1 OBJECT METHODS

=over 4

=item open ( FILENAME, MODE )

C<open> takes two arguments. The first is the name of the file to open
and the second is the open mode. The mode can be anything acceptable to
L<Compress::Zlib> and by extension anything acceptable to I<zlib> (that
basically means POSIX fopen() style mode strings plus an optional number
to indicate the compression level).

=item opened

Returns true if the object currently refers to a opened file.

=item close

Close the file associated with the object and disassociate
the file from the handle.
Done automatically on destroy.

=item getc

Return the next character from the file, or undef if none remain.

=item getline

Return the next line from the file, or undef on end of string.
Can safely be called in an array context.
Currently ignores $/ ($INPUT_RECORD_SEPARATOR or $RS when L<English>
is in use) and treats lines as delimited by "\n".

=item getlines

Get all remaining lines from the file.
It will croak() if accidentally called in a scalar context.

=item print ( ARGS... )

Print ARGS to the  file.

=item read ( BUF, NBYTES, [OFFSET] )

Read some bytes from the file.
Returns the number of bytes actually read, 0 on end-of-file, undef on error.

=item eof

Returns true if the handle is currently positioned at end of file?

=item seek ( OFFSET, WHENCE )

Seek to a given position in the stream.
Not yet supported.

=item tell

Return the current position in the stream, as a numeric offset.
Not yet supported.

=item setpos ( POS )

Set the current position, using the opaque value returned by C<getpos()>.
Not yet supported.

=item getpos ( POS )

Return the current position in the string, as an opaque object.
Not yet supported.

=back

=head1 USING THE EXTERNAL GZIP

If the external F<gzip> is used, the following C<open>s are used:

    open(FH, "gzip -dc $filename |")  # for read opens
    open(FH, " | gzip > $filename")   # for write opens

You can modify the 'commands' for example to hardwire
an absolute path by e.g.

    use IO::Zlib ':gzip_read_open'  => '/some/where/gunzip -c %s |';
    use IO::Zlib ':gzip_write_open' => '| /some/where/gzip.exe > %s';

The C<%s> is expanded to be the filename (C<sprintf> is used, so be
careful to escape any other C<%> signs).  The 'commands' are checked
for sanity - they must contain the C<%s>, and the read open must end
with the pipe sign, and the write open must begin with the pipe sign.

=head1 CLASS METHODS

=over 4

=item has_Compress_Zlib

Returns true if C<Compress::Zlib> is available.  Note that this does
not mean that C<Compress::Zlib> is being used: see L</gzip_external>
and L<gzip_used>.

=item gzip_external

Undef if an external F<gzip> B<can> be used if C<Compress::Zlib> is
not available (see L</has_Compress_Zlib>), true if an external F<gzip>
is explicitly used, false if an external F<gzip> must not be used.
See L</gzip_used>.

=item gzip_used

True if an external F<gzip> is being used, false if not.

=item gzip_read_open

Return the 'command' being used for opening a file for reading using an
external F<gzip>.

=item gzip_write_open

Return the 'command' being used for opening a file for writing using an
external F<gzip>.

=back

=head1 DIAGNOSTICS

=over 4

=item IO::Zlib::getlines: must be called in list context

If you want read lines, you must read in list context.

=item IO::Zlib::gzopen_external: mode '...' is illegal

Use only modes 'rb' or 'wb' or /wb[1-9]/.

=item IO::Zlib::import: '...' is illegal

The known import symbols are the C<:gzip_external>, C<:gzip_read_open>,
and C<:gzip_write_open>.  Anything else is not recognized.

=item IO::Zlib::import: ':gzip_external' requires an argument

The C<:gzip_external> requires one boolean argument.

=item IO::Zlib::import: 'gzip_read_open' requires an argument

The C<:gzip_external> requires one string argument.

=item IO::Zlib::import: 'gzip_read' '...' is illegal

The C<:gzip_read_open> argument must end with the pipe sign (|)
and have the C<%s> for the filename.  See L</"USING THE EXTERNAL GZIP">.

=item IO::Zlib::import: 'gzip_write_open' requires an argument

The C<:gzip_external> requires one string argument.

=item IO::Zlib::import: 'gzip_write_open' '...' is illegal

The C<:gzip_write_open> argument must begin with the pipe sign (|)
and have the C<%s> for the filename.  An output redirect (>) is also
often a good idea, depending on your operating system shell syntax.
See L</"USING THE EXTERNAL GZIP">.

=item IO::Zlib::import: no Compress::Zlib and no external gzip

Given that we failed to load C<Compress::Zlib> and that the use of
 an external F<gzip> was disabled, IO::Zlib has not much chance of working.

=item IO::Zlib::open: needs a filename

No filename, no open.

=item IO::Zlib::READ: NBYTES must be specified

We must know how much to read.

=item IO::Zlib::WRITE: too long LENGTH

The LENGTH must be less than or equal to the buffer size.

=back

=head1 SEE ALSO

L<perlfunc>,
L<perlop/"I/O Operators">,
L<IO::Handle>,
L<Compress::Zlib>

=head1 HISTORY

Created by Tom Hughes E<lt>F<tom@compton.nu>E<gt>.

Support for external gzip added by Jarkko Hietaniemi E<lt>F<jhi@iki.fi>E<gt>.

=head1 COPYRIGHT

Copyright (c) 1998-2004 Tom Hughes E<lt>F<tom@compton.nu>E<gt>.
All rights reserved. This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.

=cut

require 5.006;

use strict;
use warnings;

use Carp;
use Fcntl qw(SEEK_SET);
use Symbol;
use Tie::Handle;

our $VERSION = "1.11";
our $AUTOLOAD;
our @ISA = qw(Tie::Handle);

my $has_Compress_Zlib;
my $gzip_external;
my $gzip_used;
my $gzip_read_open = "gzip -dc %s |";
my $gzip_write_open = "| gzip > %s";
my $aliased;

BEGIN {
    eval { require Compress::Zlib };
    $has_Compress_Zlib = $@ || $Compress::Zlib::VERSION < 2.000 ? 0 : 1;
}

sub has_Compress_Zlib
{
    $has_Compress_Zlib;
}

sub gzip_external
{
    $gzip_external;
}

sub gzip_used
{
    $gzip_used;
}

sub gzip_read_open
{
    $gzip_read_open;
}

sub gzip_write_open
{
    $gzip_write_open;
}

sub can_gunzip
{
    $has_Compress_Zlib || $gzip_external;
}

sub _import
{
    my $import = shift;

    while (@_)
    {
        if ($_[0] eq ':gzip_external')
        {
            shift;

            if (@_)
            {
                $gzip_external = shift;
            }
            else
            {
                croak "$import: ':gzip_external' requires an argument";
            }
        }
        elsif ($_[0] eq ':gzip_read_open')
        {
            shift;

            if (@_)
            {
                $gzip_read_open = shift;

                croak "$import: ':gzip_read_open' '$gzip_read_open' is illegal"
                    unless $gzip_read_open =~ /^.+%s.+\|\s*$/;
            }
            else
            {
                croak "$import: ':gzip_read_open' requires an argument";
            }
        }
        elsif ($_[0] eq ':gzip_write_open')
        {
            shift;

            if (@_)
            {
                $gzip_write_open = shift;

                croak "$import: ':gzip_write_open' '$gzip_read_open' is illegal"
                    unless $gzip_write_open =~ /^\s*\|.+%s.*$/;
            }
            else
            {
                croak "$import: ':gzip_write_open' requires an argument";
            }
        }
        else
        {
            last;
        }
    }

    return @_;
}

sub _alias
{
    my $import = shift;

    if ($gzip_external || (!$has_Compress_Zlib && !defined($gzip_external)))
    {
        require IO::Handle;

        undef *gzopen;
        *gzopen = \&gzopen_external;

        *IO::Handle::gzread = \&gzread_external;
        *IO::Handle::gzwrite = \&gzwrite_external;
        *IO::Handle::gzreadline = \&gzreadline_external;
        *IO::Handle::gzeof = \&gzeof_external;
        *IO::Handle::gzclose = \&gzclose_external;

        $gzip_used = 1;
    }
    elsif ($has_Compress_Zlib)
    {
        *gzopen = \&Compress::Zlib::gzopen;
        *gzread = \&Compress::Zlib::gzread;
        *gzwrite = \&Compress::Zlib::gzwrite;
        *gzreadline = \&Compress::Zlib::gzreadline;
        *gzeof = \&Compress::Zlib::gzeof;
    }
    else
    {
        croak "$import: no Compress::Zlib and no external gzip";
    }

    $aliased = 1;
}

sub import
{
    my $class = shift;
    my $import = "IO::Zlib::import";

    if (@_)
    {
        if (_import($import, @_))
        {
            croak "$import: '@_' is illegal";
        }
    }

    _alias($import);
}

sub TIEHANDLE
{
    my $class = shift;
    my @args = @_;

    my $self = bless {}, $class;

    return @args ? $self->OPEN(@args) : $self;
}

sub DESTROY
{
}

sub OPEN
{
    my $self = shift;
    my $filename = shift;
    my $mode = shift;

    croak "IO::Zlib::open: needs a filename" unless defined($filename);

    $self->{'file'} = gzopen($filename,$mode);

    return defined($self->{'file'}) ? $self : undef;
}

sub CLOSE
{
    my $self = shift;

    return undef unless defined($self->{'file'});

    my $status = $self->{'file'}->gzclose();

    delete $self->{'file'};

    return ($status == 0) ? 1 : undef;
}

sub READ
{
    my $self = shift;
    my $bufref = \$_[0];
    my $nbytes = $_[1];
    my $offset = $_[2] || 0;

    croak "IO::Zlib::READ: NBYTES must be specified" unless defined($nbytes);

    $$bufref = "" unless defined($$bufref);

    my $bytesread = $self->{'file'}->gzread(substr($$bufref,$offset),$nbytes);

    return undef if $bytesread < 0;

    return $bytesread;
}

sub READLINE
{
    my $self = shift;

    my $line;

    return () if $self->{'file'}->gzreadline($line) <= 0;

    return $line unless wantarray;

    my @lines = $line;

    while ($self->{'file'}->gzreadline($line) > 0)
    {
        push @lines, $line;
    }

    return @lines;
}

sub WRITE
{
    my $self = shift;
    my $buf = shift;
    my $length = shift;
    my $offset = shift;

    croak "IO::Zlib::WRITE: too long LENGTH" unless $offset + $length <= length($buf);

    return $self->{'file'}->gzwrite(substr($buf,$offset,$length));
}

sub EOF
{
    my $self = shift;

    return $self->{'file'}->gzeof();
}

sub FILENO
{
    return undef;
}

sub new
{
    my $class = shift;
    my @args = @_;

    _alias("new", @_) unless $aliased; # Some call new IO::Zlib directly...

    my $self = gensym();

    tie *{$self}, $class, @args;

    return tied(${$self}) ? bless $self, $class : undef;
}

sub getline
{
    my $self = shift;

    return scalar tied(*{$self})->READLINE();
}

sub getlines
{
    my $self = shift;

    croak "IO::Zlib::getlines: must be called in list context"
        unless wantarray;

    return tied(*{$self})->READLINE();
}

sub opened
{
    my $self = shift;

    return defined tied(*{$self})->{'file'};
}

sub AUTOLOAD
{
    my $self = shift;

    $AUTOLOAD =~ s/.*:://;
    $AUTOLOAD =~ tr/a-z/A-Z/;

    return tied(*{$self})->$AUTOLOAD(@_);
}

sub gzopen_external
{
    my $filename = shift;
    my $mode = shift;
    my $fh = IO::Handle->new();

    if ($mode =~ /r/)
    {
        # Because someone will try to read ungzipped files
        # with this we peek and verify the signature.  Yes,
        # this means that we open the file twice (if it is
        # gzipped).
        # Plenty of race conditions exist in this code, but
        # the alternative would be to capture the stderr of
        # gzip and parse it, which would be a portability nightmare.
        if (-e $filename && open($fh, $filename))
        {
            binmode $fh;

            my $sig;
            my $rdb = read($fh, $sig, 2);

            if ($rdb == 2 && $sig eq "\x1F\x8B")
            {
                my $ropen = sprintf($gzip_read_open, $filename);

                if (open($fh, $ropen))
                {
                    binmode $fh;

                    return $fh;
                }
                else
                {
                    return undef;
                }
            }

            seek($fh, 0, SEEK_SET) or
                die "IO::Zlib: open('$filename', 'r'): seek: $!";

            return $fh;
        }
        else
        {
            return undef;
        }
    }
    elsif ($mode =~ /w/)
    {
        my $level = $mode =~ /([1-9])/ ? "-$1" : "";

        # To maximize portability we would need to open
        # two filehandles here, one for "| gzip $level"
        # and another for "> $filename", and then when
        # writing copy bytes from the first to the second.
        # We are using IO::Handle objects for now, however,
        # and they can only contain one stream at a time.
        my $wopen = sprintf($gzip_write_open, $filename);

        if (open($fh, $wopen))
        {
            $fh->autoflush(1);
            binmode $fh;

            return $fh;
        }
        else
        {
            return undef;
        }
    }
    else
    {
        croak "IO::Zlib::gzopen_external: mode '$mode' is illegal";
    }

    return undef;
}

sub gzread_external
{
    my $file = shift;
    my $bufref = \$_[0];
    my $nbytes = $_[1] || 4096;

    # Use read() instead of sysread() because people may
    # mix reads and readlines, and we don't want to mess
    # the stdio buffering.  See also gzreadline_external()
    # and gzwrite_external().
    my $nread = read($file, $$bufref, $nbytes);

    return defined $nread ? $nread : -1;
}

sub gzwrite_external
{
    my $file = shift;
    my $buf = shift;

    # Using syswrite() is okay (cf. gzread_external())
    # since the bytes leave this process and buffering
    # is therefore not an issue.
    my $nwrote = syswrite($file, $buf);

    return defined $nwrote ? $nwrote : -1;
}

sub gzreadline_external
{
    my $file = shift;
    my $bufref = \$_[0];

    # See the comment in gzread_external().
    $$bufref = readline($file);

    return defined $$bufref ? length($$bufref) : -1;
}

sub gzeof_external
{
    my $file = shift;

    return eof($file);
}

sub gzclose_external
{
    my $file = shift;

    close($file);

    # I am not entirely certain why this is needed but it seems
    # the above close() always fails (as if the stream would have
    # been already closed - something to do with using external
    # processes via pipes?)
    return 0;
}

1;