use strict;
use warnings;

package KSx::Index::ZlibDocReader;
use base qw( KinoSearch::Index::DocReader );
use KinoSearch::Util::StringHelper qw( utf8_valid utf8_flag_on );
use Compress::Zlib qw( uncompress );
use Carp;

# Inside-out member vars.
our %ix_in;
our %dat_in;
our %binary_fields;

sub new {
    my ( $either, %args ) = @_;
    my $self = $either->SUPER::new(%args);

    # Validate metadata.  Only open streams if the segment has data we
    # recognize.
    my $segment  = $self->get_segment;
    my $metadata = $segment->fetch_metadata("zdocs");
    if ($metadata) {
        if ( $metadata->{format} != 1 ) {
            confess("Unrecognized format: '$metadata->{format}'");
        }

        # Open InStreams.
        my $dat_filename = $segment->get_name . "/zdocs.dat";
        my $ix_filename  = $segment->get_name . "/zdocs.ix";
        my $folder       = $self->get_folder;
        $ix_in{$$self} = $folder->open_in($ix_filename)
            or confess KinoSearch->error;
        $dat_in{$$self} = $folder->open_in($dat_filename)
            or confess KinoSearch->error;

        # Remember which fields are binary.
        my $schema = $self->get_schema;
        my $bin_fields = $binary_fields{$$self} = {};
        $bin_fields->{$_} = 1
            for grep { $schema->fetch_type($_)->binary }
            @{ $schema->all_fields };
    }

    return $self;
}

sub fetch {
    my ( $self, %args ) = @_;
    my $doc_id     = delete $args{doc_id};
    my $offset     = delete $args{offset} || 0;
    my $score      = delete $args{score} || 0;
    my $dat_in     = $dat_in{$$self};
    my $ix_in      = $ix_in{$$self};
    my $bin_fields = $binary_fields{$$self};

    # Bail if no data in segment.
    return unless $ix_in;

    # Read index information.
    $ix_in->seek( $doc_id * 8 );
    my $start = $ix_in->read_i64;
    my $len   = $ix_in->read_i64 - $start;
    my $compressed;

    # Read main data.
    $dat_in->seek($start);
    $dat_in->read( $compressed, $len );
    my $inflated      = uncompress($compressed);
    my $num_fields    = unpack( "w", $inflated );
    my $pack_template = "w ";
    $pack_template .= "w/a*" x ( $num_fields * 2 );
    my ( undef, %fields ) = unpack( $pack_template, $inflated );

    # Turn on UTF-8 flag for string fields.
    for my $field ( keys %fields ) {
        next if $bin_fields->{$field};
        utf8_flag_on( $fields{$field} );
        confess("Invalid UTF-8 read for doc $doc_id field '$field'")
            unless utf8_valid( $fields{$field} );
    }

    return KinoSearch::Document::HitDoc->new(
        fields => \%fields,
        doc_id => $doc_id + $offset,
        score  => $score,
    );
}

sub read_record {
    my ( $self, $doc_id, $buf ) = @_;
    my $dat_in     = $dat_in{$$self};
    my $ix_in      = $ix_in{$$self};
    my $bin_fields = $binary_fields{$$self};

    if ($ix_in) {
        $ix_in->seek( $doc_id * 8 );
        my $start = $ix_in->read_i64;
        my $len   = $ix_in->read_i64 - $start;
        $dat_in->seek($start);
        $dat_in->read( $$buf, $len );
    }
}

sub close {
    my $self = shift;
    delete $ix_in{$$self};
    delete $dat_in{$$self};
    delete $binary_fields{$$self};
}

sub DESTROY {
    my $self = shift;
    delete $ix_in{$$self};
    delete $dat_in{$$self};
    delete $binary_fields{$$self};
    $self->SUPER::DESTROY;
}

1;

__END__

__POD__

=head1 NAME

KSx::Index::ZlibDocReader - Compressed doc storage.

=head1 DESCRIPTION

This is a proof-of-concept class to demonstrate alternate implementations for
fetching documents.  It is unsupported.

=head1 COPYRIGHT

Copyright 2009-2010 Marvin Humphrey

=head1 LICENSE, DISCLAIMER, BUGS, etc.

See L<KinoSearch> version 0.30.

=cut