package KinoSearch::Index::SegTermDocs;
use strict;
use warnings;
use KinoSearch::Util::ToolSet;
use base qw( KinoSearch::Index::TermDocs );

# Register the labeled parameters accepted by new(), along with their default
# values.  This happens inside a BEGIN block, so it runs at compile time.
# NOTE(review): init_instance_vars() is not defined in this file; it
# presumably fills in the package variable %instance_vars declared below --
# confirm against the parent class.
BEGIN {
    __PACKAGE__->init_instance_vars(
        # constructor params
        reader => undef,
    );
}
# Package hash that new() hands to verify_args() and uses as the source of
# default argument values.
our %instance_vars;

# Constructor.  Accepts a single labeled parameter:
#
#     reader => an object which passes the KinoSearch::Index::IndexReader
#               check performed by _set_reader()
#
# Attaches the C-level child struct, then copies the reader's skip interval
# and deldocs and takes private clones of the reader's streams.
sub new {
    my $class = shift;
    my $self  = $class->SUPER::new;

    # vet the supplied arguments against %instance_vars, then apply defaults
    verify_args( \%instance_vars, @_ )
        or confess kerror();
    my %args   = ( %instance_vars, @_ );
    my $reader = $args{reader};

    _init_child($self);

    # keep the reader itself and its skip interval
    $self->_set_reader($reader);
    $self->_set_skip_interval( $reader->get_skip_interval );

    # Give this object its own clone of each stream.  The skip stream is
    # cloned from the reader's freq stream as well, which matches
    # seek_tinfo() locating skip data at frq_fileptr + skip_offset.
    my @stream_plan = (
        [ _set_freq_stream => 'get_freq_stream' ],
        [ _set_skip_stream => 'get_freq_stream' ],
        [ _set_prox_stream => 'get_prox_stream' ],
    );
    for my $step (@stream_plan) {
        my ( $setter, $getter ) = @$step;
        $self->$setter( $reader->$getter()->clone_stream );
    }

    $self->_set_deldocs( $reader->get_deldocs );

    return $self;
}

# Position this object on the postings for $term.
#
# A defined $term is looked up through the reader's fetch_term_info(); an
# undefined $term results in seek_tinfo() being called with undef.  Returns
# whatever seek_tinfo() returns.
sub seek {
    my ( $self, $term ) = @_;

    my $tinfo;
    if ( defined $term ) {
        $tinfo = $self->_get_reader()->fetch_term_info($term);
    }

    return $self->seek_tinfo($tinfo);
}

# Close this object's three streams in the order freq, prox, skip.  The
# return value is that of the final (skip stream) close call.
sub close {
    my ($self) = @_;

    my $freq_stream = $self->_get_freq_stream();
    $freq_stream->close;

    my $prox_stream = $self->_get_prox_stream();
    $prox_stream->close;

    my $skip_stream = $self->_get_skip_stream();
    return $skip_stream->close;
}

1;

__END__
__XS__

MODULE = KinoSearch    PACKAGE = KinoSearch::Index::SegTermDocs

void
_init_child(term_docs)
    TermDocs *term_docs;
PPCODE:
    /* Thin glue: hand the TermDocs struct to the C initializer, which
     * allocates the SegTermDocsChild and installs the function pointers.
     * Nothing is pushed onto the Perl stack. */
    Kino_SegTermDocs_init_child(term_docs);

SV*
_set_or_get(term_docs, ...)
    TermDocs *term_docs;
ALIAS:
    _set_count         = 1
    _get_count         = 2
    _set_freq_stream   = 3
    _get_freq_stream   = 4
    _set_prox_stream   = 5
    _get_prox_stream   = 6
    _set_skip_stream   = 7
    _get_skip_stream   = 8
    _set_deldocs       = 9
    _get_deldocs       = 10
    _set_reader        = 11
    _get_reader        = 12
    set_read_positions = 13
    get_read_positions = 14
    _set_skip_interval = 15
    _get_skip_interval = 16
CODE:
{
    /* Combined accessor for the members of the SegTermDocsChild struct.
     *
     * Each ALIAS above selects one case below.  Odd-numbered aliases are
     * setters: they store ST(1) and then fall through to the matching
     * even-numbered getter, so a setter also returns the stored value.
     *
     * For the object-valued members the struct keeps its own copy of the
     * Perl SV (newSVsv), releasing the previous copy first; getters hand
     * back a fresh copy.
     *
     * NOTE(review): KINO_START_SET_OR_GET_SWITCH / KINO_END_SET_OR_GET_SWITCH
     * are defined elsewhere; they presumably open and close the switch on
     * the alias index -- confirm.
     */
    SegTermDocsChild *child = (SegTermDocsChild*)term_docs->child;

    KINO_START_SET_OR_GET_SWITCH

    case 1:  child->count = SvUV(ST(1));
             /* fall through */
    case 2:  RETVAL = newSVuv(child->count);
             break;

    case 3:  SvREFCNT_dec(child->freq_stream_sv);
             child->freq_stream_sv = newSVsv( ST(1) );
             Kino_extract_struct( child->freq_stream_sv, child->freq_stream, 
                InStream*, "KinoSearch::Store::InStream");
             /* fall through */
    case 4:  RETVAL = newSVsv(child->freq_stream_sv);
             break;

    case 5:  SvREFCNT_dec(child->prox_stream_sv);
             child->prox_stream_sv = newSVsv( ST(1) );
             Kino_extract_struct( child->prox_stream_sv, child->prox_stream, 
                InStream*, "KinoSearch::Store::InStream");
             /* fall through */
    case 6:  RETVAL = newSVsv(child->prox_stream_sv);
             break;

    case 7:  SvREFCNT_dec(child->skip_stream_sv);
             child->skip_stream_sv = newSVsv( ST(1) );
             Kino_extract_struct( child->skip_stream_sv, child->skip_stream, 
                InStream*, "KinoSearch::Store::InStream");
             /* fall through */
    case 8:  RETVAL = newSVsv(child->skip_stream_sv);
             break;

    case 9:  SvREFCNT_dec(child->deldocs_sv);
             child->deldocs_sv = newSVsv( ST(1) );
             Kino_extract_struct( child->deldocs_sv, child->deldocs, 
                BitVector*, "KinoSearch::Index::DelDocs" );
             /* fall through */
    case 10: RETVAL = newSVsv(child->deldocs_sv);
             break;

             /* Validate the new reader BEFORE releasing the old one.  If
              * the check throws, the struct must still own the SV it points
              * at; otherwise destroy() would release it a second time. */
    case 11: if (!sv_derived_from( ST(1), "KinoSearch::Index::IndexReader") )
                Kino_confess("not a KinoSearch::Index::IndexReader");
             SvREFCNT_dec(child->reader_sv);
             child->reader_sv = newSVsv( ST(1) );
             /* fall through */
    case 12: RETVAL = newSVsv(child->reader_sv);
             break;

             /* read_positions is normalized to 0 or 1 */
    case 13: child->read_positions = SvTRUE( ST(1) ) ? 1 : 0;
             /* fall through */
    case 14: RETVAL = newSViv(child->read_positions);
             break;

    case 15: child->skip_interval = SvUV(ST(1));
             /* fall through */
    case 16: RETVAL = newSVuv(child->skip_interval);
             break;

    KINO_END_SET_OR_GET_SWITCH
}
OUTPUT: RETVAL

__H__

#ifndef H_KINO_SEG_TERM_DOCS
#define H_KINO_SEG_TERM_DOCS 1

#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "KinoSearchUtilBitVector.h"
#include "KinoSearchIndexTermDocs.h"
#include "KinoSearchIndexTermInfo.h"
#include "KinoSearchStoreInStream.h"
#include "KinoSearchUtilMemManager.h"

/* Per-object state for a SegTermDocs, hung off TermDocs->child. */
typedef struct segtermdocschild {
    U32        count;          /* entries consumed so far for the current
                                * term; reset to 0 by seek_tinfo */
    U32        doc_freq;       /* number of entries available for the
                                * current term (copied from TermInfo) */
    U32        doc;            /* current doc number, built up by adding
                                * the decoded deltas */
    U32        freq;           /* freq value decoded for the current doc */
    U32        skip_doc;       /* doc number reached by the skip entries
                                * read so far */
    U32        skip_count;     /* number of skip entries read so far */
    U32        num_skips;      /* doc_freq / skip_interval, set at seek */
    SV        *positions;      /* string SV whose buffer is a packed U32
                                * array filled by load_positions */
    U32        read_positions; /* flag: when true, next() also decodes
                                * positions from prox_stream */
    U32        skip_interval;  /* set from Perl via _set_skip_interval */
    InStream  *freq_stream;    /* struct extracted from freq_stream_sv */
    InStream  *prox_stream;    /* struct extracted from prox_stream_sv */
    InStream  *skip_stream;    /* struct extracted from skip_stream_sv */
    bool       have_skipped;   /* TRUE once skip_stream has been seeked
                                * for the current term */
    double     frq_fileptr;    /* freq file position: starts at the
                                * TermInfo value, advanced by skip data */
    double     prx_fileptr;    /* prox file position, handled likewise */
    double     skip_fileptr;   /* TermInfo frq_fileptr + skip_offset */
    BitVector *deldocs;        /* struct extracted from deldocs_sv; docs
                                * whose bit is set are filtered out */
    SV        *freq_stream_sv; /* the *_sv members are the struct's own */
    SV        *prox_stream_sv; /* copies of the Perl-level objects; they */
    SV        *skip_stream_sv; /* are released in                        */
    SV        *deldocs_sv;     /* Kino_SegTermDocs_destroy               */
    SV        *reader_sv;
} SegTermDocsChild;

/* Public C interface.  Apart from init_child, these are the functions that
 * Kino_SegTermDocs_init_child installs into the TermDocs function pointers.
 */
void Kino_SegTermDocs_init_child(TermDocs*);
void Kino_SegTermDocs_set_doc_freq(TermDocs*, U32);
U32  Kino_SegTermDocs_get_doc_freq(TermDocs*);
U32  Kino_SegTermDocs_get_doc(TermDocs*);
U32  Kino_SegTermDocs_get_freq(TermDocs*);
SV*  Kino_SegTermDocs_get_positions(TermDocs*);
U32  Kino_SegTermDocs_bulk_read(TermDocs*, SV*, SV*, U32);
void Kino_SegTermDocs_seek_tinfo(TermDocs*, TermInfo*);
bool Kino_SegTermDocs_next(TermDocs*);
bool Kino_SegTermDocs_skip_to(TermDocs*, U32 target);
/* NOTE(review): no definition of skip_to_with_positions is visible in this
 * file's C section, and init_child does not install it -- confirm whether
 * it is implemented elsewhere or is a stale declaration. */
bool Kino_SegTermDocs_skip_to_with_positions(TermDocs*);
void Kino_SegTermDocs_destroy(TermDocs*);

#endif /* include guard */

__C__

#include "KinoSearchIndexSegTermDocs.h"

static void
load_positions(TermDocs *term_docs);

/* Allocate a SegTermDocsChild, attach it to term_docs->child, install the
 * SegTermDocs implementations into the TermDocs function pointers, and put
 * every member of the child struct into a defined state.
 *
 * The stream, deldocs, reader and skip_interval members are expected to be
 * supplied afterwards through the Perl-level setters; until then the
 * pointers are NULL and the numeric members are 0, so that premature use
 * fails predictably instead of reading indeterminate memory.
 */
void
Kino_SegTermDocs_init_child(TermDocs *term_docs) {
    SegTermDocsChild *child;

    Kino_New(1, child, 1, SegTermDocsChild);
    term_docs->child = child;

    child->doc_freq = KINO_TERM_DOCS_SENTINEL;
    child->doc      = KINO_TERM_DOCS_SENTINEL;
    child->freq     = KINO_TERM_DOCS_SENTINEL;

    /* child->positions starts life as an empty string */
    child->positions = newSV(1);
    SvCUR_set(child->positions, 0);
    SvPOK_on(child->positions);

    term_docs->set_doc_freq  = Kino_SegTermDocs_set_doc_freq;
    term_docs->get_doc_freq  = Kino_SegTermDocs_get_doc_freq;
    term_docs->get_doc       = Kino_SegTermDocs_get_doc;
    term_docs->get_freq      = Kino_SegTermDocs_get_freq;
    term_docs->get_positions = Kino_SegTermDocs_get_positions;
    term_docs->bulk_read     = Kino_SegTermDocs_bulk_read;
    term_docs->seek_tinfo    = Kino_SegTermDocs_seek_tinfo;
    term_docs->next          = Kino_SegTermDocs_next;
    term_docs->skip_to       = Kino_SegTermDocs_skip_to;
    term_docs->destroy       = Kino_SegTermDocs_destroy;

    /* Perl-level objects: undef until the setters replace them */
    child->freq_stream_sv   = &PL_sv_undef;
    child->prox_stream_sv   = &PL_sv_undef;
    child->skip_stream_sv   = &PL_sv_undef;
    child->deldocs_sv       = &PL_sv_undef;
    child->reader_sv        = &PL_sv_undef;
    child->count            = 0;

    /* C structs extracted from those objects: nothing attached yet */
    child->freq_stream      = NULL;
    child->prox_stream      = NULL;
    child->skip_stream      = NULL;
    child->deldocs          = NULL;

    /* skip bookkeeping and file positions: overwritten by seek_tinfo */
    child->skip_interval    = 0;
    child->skip_doc         = 0;
    child->skip_count       = 0;
    child->num_skips        = 0;
    child->have_skipped     = FALSE;
    child->frq_fileptr      = 0.0;
    child->prx_fileptr      = 0.0;
    child->skip_fileptr     = 0.0;

    child->read_positions = 0; /* off by default */
}

/* Overwrite the doc_freq member of the child struct. */
void
Kino_SegTermDocs_set_doc_freq(TermDocs *term_docs, U32 doc_freq) {
    SegTermDocsChild *const child = (SegTermDocsChild*)term_docs->child;

    child->doc_freq = doc_freq;
}

/* Return the doc_freq member of the child struct. */
U32
Kino_SegTermDocs_get_doc_freq(TermDocs *term_docs) {
    return ((SegTermDocsChild*)term_docs->child)->doc_freq;
}

/* Return the current doc number held in the child struct. */
U32
Kino_SegTermDocs_get_doc(TermDocs *term_docs) {
    return ((SegTermDocsChild*)term_docs->child)->doc;
}


/* Return the freq value decoded for the current doc. */
U32
Kino_SegTermDocs_get_freq(TermDocs *term_docs) {
    return ((SegTermDocsChild*)term_docs->child)->freq;
}

/* Return the positions SV itself (not a copy, and without touching its
 * reference count). */
SV*
Kino_SegTermDocs_get_positions(TermDocs *term_docs) {
    return ((SegTermDocsChild*)term_docs->child)->positions;
}

/* Decode up to num_wanted entries for the current term in one go.
 *
 * doc_nums_sv and freqs_sv are turned into string SVs whose buffers receive
 * parallel packed U32 arrays of doc numbers and freq values.  Docs flagged
 * in deldocs are consumed but not reported.  Positions are not read here.
 *
 * Returns the number of entries stored, which is also reflected in the
 * string length of both SVs.
 */
U32 
Kino_SegTermDocs_bulk_read(TermDocs *term_docs, SV* doc_nums_sv, 
                           SV* freqs_sv, U32 num_wanted) {
    SegTermDocsChild *const child    = (SegTermDocsChild*)term_docs->child;
    InStream *const         instream = child->freq_stream;
    const STRLEN            capacity = num_wanted * sizeof(U32);
    U32                    *doc_out;
    U32                    *freq_out;
    U32                     num_got  = 0;

    /* make both SVs strings with room for num_wanted U32s (plus a NUL) */
    SvUPGRADE(doc_nums_sv, SVt_PV);
    SvUPGRADE(freqs_sv,    SVt_PV);
    SvPOK_on(doc_nums_sv);
    SvPOK_on(freqs_sv);
    doc_out  = (U32*)SvGROW(doc_nums_sv, capacity + 1);
    freq_out = (U32*)SvGROW(freqs_sv,    capacity + 1);

    while (num_got < num_wanted && child->count < child->doc_freq) {
        U32 doc_code;

        /* same decoding as Kino_SegTermDocs_next, minus the positions */
        child->count++;
        doc_code    = instream->read_vint(instream);
        child->doc += doc_code >> 1;

        /* low bit set means freq is 1; otherwise freq follows as a VInt */
        child->freq = (doc_code & 1) ? 1 : instream->read_vint(instream);

        /* deleted docs are consumed but not reported */
        if ( Kino_BitVec_get(child->deldocs, child->doc) )
            continue;

        doc_out[num_got]  = child->doc;
        freq_out[num_got] = child->freq;
        num_got++;
    }

    /* string length == bytes actually filled */
    SvCUR_set(doc_nums_sv, (num_got * sizeof(U32)));
    SvCUR_set(freqs_sv,    (num_got * sizeof(U32)));

    return num_got;
}

/* Advance to the next doc for the current term which is not flagged in
 * deldocs.  Updates child->doc, child->freq and child->count, and loads
 * positions for every entry consumed when read_positions is on.
 *
 * Returns true on success, false once count has reached doc_freq.
 */
bool
Kino_SegTermDocs_next(TermDocs *term_docs) {
    SegTermDocsChild *const child    = (SegTermDocsChild*)term_docs->child;
    InStream *const         instream = child->freq_stream;

    /* keep going until we run out of entries for this term */
    while (child->count != child->doc_freq) {
        /* doc numbers are stored as deltas, shifted left by one bit */
        const U32 doc_code = instream->read_vint(instream);
        child->doc += doc_code >> 1;

        /* low bit set means freq is 1; otherwise freq follows as a VInt */
        child->freq = (doc_code & 1) ? 1 : instream->read_vint(instream);

        child->count++;

        /* positions are consumed for deleted docs too */
        if (child->read_positions)
            load_positions(term_docs);

        /* a doc that isn't deleted is what the caller wants */
        if (!Kino_BitVec_get(child->deldocs, child->doc))
            return 1;
    }

    return 0;
}

/* Decode child->freq positions for the current doc from prox_stream into
 * child->positions, which afterwards holds a packed array of U32s.
 * Positions are stored as deltas, so each decoded value is added to a
 * running total.
 */
static void
load_positions(TermDocs *term_docs) {
    SegTermDocsChild *child = (SegTermDocsChild*)term_docs->child;
    InStream *prox_stream = child->prox_stream;
    STRLEN len = child->freq * sizeof(U32);
    U32 *positions, *positions_end;
    U32 position = 0;

    /* SvGROW does not reserve space for a trailing NUL on its own; ask for
     * one extra byte (as bulk_read does) and terminate the buffer so the SV
     * stays a well-formed Perl string with SvCUR < SvLEN. */
    SvGROW( child->positions, len + 1 );
    SvCUR_set(child->positions, len);
    *SvEND(child->positions) = '\0';

    positions = (U32*)SvPVX(child->positions);
    positions_end = (U32*)SvEND(child->positions);
    while (positions < positions_end) {
        position += prox_stream->read_vint(prox_stream);
        *positions++ = position;
    }
}

/* Point this object at the entries described by tinfo.
 *
 * A NULL tinfo leaves the object with count and doc_freq both at 0, so
 * next() reports exhaustion immediately.  Otherwise the per-term state is
 * reset from tinfo and the freq and prox streams are seeked to the term's
 * starting positions.  The skip stream is not touched here; skip_to()
 * seeks it on first use.
 */
void
Kino_SegTermDocs_seek_tinfo(TermDocs *term_docs, TermInfo *tinfo) {
    SegTermDocsChild *const child = (SegTermDocsChild*)term_docs->child;
    InStream *freq_stream;
    InStream *prox_stream;

    child->count = 0;

    /* no term info: nothing to iterate over */
    if (tinfo == NULL) {
        child->doc_freq = 0;
        return;
    }

    /* reset iteration state */
    child->doc          = 0;
    child->freq         = 0;
    child->skip_doc     = 0;
    child->skip_count   = 0;
    child->have_skipped = FALSE;

    /* copy what we need from the term info */
    child->num_skips    = tinfo->doc_freq / child->skip_interval;
    child->doc_freq     = tinfo->doc_freq;
    child->frq_fileptr  = tinfo->frq_fileptr;
    child->prx_fileptr  = tinfo->prx_fileptr;
    child->skip_fileptr = tinfo->frq_fileptr + tinfo->skip_offset;

    /* move both data streams to the start of this term's entries */
    freq_stream = child->freq_stream;
    freq_stream->seek( freq_stream, tinfo->frq_fileptr );
    prox_stream = child->prox_stream;
    prox_stream->seek( prox_stream, tinfo->prx_fileptr );
}

/* Advance to the first non-deleted doc whose number is >= target.
 *
 * When the term has at least skip_interval entries, skip data is read to
 * jump the freq (and, if positions are being read, prox) stream forward;
 * the rest of the distance is covered by calling term_docs->next().
 *
 * Returns TRUE if such a doc was found, FALSE if the entries ran out.
 */
bool
Kino_SegTermDocs_skip_to(TermDocs *term_docs, U32 target) {
    SegTermDocsChild *child = (SegTermDocsChild*)term_docs->child;
    
    /* skip data is only consulted for terms with enough entries */
    if (child->doc_freq >= child->skip_interval) {
        InStream *freq_stream   = child->freq_stream;
        InStream *prox_stream   = child->prox_stream;
        InStream *skip_stream   = child->skip_stream;
        U32 last_skip_doc       = child->skip_doc;
        /* starts at the current stream position, so the "something to
         * skip" test below fails unless skip data moves us forward */
        double last_frq_fileptr = freq_stream->tell(freq_stream);
        double last_prx_fileptr = -1;
        /* running adjustment for child->count: begins at -1 minus the
         * number of entries consumed past the last skip_interval boundary,
         * and grows by skip_interval per qualifying skip entry; it is only
         * applied if a seek actually happens below */
        I32 num_skipped         = -1 - (child->count % child->skip_interval);

        /* first use for this term: position the skip stream */
        if (!child->have_skipped) {
            child->skip_stream->seek(child->skip_stream, child->skip_fileptr);
            child->have_skipped = TRUE;
        }
        
        /* walk skip entries until one reaches or passes the target */
        while (target > child->skip_doc) {
            /* remember the last skip point known to be before target */
            last_skip_doc    = child->skip_doc;
            last_frq_fileptr = child->frq_fileptr;
            last_prx_fileptr = child->prx_fileptr;

            if (child->skip_doc != 0 && child->skip_doc >= child->doc) {
                num_skipped += child->skip_interval;
            }

            /* no more skip entries to read */
            if (child->skip_count >= child->num_skips) {
                break;
            }

            /* each skip entry is three VInt deltas: doc number, freq file
             * pointer, prox file pointer */
            child->skip_doc += skip_stream->read_vint(skip_stream);
            child->frq_fileptr += skip_stream->read_vint(skip_stream);
            child->prx_fileptr += skip_stream->read_vint(skip_stream);

            child->skip_count++;
        }

        /* if there's something to skip, skip it */
        if (last_frq_fileptr > freq_stream->tell(freq_stream)) {
            freq_stream->seek(freq_stream, last_frq_fileptr);
            /* the prox stream only needs to follow when next() will be
             * decoding positions from it */
            if (child->read_positions) {
                prox_stream->seek(prox_stream, last_prx_fileptr);
            }
            child->doc = last_skip_doc;
            child->count += num_skipped;
        }
    }

    /* done skipping, so scan */
    do {
        if (!term_docs->next(term_docs)) {
            return FALSE;
        }
    } while (target > child->doc);
    return TRUE;
}

/* Tear down a SegTermDocs: release every SV the child struct holds, free
 * the child struct, then let the parent class dispose of term_docs.
 */
void 
Kino_SegTermDocs_destroy(TermDocs *term_docs){
    SegTermDocsChild *const child = (SegTermDocsChild*)term_docs->child;

    /* the positions buffer */
    SvREFCNT_dec(child->positions);

    /* our copies of the Perl-level objects */
    SvREFCNT_dec(child->freq_stream_sv);
    SvREFCNT_dec(child->prox_stream_sv);
    SvREFCNT_dec(child->skip_stream_sv);
    SvREFCNT_dec(child->deldocs_sv);
    SvREFCNT_dec(child->reader_sv);

    /* the struct itself, then the parent's share */
    Kino_Safefree(child);
    Kino_TermDocs_destroy(term_docs);
}

__POD__

=begin devdocs

=head1 NAME

KinoSearch::Index::SegTermDocs - single-segment TermDocs

=head1 DESCRIPTION

Single-segment implementation of KinoSearch::Index::TermDocs.

=head1 COPYRIGHT

Copyright 2005-2006 Marvin Humphrey

=head1 LICENSE, DISCLAIMER, BUGS, etc.

See L<KinoSearch|KinoSearch> version 0.15.

=end devdocs
=cut