use strict;
use warnings;

package KinoSearch::Store::OutStream;
use KinoSearch::Util::ToolSet;
use base qw( KinoSearch::Util::Obj );

1;

__END__

__XS__

MODULE = KinoSearch     PACKAGE = KinoSearch::Store::OutStream

kino_OutStream*
new(class, file_des)
    const classname_char *class;
    kino_FileDes *file_des;
CODE:
    /* Constructor binding: the returned object is whatever
     * kino_OutStream_new() produces for the supplied file_des.  The class
     * argument is handed to CHY_UNUSED_VAR and is not otherwise consulted.
     */
    CHY_UNUSED_VAR(class);
    RETVAL = kino_OutStream_new(file_des);
OUTPUT: RETVAL

void
print(self, ...)
    kino_OutStream *self;
PPCODE:
{
    /* Hand the string value of every argument after the invocant to
     * Kino_OutStream_Write_Bytes, in argument order.  Nothing is pushed onto
     * the Perl stack, so the caller receives an empty list.
     */
    int arg_tick = 1;   /* ST(0) is the invocant, so start at the first real arg */
    while (arg_tick < items) {
        STRLEN byte_count;
        char *bytes = SvPV( ST(arg_tick), byte_count);
        Kino_OutStream_Write_Bytes(self, bytes, byte_count);
        arg_tick++;
    }
}

void
sseek(self, target)
    kino_OutStream *self;
    chy_u64_t       target;
PPCODE:
    /* Forward target to Kino_OutStream_SSeek; nothing is pushed onto the Perl
     * stack.  NOTE(review): presumably target is an absolute byte offset in
     * the stream -- confirm against the C implementation.
     */
    Kino_OutStream_SSeek(self, target);


chy_u64_t
stell(self)
    kino_OutStream *self;
CODE:
    /* Return the value reported by Kino_OutStream_STell for this stream.
     * NOTE(review): presumably the current write position in bytes --
     * confirm against the C implementation.
     */
    RETVAL = Kino_OutStream_STell(self);
OUTPUT: RETVAL


chy_u64_t
slength(self)
    kino_OutStream *self;
CODE:
    /* Return the value reported by Kino_OutStream_SLength for this stream.
     * NOTE(review): presumably the stream's length in bytes; whether
     * still-buffered data is counted cannot be told from here -- confirm.
     */
    RETVAL = Kino_OutStream_SLength(self);
OUTPUT: RETVAL


void
sflush(self)
    kino_OutStream *self;
PPCODE:
    /* Invoke Kino_OutStream_SFlush on the stream; nothing is pushed onto the
     * Perl stack.  NOTE(review): presumably this pushes the OutStream's own
     * buffer out to its file descriptor -- confirm.
     */
    Kino_OutStream_SFlush(self);

void
sclose(self)
    kino_OutStream *self;
PPCODE:
    /* Invoke Kino_OutStream_SClose on the stream; nothing is pushed onto the
     * Perl stack.  NOTE(review): whether pending output is flushed first is
     * decided by the C implementation -- confirm before relying on it.
     */
    Kino_OutStream_SClose(self);


void
absorb(self, instream)
    kino_OutStream *self;
    kino_InStream  *instream;
PPCODE:
    /* Pass the InStream to Kino_OutStream_Absorb; nothing is pushed onto the
     * Perl stack.  NOTE(review): presumably copies the InStream's content
     * into this OutStream -- confirm how much is copied and from where.
     */
    Kino_OutStream_Absorb(self, instream);

void
_set_or_get(self, ...)
    kino_OutStream *self;
ALIAS:
    get_file_des = 2
PPCODE:
{
    /* Shared accessor body: the ALIAS section exposes this XSUB under the
     * additional name get_file_des with alias index 2.
     * NOTE(review): START_SET_OR_GET_SWITCH / END_SET_OR_GET_SWITCH are
     * defined elsewhere; presumably they open a switch on the alias index,
     * declare retval, and push it on return -- confirm.
     */
    START_SET_OR_GET_SWITCH

    /* get_file_des: convert the stream's file_des member via kobj_to_pobj */
    case 2:  retval = kobj_to_pobj(self->file_des);
             break;
    
    END_SET_OR_GET_SWITCH
}

=begin comment

    $outstream->lu_write( TEMPLATE, LIST );

Write the items in LIST to the OutStream using the serialization schemes
specified by TEMPLATE.

=end comment
=cut

void
lu_write (self, template_sv, ...)
    kino_OutStream *self;
    SV *template_sv;
PPCODE:
{
    /* Write the trailing arguments (LIST) to the OutStream, one per template
     * symbol, using the serialization named by each symbol in TEMPLATE.
     *
     * A symbol may be followed by a decimal repeat count.  For 'a' the count
     * is the required byte length of the single scalar consumed; for every
     * other symbol it is the number of LIST items written with that symbol.
     * Spaces between template elements are skipped.
     *
     * CONFESSes when TEMPLATE is empty, contains an unknown symbol, or does
     * not account for exactly the items supplied.  Nothing is pushed onto the
     * Perl stack.
     */
    STRLEN      tpt_len;          /* bytelength of template */
    char       *template;         /* ptr to a spot in the template */
    char       *tpt_end;          /* ptr to the end of the template */
    int         repeat_count = 0; /* number of times to repeat sym */
    int         item_count   = 2; /* num elements in @_ processed */
    char        sym = '\0';       /* the current symbol in the template */
    chy_i32_t   aI32;
    chy_u32_t   aU32;
    SV         *aSV;
    char       *string;
    STRLEN      string_len;

    /* require an object and a template; a LIST that is too short or too long
     * for the template is diagnosed inside the loop below */
    if (items < 2) {
        CONFESS("lu_write error: too few arguments");
    }

    /* prepare the template and get pointers */
    template = SvPV(template_sv, tpt_len);
    tpt_end  = template + tpt_len;

    /* reject an empty template */
    if (tpt_len == 0) {
        CONFESS("lu_write error: TEMPLATE cannot be empty string");
    }

    while (1) {
        /* only process template if we're not in the midst of a repeat */
        if (repeat_count == 0) {
            /* fast-forward past space characters; the bound is tested first
             * so the byte at tpt_end is never examined */
            while (template < tpt_end && *template == ' ') {
                template++;
            }

            /* if we're done, return or throw error */
            if (template == tpt_end || item_count == items) {
                if (item_count != items) {
                    CONFESS( "Too many ITEMS, not enough TEMPLATE");
                }
                else if (template != tpt_end) {
                    CONFESS("Too much TEMPLATE, not enough ITEMS");
                }
                else { /* success! */
                    break;
                }
            }

            /* derive the current symbol */
            sym = *template++;

            if (   template < tpt_end
                && *template >= '0'
                && *template <= '9'
            ) {
                /* calculate numerical repeat count, checking the bound
                 * before every read so we stop exactly at tpt_end */
                repeat_count = 0;
                do {
                    repeat_count = (repeat_count * 10)
                        + (*template - KINO_NUM_CHAR_OFFSET);
                    template++;
                } while (  template < tpt_end
                        && *template >= '0'
                        && *template <= '9'
                );
            }
            else { /* no numeric repeat count, so process sym only once */
                repeat_count = 1;
            }
        }
        /* sanity check */
        else if (repeat_count < 0) {
            CONFESS("invalid repeat_count: %d", repeat_count);
        }
        /* mid-repeat: the template still expects an item, so make sure one
         * is actually on the stack before ST(item_count) is touched */
        else if (item_count >= items) {
            CONFESS("Too much TEMPLATE, not enough ITEMS");
        }


        switch(sym) {

        case 'a': /* arbitrary binary data */
            aSV  = ST(item_count);
            if (!SvOK(aSV)) {
                CONFESS("Internal error: undef at lu_write 'a'");
            }
            string = SvPV(aSV, string_len);
            if ((STRLEN)repeat_count != string_len) {
                /* cast so the argument type matches the %d conversion */
                CONFESS("repeat_count != string_len: %d %d", repeat_count,
                    (int)string_len);
            }
            Kino_OutStream_Write_Bytes(self, string, string_len);
            /* the count was a byte length, not a number of items: reset it
             * so the decrement below triggers the next sym */
            repeat_count = 1;
            break;

        case 'b': /* signed byte */
        case 'B': /* unsigned byte */
            aI32 = SvIV( ST(item_count) );
            Kino_OutStream_Write_Byte(self, (char)(aI32 & 0xff));
            break;

        case 'i': /* signed 32-bit integer */
            aI32 = SvIV( ST(item_count) );
            Kino_OutStream_Write_Int(self, (chy_u32_t)aI32);
            break;


        case 'I': /* unsigned 32-bit integer */
            aU32 = SvUV( ST(item_count) );
            Kino_OutStream_Write_Int(self, aU32);
            break;

        case 'Q': /* unsigned "64-bit" integer */
            {
                /* take the integer slot when the scalar has one, otherwise
                 * fall back to its floating point value */
                SV *const this_sv = ST(item_count);
                if (SvIOK(this_sv))
                    Kino_OutStream_Write_Long(self, SvUV(this_sv));
                else
                    Kino_OutStream_Write_Long(self, SvNV(this_sv));
            }
            break;

        case 'V': /* VInt */
            aU32 = SvUV( ST(item_count) );
            Kino_OutStream_Write_VInt(self, aU32);
            break;

        case 'W': /* VLong */
            {
                /* same integer-slot / floating point choice as 'Q' */
                SV *const this_sv = ST(item_count);
                if (SvIOK(this_sv))
                    Kino_OutStream_Write_VLong(self, SvUV(this_sv));
                else
                    Kino_OutStream_Write_VLong(self, SvNV(this_sv));
            }
            break;

        case 'T': /* string */
            aSV        = ST(item_count);
            string     = SvPV(aSV, string_len);
            Kino_OutStream_Write_String(self, string, string_len);
            break;

        default:
            CONFESS("Illegal character in template: %c", sym);
        }

        /* use up one repeat_count and one item from the stack */
        repeat_count--;
        item_count++;
    }
}

__POD__


=begin devdocs

=head1 PRIVATE CLASS

KinoSearch::Store::OutStream - Filehandles for writing invindexes.

=head1 SYNOPSIS

    # isa blessed filehandle

    my $outstream = $folder->open_outstream( $filename );
    $outstream->lu_write( 'V8', @eight_vints );

=head1 DESCRIPTION

The OutStream class abstracts all of KinoSearch's output operations.  It is
akin to a narrowly-implemented, specialized IO::File.

Unlike its counterpart InStream, OutStream cannot be assigned an arbitrary
C<length> or C<offset>.

=head2 Buffering

OutStream objects maintain their own buffers and do not write their contents to
disk on the same schedules as Perl filehandles.

=head2 lu_write / lu_read template

lu_write and its opposite number, InStream's lu_read, provide a
pack/unpack-style interface for handling primitive data types required by the
Lucene index file format.  The most notable of these specialized data types is
the VInt, or Variable Integer, which is similar to the BER compressed integer
(pack template 'w').

All fixed-width integer formats are stored in big-endian order (high-byte
first).  Signed integers use twos-complement encoding.  The maximum allowable
value for both Long and VLong is 2**52 because each is stored inside the NV
(double) storage pocket of a perl Scalar, which has a 53-bit mantissa.
 
    a   Arbitrary binary data, copied to/from the scalar's PV (string)

    b   8-bit  integer, signed
    B   8-bit  integer, unsigned

    i   32-bit integer, signed
    I   32-bit integer, unsigned

    Q   64-bit integer, unsigned                (max value 2**52)

    V   VInt   variable-width integer, unsigned (max value 2**32)
    W   VLong  variable-width integer, unsigned (max value 2**52)

    T   Lucene string, which is a VInt indicating the length in bytes 
        followed by the string.  The string must be valid UTF-8.

Numeric repeat counts are supported:

    $outstream->lu_write( 'V2 T', 0, 1, "a string" );
     
Other features of pack/unpack such as parentheses, infinite repeats via '*',
and slash notation are not.  A numeric repeat count following 'a' indicates
how many bytes to read, while a count following any other symbol indicates how
many scalars of that type to return.

    ( $three_byte_string, @eight_vints ) = $instream->lu_read('a3V8');

The behavior of lu_read and lu_write is much more strict with regard to a
mismatch between TEMPLATE and LIST than pack/unpack, which are fairly
forgiving in what they will accept.  lu_read will confess() if it cannot read
all the items specified by TEMPLATE from the InStream, and lu_write will
confess() if the number of items in LIST does not match the expression in
TEMPLATE.

=head1 COPYRIGHT

Copyright 2005-2007 Marvin Humphrey

=head1 LICENSE, DISCLAIMER, BUGS, etc.

See L<KinoSearch> version 0.20.

=end devdocs
=cut