package KinoSearch::Index::TermDocs;
use strict;
use warnings;
use KinoSearch::Util::ToolSet;
use base qw( KinoSearch::Util::CClass );
our %instance_vars = __PACKAGE__->init_instance_vars();
=begin comment
$term_docs->seek($term);
Locate the TermDocs object at a particular term.
=end comment
=cut
# Abstract method.  This base-class version only forwards to
# abstract_death() on the invocant; concrete subclasses are expected to
# provide a working seek().
sub seek {
    my $self = shift;
    return $self->abstract_death;
}
# Abstract method.  This base-class version only forwards to
# abstract_death() on the invocant; concrete subclasses are expected to
# provide a working close().
sub close {
    my $self = shift;
    return $self->abstract_death;
}
1;
__END__
__XS__
MODULE = KinoSearch PACKAGE = KinoSearch::Index::TermDocs
=begin comment

    my $term_docs = KinoSearch::Index::TermDocs->new;
    my $another   = $term_docs->new;

Constructor.  May be invoked either on a class name or on an existing object;
in the latter case the new object is blessed into that object's class.
Allocates a C TermDocs struct with Kino_TermDocs_new() and returns it wrapped
in a blessed scalar reference.

=end comment

=cut

void
new(either_sv)
    SV *either_sv;
PREINIT:
    char     *class;
    TermDocs *term_docs;
PPCODE:
    /* Determine the class.  sv_reftype() inspects the SV it is handed, so
     * for an object we must pass the referent (SvRV) and ask for the blessed
     * package name (ob = 1); passing the reference itself with ob = 0 would
     * yield a type name such as "REF" instead of the class. */
    class = sv_isobject(either_sv)
        ? (char*)sv_reftype(SvRV(either_sv), 1)
        : SvPV_nolen(either_sv);
    /* build object */
    term_docs = Kino_TermDocs_new();
    /* return a mortal reference blessed into class, holding the pointer */
    ST(0) = sv_newmortal();
    sv_setref_pv(ST(0), class, (void*)term_docs);
    XSRETURN(1);
=begin comment

    $term_docs->seek_tinfo($tinfo);
    $term_docs->seek_tinfo(undef);

Pass a TermInfo (or nothing) to the object's seek_tinfo function pointer.
When the argument is undef, the C-level function receives a NULL TermInfo
pointer.  Returns nothing.

=end comment

=cut

void
seek_tinfo(term_docs, maybe_tinfo_sv)
TermDocs *term_docs;
SV *maybe_tinfo_sv;
PREINIT:
TermInfo *tinfo = NULL;
PPCODE:
/* if maybe_tinfo_sv is undef, tinfo is NULL */
if (SvOK(maybe_tinfo_sv)) {
/* NOTE(review): Kino_extract_struct is defined elsewhere; presumably it
 * unwraps the TermInfo* from the Perl object and complains if the object
 * is not a KinoSearch::Index::TermInfo -- confirm against the macro. */
Kino_extract_struct(maybe_tinfo_sv, tinfo,
TermInfo*, "KinoSearch::Index::TermInfo");
}
/* dispatch through the per-object function pointer */
term_docs->seek_tinfo(term_docs, tinfo);
=begin comment

    while ($term_docs->next) {
        # ...
    }

Advance the TermDocs object to the next document.  Returns false when the
iterator is exhausted, true otherwise.

The call is dispatched through the object's C-level "next" function pointer,
so the actual behavior is supplied by the subclass.

=end comment

=cut

bool
next(term_docs)
TermDocs *term_docs;
CODE:
/* dispatch through the per-object function pointer */
RETVAL = term_docs->next(term_docs);
OUTPUT: RETVAL
=begin comment

    my $num_got = $term_docs->read( $doc_nums, $freqs, $num_wanted );

Bulk read.  Hands the two scalars and the requested count to the object's
C-level "read" function pointer and returns the U32 that function returns.

NOTE(review): per the SYNOPSIS in this file's devdocs, the two scalars are
afterwards unpacked with 'I*' to obtain doc numbers and frequencies, and the
return value is the number of entries read -- confirm against the subclass
implementation.

=end comment

=cut

U32
read(term_docs, doc_nums_sv, freqs_sv, num_wanted)
TermDocs *term_docs
SV *doc_nums_sv;
SV *freqs_sv;
U32 num_wanted;
CODE:
/* dispatch through the per-object function pointer */
RETVAL = term_docs->read(term_docs, doc_nums_sv, freqs_sv, num_wanted);
OUTPUT: RETVAL
=begin comment

    my $ok = $term_docs->skip_to($target);

Dispatches to the object's C-level "skip_to" function pointer with a U32
target and returns its boolean result.

NOTE(review): presumably this advances the iterator to the first document
whose number is at least $target and returns false if there is none --
confirm against the subclass implementations.  Fuller documentation is still
to do.

=end comment

=cut

bool
skip_to(term_docs, target)
TermDocs *term_docs;
U32 target;
CODE:
/* dispatch through the per-object function pointer */
RETVAL = term_docs->skip_to(term_docs, target);
OUTPUT: RETVAL
=begin comment

    my $doc       = $term_docs->get_doc;
    my $freq      = $term_docs->get_freq;
    my $positions = $term_docs->get_positions;
    my $doc_freq  = $term_docs->get_doc_freq;
    $term_docs->set_doc_freq($doc_freq);

Combined accessor, reached through the aliases listed below.  Odd alias
numbers are setters and require exactly one argument; even alias numbers are
getters.  set_doc, set_freq and set_positions always throw.  The numeric
getters return undef when the underlying value is KINO_TERM_DOCS_SENTINEL.
set_doc_freq stores the value and then returns the value read back.

Calling _parent_set_or_get directly (i.e. not through one of its aliases)
throws, rather than returning an uninitialized value.

=end comment

=cut

SV*
_parent_set_or_get(term_docs, ...)
    TermDocs *term_docs;
ALIAS:
    set_doc       = 1
    get_doc       = 2
    set_freq      = 3
    get_freq      = 4
    set_positions = 5
    get_positions = 6
    set_doc_freq  = 7
    get_doc_freq  = 8
PREINIT:
    U32 num;
CODE:
{
    /* if called as a setter, make sure the extra arg is there */
    if (ix % 2 == 1 && items != 2)
        Kino_confess("usage: $term_docs->set_xxxxxx($val)");
    switch (ix) {
    case 1:  Kino_confess("Can't set_doc");
             /* fall through */
    case 2:  num = term_docs->get_doc(term_docs);
             RETVAL = num == KINO_TERM_DOCS_SENTINEL
                 ? &PL_sv_undef
                 : newSVuv(num);
             break;
    case 3:  Kino_confess("Can't set_freq");
             /* fall through */
    case 4:  num = term_docs->get_freq(term_docs);
             RETVAL = num == KINO_TERM_DOCS_SENTINEL
                 ? &PL_sv_undef
                 : newSVuv(num);
             break;
    case 5:  Kino_confess("Can't set_positions");
             /* fall through */
    case 6:  /* hand back a copy, not the object's own SV */
             RETVAL = newSVsv(term_docs->get_positions(term_docs));
             break;
    case 7:  term_docs->set_doc_freq(term_docs, (U32)SvUV(ST(1)) );
             /* fall through */
    case 8:  num = term_docs->get_doc_freq(term_docs);
             RETVAL = num == KINO_TERM_DOCS_SENTINEL
                 ? &PL_sv_undef
                 : newSVuv(num);
             break;
    default: /* ix == 0: invoked under the un-aliased name */
             Kino_confess("Internal error: _parent_set_or_get called with an unrecognized alias index");
             RETVAL = &PL_sv_undef; /* not expected to be reached */
             break;
    }
}
OUTPUT: RETVAL
=begin comment

Destructor.  Dispatches to the object's C-level "destroy" function pointer,
which Kino_TermDocs_new() sets to Kino_TermDocs_destroy by default.

=end comment

=cut

void
DESTROY(term_docs)
TermDocs *term_docs;
PPCODE:
/* dispatch through the per-object function pointer */
term_docs->destroy(term_docs);
__H__
#ifndef H_KINO_TERM_DOCS
#define H_KINO_TERM_DOCS 1
#define KINO_TERM_DOCS_SENTINEL 0xFFFFFFFF
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "KinoSearchUtilMemManager.h"
#include "KinoSearchIndexTermInfo.h"
/* TermDocs: a struct of function pointers plus two data members.  Every
 * call made by the XS layer goes through one of these pointers, passing the
 * struct itself as the first argument.  Kino_TermDocs_new() points most of
 * them at "_death" stubs so that a subclass has to install its own. */
typedef struct termdocs {
void *child; /* set to NULL by Kino_TermDocs_new; presumably subclass data -- confirm */
SV *positions; /* NOTE(review): presumably backs get_positions -- confirm */
void (*set_doc_freq)(struct termdocs*, U32);
U32 (*get_doc_freq)(struct termdocs*); /* KINO_TERM_DOCS_SENTINEL => XS returns undef */
U32 (*get_doc)(struct termdocs*); /* KINO_TERM_DOCS_SENTINEL => XS returns undef */
U32 (*get_freq)(struct termdocs*); /* KINO_TERM_DOCS_SENTINEL => XS returns undef */
SV* (*get_positions)(struct termdocs*); /* XS copies the result with newSVsv */
void (*seek_tinfo)(struct termdocs*, TermInfo*); /* TermInfo* is NULL when Perl passed undef */
bool (*next)(struct termdocs*);
bool (*skip_to)(struct termdocs*, U32);
U32 (*read)(struct termdocs*, SV*, SV*, U32);
void (*destroy)(struct termdocs*); /* invoked from the XS DESTROY */
} TermDocs;
/* Constructor: allocates a TermDocs and fills in default function pointers. */
TermDocs* Kino_TermDocs_new();
/* "_death" stubs: each one calls Kino_confess with a message saying that the
 * corresponding function must be defined in a subclass. */
void Kino_TermDocs_set_doc_freq_death(TermDocs*, U32);
U32 Kino_TermDocs_get_doc_freq_death(TermDocs*);
U32 Kino_TermDocs_get_doc_death(TermDocs*);
U32 Kino_TermDocs_get_freq_death(TermDocs*);
SV* Kino_TermDocs_get_positions_death(TermDocs*);
void Kino_TermDocs_seek_tinfo_death(TermDocs*, TermInfo*);
bool Kino_TermDocs_next_death(TermDocs*);
bool Kino_TermDocs_skip_to_death(TermDocs*, U32);
U32 Kino_TermDocs_read_death(TermDocs*, SV*, SV*, U32);
/* Default destructor: frees the struct itself. */
void Kino_TermDocs_destroy(TermDocs*);
#endif /* include guard */
__C__
#include "KinoSearchIndexTermDocs.h"
TermDocs*
Kino_TermDocs_new() {
TermDocs* term_docs;
Kino_New(0, term_docs, 1, TermDocs);
term_docs->child = NULL;
/* force the subclass to override functions */
term_docs->set_doc_freq = Kino_TermDocs_set_doc_freq_death;
term_docs->get_doc_freq = Kino_TermDocs_get_doc_freq_death;
term_docs->get_doc = Kino_TermDocs_get_doc_death;
term_docs->get_freq = Kino_TermDocs_get_freq_death;
term_docs->get_positions = Kino_TermDocs_get_positions_death;
term_docs->seek_tinfo = Kino_TermDocs_seek_tinfo_death;
term_docs->next = Kino_TermDocs_next_death;
term_docs->skip_to = Kino_TermDocs_skip_to_death;
term_docs->destroy = Kino_TermDocs_destroy;
return term_docs;
}
/* Default for the set_doc_freq slot (installed by Kino_TermDocs_new):
 * ignores its arguments and reports that a subclass must define it. */
void
Kino_TermDocs_set_doc_freq_death(TermDocs *term_docs, U32 doc_freq) {
Kino_confess("term_docs->set_doc_freq must be defined in a subclass");
}
/* Default for the get_doc_freq slot (installed by Kino_TermDocs_new):
 * reports that a subclass must define it.  Kino_confess is expected not to
 * return; the explicit return only guarantees that this non-void function
 * never falls off its end. */
U32
Kino_TermDocs_get_doc_freq_death(TermDocs *term_docs) {
    Kino_confess("term_docs->get_doc_freq must be defined in a subclass");
    return KINO_TERM_DOCS_SENTINEL; /* not reached */
}
/* Default for the get_doc slot (installed by Kino_TermDocs_new): reports
 * that a subclass must define it.  Kino_confess is expected not to return;
 * the explicit return only guarantees that this non-void function never
 * falls off its end. */
U32
Kino_TermDocs_get_doc_death(TermDocs *term_docs) {
    Kino_confess("term_docs->get_doc must be defined in a subclass");
    return KINO_TERM_DOCS_SENTINEL; /* not reached */
}
/* Default for the get_freq slot (installed by Kino_TermDocs_new): reports
 * that a subclass must define it.  Kino_confess is expected not to return;
 * the explicit return only guarantees that this non-void function never
 * falls off its end. */
U32
Kino_TermDocs_get_freq_death(TermDocs *term_docs) {
    Kino_confess("term_docs->get_freq must be defined in a subclass");
    return KINO_TERM_DOCS_SENTINEL; /* not reached */
}
/* Default for the get_positions slot (installed by Kino_TermDocs_new):
 * reports that a subclass must define it.  Kino_confess is expected not to
 * return; the explicit return only guarantees that this non-void function
 * never falls off its end. */
SV*
Kino_TermDocs_get_positions_death(TermDocs *term_docs) {
    Kino_confess("term_docs->get_positions must be defined in a subclass");
    return NULL; /* not reached */
}
/* Default for the seek_tinfo slot (installed by Kino_TermDocs_new):
 * ignores its arguments and reports that a subclass must define it. */
void
Kino_TermDocs_seek_tinfo_death(TermDocs *term_docs, TermInfo *tinfo) {
Kino_confess("term_docs->seek_tinfo must be defined in a subclass");
}
/* Default for the next slot (installed by Kino_TermDocs_new): reports that
 * a subclass must define it.  Kino_confess is expected not to return; the
 * explicit return only guarantees that this non-void function never falls
 * off its end. */
bool
Kino_TermDocs_next_death(TermDocs *term_docs) {
    Kino_confess("term_docs->next must be defined in a subclass");
    return 0; /* not reached */
}
/* "_death" stub matching the signature of the read slot: reports that a
 * subclass must define read.  Kino_confess is expected not to return; the
 * explicit return only guarantees that this non-void function never falls
 * off its end. */
U32
Kino_TermDocs_read_death(TermDocs* term_docs, SV* doc_nums_sv, SV* freqs_sv,
                         U32 num_wanted) {
    Kino_confess("term_docs->read must be defined in a subclass");
    return 0; /* not reached */
}
/* Default for the skip_to slot (installed by Kino_TermDocs_new): reports
 * that a subclass must define it.  Kino_confess is expected not to return;
 * the explicit return only guarantees that this non-void function never
 * falls off its end. */
bool
Kino_TermDocs_skip_to_death(TermDocs *term_docs, U32 target) {
    Kino_confess("term_docs->skip_to must be defined in a subclass");
    return 0; /* not reached */
}
/* Default destructor (installed in the destroy slot by Kino_TermDocs_new).
 * Releases only the TermDocs struct itself; the child and positions members
 * are not touched here. */
void
Kino_TermDocs_destroy(TermDocs *term_docs) {
Kino_Safefree(term_docs);
}
__POD__
=begin devdocs
=head1 NAME
KinoSearch::Index::TermDocs - retrieve list of docs which contain a Term
=head1 SYNOPSIS
# abstract base class, but here's how a subclass works:
$term_docs->seek($term);
my $num_got = $term_docs->read( $docs, $freqs, $num_to_read );
my @doc_nums = unpack( 'I*', $docs );
my @tf_ds = unpack( 'I*', $freqs ); # term frequency in document
# alternately...
$term_docs->set_read_positions(1);
while ($term_docs->next) {
do_something_with(
doc => $term_docs->get_doc,
freq => $term_docs->get_freq,
positions => $term_docs->get_positions,
);
}
=head1 DESCRIPTION
Feed a TermDocs object a Term to get docs (and freqs). If a term is present
in the portion of an index that a TermDocs subclass is responsible for, the
object is used to access the doc_nums for the documents in which it appears,
plus the number of appearances, plus (optionally) the positions at which the
term appears in the document.
=head1 COPYRIGHT
Copyright 2005-2006 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
See L<KinoSearch|KinoSearch> version 0.09.
=end devdocs
=cut