use strict;
use warnings;
package KSx::Index::ByteBufDocWriter;
use base qw( KinoSearch::Index::DataWriter );
use Carp;
use Scalar::Util qw( blessed );
use bytes;
no bytes;
# Inside-out member vars.
our %field;
our %width;
our %outstream;
sub new {
my ( $either, %args ) = @_;
my $width = delete $args{width};
my $field = delete $args{field};
my $self = $either->SUPER::new(%args);
confess("Missing required param 'width'") unless defined $width;
confess("Missing required param 'field'") unless defined $field;
if ( $width < 1 ) { confess("'width' must be at least 1") }
$field{$$self} = $field;
$width{$$self} = $width;
return $self;
}
sub _lazy_init {
my $self = shift;
# Get outstream. Skip past non-doc #0.
my $folder = $self->get_folder;
my $filename = $self->get_segment->get_name . "/bytebufdocs.dat";
my $outstream = $outstream{$$self} = $folder->open_out($filename)
or confess("Can't open '$filename'");
my $nulls = "\0" x $width{$$self};
$outstream->write_bytes($nulls);
return $outstream;
}
sub add_inverted_doc {
my ( $self, %args ) = @_;
my $outstream = $outstream{$$self} || _lazy_init($self);
my $fields = $args{inverter}->get_doc->get_fields;
my $width = $width{$$self};
my $field = $field{$$self};
if ( bytes::length( $fields->{$field} ) != $width ) {
confess("Width of '$fields->{$field}' not $width");
}
$outstream->write_bytes( $fields->{$field} );
}
sub add_segment {
my ( $self, %args ) = @_;
my $seg_reader = $args{reader};
my $doc_map = $args{doc_map};
my $doc_max = $seg_reader->doc_max;
# Bail if the supplied segment is empty. */
return unless $doc_max;
my $outstream = $outstream{$$self} || _lazy_init($self);
my $doc_reader = $seg_reader->obtain("KinoSearch::Index::DocReader");
confess("Not a ByteBufDocReader")
unless ( blessed($doc_reader)
and $doc_reader->isa("KSx::Index::ByteBufDocReader") );
for ( my $i = 1; $i <= $doc_max; $i++ ) {
next unless $doc_map->get($i);
my $buf;
$doc_reader->read_record( $i, \$buf );
$outstream->write_bytes($buf);
}
}
sub merge_segment {
my ( $self, %args ) = @_;
my $seg_reader = $args{reader};
my $snapshot = $self->get_snapshot;
my $merged_file = $seg_reader->get_segment->get_name . "/bytebufdocs.dat";
$snapshot->delete_entry($merged_file);
$self->add_segment(%args);
}
sub finish {
my $self = shift;
my $outstream = $outstream{$$self};
if ($outstream) {
$outstream->close;
my $segment = $self->get_segment;
$segment->store_metadata(
key => 'bytebufdocs',
metadata => $self->metadata
);
}
}
sub format {1}
sub DESTROY {
my $self = shift;
delete $field{$$self};
delete $width{$$self};
delete $outstream{$$self};
$self->SUPER::DESTROY;
}
1;
__END__
__POD__
=head1 NAME
KSx::Index::ByteBufDocWriter - Write a Doc as a fixed-width byte array.
=head1 SYNOPSIS
Create an Architecture subclass which overrides register_doc_writer() and
register_doc_reader():
package MyArchitecture;
use base qw( KinoSearch::Architecture );
use KSx::Index::ByteBufDocReader;
use KSx::Index::ByteBufDocWriter;
sub register_doc_writer {
my ( $self, $seg_writer ) = @_;
my $doc_writer = KSx::Index::ByteBufDocWriter->new(
width => 16,
field => 'id',
snapshot => $seg_writer->get_snapshot,
segment => $seg_writer->get_segment,
polyreader => $seg_writer->get_polyreader,
);
$seg_writer->register(
api => "KinoSearch::Index::DocReader",
component => $doc_writer,
);
$seg_writer->add_writer($doc_writer);
}
sub register_doc_reader {
my ( $self, $seg_reader ) = @_;
my $doc_reader = KSx::Index::ByteBufDocReader->new(
width => 16,
schema => $seg_reader->get_schema,
folder => $seg_reader->get_folder,
segments => $seg_reader->get_segments,
seg_tick => $seg_reader->get_seg_tick,
snapshot => $seg_reader->get_snapshot,
);
$seg_reader->register(
api => 'KinoSearch::Index::DocReader',
component => $doc_reader,
);
}
package MySchema;
use base qw( KinoSearch::Schema );
sub architecture { MyArchitecture->new }
Proceed as normal in your indexer app, making sure that every supplied
document supplies a valid value for the field in question:
$indexer->add_doc({
title => $title,
content => $content,
id => $id, # <---- Must meet spec.
});
Then, in your search app:
my $searcher = KinoSearch::Searcher->new( index => '/path/to/index' );
my $hits = $searcher->hits( query => $query );
while ( my $id = $hits->next ) {
# $id is a plain old 16-byte Perl scalar instead of a Hit object
my $real_doc = $external_document_source->fetch($id);
...
}
=head1 DESCRIPTION
This is a proof-of-concept class to demonstrate alternate implementations for
fetching documents. It is unsupported.
=head1 COPYRIGHT
Copyright 2009 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
See L<KinoSearch> version 0.30.
=cut