package KinoSearch1::Index::IndexReader;
use strict;
use warnings;
use KinoSearch1::Util::ToolSet;
use base qw( KinoSearch1::Util::Class );

BEGIN {
    __PACKAGE__->init_instance_vars(
        # constructor params / members
        invindex       => undef,
        seg_infos      => undef,
        close_invindex => 1,
        invindex_owner => 1,
    );
    __PACKAGE__->ready_get(qw( invindex ));
}

use KinoSearch1::Store::FSInvIndex;
use KinoSearch1::Index::SegReader;
use KinoSearch1::Index::MultiReader;
use KinoSearch1::Index::SegInfos;
use KinoSearch1::Index::IndexFileNames qw(
    WRITE_LOCK_NAME WRITE_LOCK_TIMEOUT
    COMMIT_LOCK_NAME COMMIT_LOCK_TIMEOUT
);

# Factory-style constructor: build a throwaway object through the parent
# constructor, then hand back whatever reader _open_multi_or_segreader
# produces from it.
sub new {
    my $either = shift;
    return $either->SUPER::new(@_)->_open_multi_or_segreader;
}

# Open the invindex and return a reader for it: the lone SegReader when
# exactly one segment is listed, and a MultiReader wrapping all of the
# SegReaders for any other segment count.
sub _open_multi_or_segreader {
    my ($self) = @_;

    # Anything that is not already an InvIndex object is treated as a
    # filepath and used to build an FSInvIndex.
    unless ( a_isa_b( $self->{invindex}, 'KinoSearch1::Store::InvIndex' ) ) {
        $self->{invindex} = KinoSearch1::Store::FSInvIndex->new(
            path => $self->{invindex} );
    }
    my $invindex = $self->{invindex};

    # The closure below fills in $reader; it is handed to run_while_locked
    # together with the commit lock name and timeout.
    my $reader;
    my $choose_reader = sub {

        # read the segments file
        my $infos = KinoSearch1::Index::SegInfos->new;
        $infos->read_infos($invindex);

        # one SegReader per segment listed in the invindex
        my @sub_readers;
        for my $seg_info ( $infos->infos ) {
            push @sub_readers,
                KinoSearch1::Index::SegReader->new(
                seg_name => $seg_info->get_seg_name,
                invindex => $invindex,
                );
        }

        # a single SegReader stands on its own; otherwise wrap them all
        if ( @sub_readers == 1 ) {
            $reader = $sub_readers[0];
        }
        else {
            $reader = KinoSearch1::Index::MultiReader->new(
                invindex    => $invindex,
                sub_readers => \@sub_readers,
            );
        }
        return $reader;
    };

    $invindex->run_while_locked(
        lock_name => COMMIT_LOCK_NAME,
        timeout   => COMMIT_LOCK_TIMEOUT,
        do_body   => $choose_reader,
    );

    return $reader;
}

=begin comment

    my $num = $reader->max_doc;

Return the highest document number available to the reader.

=end comment
=cut

sub max_doc {
    my $self = shift;
    return $self->abstract_death;
}

=begin comment

    my $num = $reader->num_docs;

Return the number of (non-deleted) documents available to the reader.

=end comment
=cut

sub num_docs {
    my $self = shift;
    return $self->abstract_death;
}

=begin comment

    my $term_docs = $reader->term_docs($term);

Given a Term, return a TermDocs subclass.

=end comment
=cut

sub term_docs {
    my $self = shift;
    return $self->abstract_death;
}

=begin comment

    my $norms_reader = $reader->norms_reader($field_name);

Given a field name, return a NormsReader object.

=end comment
=cut

sub norms_reader {
    my $self = shift;
    return $self->abstract_death;
}

=begin comment

    $reader->delete_docs_by_term( $term );

Delete all the documents available to the reader that index the given Term.

=end comment
=cut

sub delete_docs_by_term {
    my $self = shift;
    return $self->abstract_death;
}

=begin comment

    $boolean = $reader->has_deletions;

Return true if any documents have been marked as deleted.

=end comment
=cut

sub has_deletions {
    my $self = shift;
    return $self->abstract_death;
}

=begin comment

    my $enum = $reader->terms($term);

Given a Term, return a TermEnum subclass.  The Enum will be pre-located via
$enum->seek($term) to the right spot.

=end comment
=cut

sub terms {
    my $self = shift;
    return $self->abstract_death;
}

=begin comment

    my $field_names = $reader->get_field_names(
        indexed => $indexed_fields_only,
    );

Return a hashref which is a list of field names.  If the parameter 'indexed'
is true, return only the names of fields which are indexed.

=end comment
=cut

sub get_field_names {
    my $self = shift;
    return $self->abstract_death;
}

=begin comment

    my $infos = $reader->generate_field_infos;

Return a new FieldInfos object, describing all the fields held by the reader.
The FieldInfos object will be consolidated, and thus may not be representative
of every field in every segment if there are conflicting definitions.

=end comment
=cut

sub generate_field_infos {
    my $self = shift;
    return $self->abstract_death;
}

=begin comment

    my @sparse_segreaders = $reader->segreaders_to_merge;
    my @all_segreaders    = $reader->segreaders_to_merge('all');

Find segments which are good candidates for merging, as they don't contain
many valid documents.  Returns an array of SegReaders.  If passed an argument,
return all SegReaders.

=end comment
=cut

sub segreaders_to_merge {
    my $self = shift;
    return $self->abstract_death;
}

=begin comment

    $reader->close;

Release all resources.

=end comment
=cut

sub close {
    my $self = shift;
    return $self->abstract_death;
}

1;

__END__

=begin devdocs

=head1 NAME

KinoSearch1::Index::IndexReader - base class for objects which read invindexes

=head1 DESCRIPTION

There are two subclasses of the abstract base class IndexReader: SegReader,
which reads a single segment, and MultiReader, which condenses the output of
several SegReaders.  Since each segment is a self-contained inverted index, a
SegReader is in effect a complete index reader.

The constructor for IndexReader returns either a SegReader if the index has
only one segment, or a MultiReader if there are multiple segments.

=head1 COPYRIGHT

Copyright 2005-2010 Marvin Humphrey

=head1 LICENSE, DISCLAIMER, BUGS, etc.

See L<KinoSearch1> version 1.00.

=end devdocs
=cut