#!/usr/bin/perl
use strict;
use warnings;
use KinoSearch::Index::IndexReader;
# Parse and validate the command line, then load the schema class.
#
# Expects exactly two arguments:
#   SCHEMA_CLASS    - package name of the schema class to load
#   INDEX_LOCATION  - directory that holds the index
#
# Dies when the argument count is wrong, the class name is malformed, the
# class fails to load, the class is not a KinoSearch::Schema, or the
# location is not a directory.

# The trailing newline keeps perl from appending "at ... line N" to what is
# purely a user error.
die "Usage: dump_index SCHEMA_CLASS INDEX_LOCATION\n" unless @ARGV == 2;
my ( $schema_class, $where ) = @ARGV;

# The class name is interpolated into a string eval below, so this pattern
# is the only thing standing between the command line and arbitrary code.
# \A and \z anchor the whole string ('$' would also accept a trailing
# newline), and the first character must not be a digit so that a value
# such as "5" cannot turn the 'use' into a Perl version check.
die "Invalid schema class name: $schema_class"
    unless $schema_class =~ /\A[A-Za-z_]\w*(?:::\w+)*\z/;

# String eval is required here because the module name is only known at
# run time; it is safe only because of the validation above.
eval "use $schema_class";
die $@ if $@;

die "'$schema_class' isn't a KinoSearch::Schema"
    unless $schema_class->isa("KinoSearch::Schema");
die "Can't open '$where'" unless -d $where;
# Instantiate the schema (its field list is consulted further down) and
# open an index reader on the index found at $where.
my $schema = $schema_class->new;
my $r      = KinoSearch::Index::IndexReader->open(
    invindex => $schema_class->open($where),
);

# When the reader exposes an array of sub-readers, work on those
# individually; otherwise treat the reader itself as the only one.
my @readers;
if ( ref $r->{sub_readers} eq 'ARRAY' ) {
    @readers = @{ $r->{sub_readers} };
}
else {
    @readers = ($r);
}
print "We have ", scalar(@readers), " readers\n";
# Dump every reader in turn: its segment name and document count, the
# schema's fields with their flags, and every term together with the
# documents the posting list yields for it. Finishes with a one-line total.

# The schema is not modified while dumping, so fetch and sort the field
# names once instead of twice per segment.
my @field_names = sort $schema->all_fields;

# Boolean properties queried on each field spec; a field's active flags are
# reported by their first letters.
my @flag_names = qw(indexed stored analyzed vectorized binary compressed);

print "\n\nDocuments:\n";
for my $reader (@readers) {
    my $seg_info = $reader->get_seg_info;
    print "Segment "
        . $reader->get_seg_name . " has "
        . $reader->max_doc
        . " docs\n";

    # One line per field: "<field number>: <name> [flags]".
    print "Fields:\n";
    for my $field_name (@field_names) {
        my $field_spec = $schema->fetch_fspec($field_name);
        my $field_num  = $seg_info->field_num($field_name);
        print "\t$field_num: $field_name";

        my @info = grep { $field_spec->$_() } @flag_names;
        print " [" . join( ',', map { substr( $_, 0, 1 ) } sort @info ) . "]"
            if @info;
        print "\n";
    }

    # For each field that has a lexicon, walk its terms and list the
    # documents returned by the posting list for each term.
    print "Terms:\n";
    for my $field_name (@field_names) {
        my $lexicon = $reader->look_up_field($field_name);
        next unless defined $lexicon;

        my $plist = $reader->posting_list( field => $field_name );
        while ( $lexicon->next ) {
            my $term = $lexicon->get_term;
            print $term->to_string . "\n";

            $plist->seek($term);
            while ( $plist->next ) {
                # NOTE(review): the value labelled "occurrences" comes from
                # get_doc_freq; presumably that is the number of documents
                # containing the term rather than a per-document count --
                # confirm against the PostingList API.
                print "\t Doc "
                    . $plist->get_doc_num . " ("
                    . $plist->get_doc_freq
                    . " occurrences)\n";
            }
        }
    }
}

print "Total documents: " . $r->max_doc . " in " . @readers . " segments\n";
__END__
=head1 NAME

dump_index - dump the contents of an index

=head1 SYNOPSIS

    dump_index SCHEMA_CLASS INDEX_LOCATION

=head1 DESCRIPTION

This will dump out an index in human-readable form.

=head1 AUTHOR

Adapted from a Plucene-based version by Brian Phillips.

=head1 COPYRIGHT AND LICENCE

Copyright 2006 Brian Phillips.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut