#!/usr/bin/perl
use strict;
use warnings;

use KinoSearch::Index::IndexReader;

# Parse and validate the command line arguments.
#
# Exactly two arguments are required: the name of a schema class and the
# location of the index.  The script dies unless the class name is
# well-formed, the class loads, it isa KinoSearch::Schema, and the location
# is an existing directory.
die "Usage: dump_index SCHEMA_CLASS INDEX_LOCATION" unless @ARGV == 2;
my ( $schema_class, $where ) = @ARGV;

# The class name is interpolated into the string eval below, so it has to be
# validated strictly.  \A and \z are used instead of ^ and $ because $ also
# matches just before a trailing newline, which would let "Foo\n" through.
die "Invalid schema class name: $schema_class"
    unless $schema_class =~ /\A\w+(?:::\w+)*\z/;

# Load the schema class at run time; any compile/load error is fatal.
eval "use $schema_class";
die $@ if $@;
die "'$schema_class' isn't a KinoSearch::Schema"
    unless $schema_class->isa("KinoSearch::Schema");
die "Can't open '$where'" unless -d $where;

# Instantiate the schema and open an IndexReader over the index at $where.
my $schema = $schema_class->new;
my $r = KinoSearch::Index::IndexReader->open(
    invindex => $schema_class->open($where),
);

# If the reader exposes an array of sub-readers, dump each of them;
# otherwise treat the reader itself as the only one.
my @readers;
if ( ref( $r->{sub_readers} ) eq 'ARRAY' ) {
    @readers = @{ $r->{sub_readers} };
}
else {
    @readers = ($r);
}
my $reader_count = @readers;
print "We have $reader_count readers\n";

# Dump every reader in turn: a header line, the field table, then each term
# of each field together with the documents reported by its posting list.
print "\n\nDocuments:\n";

# Accessors probed on each field spec, in the order they are queried.
my @spec_flags = qw(indexed stored analyzed vectorized binary compressed);

foreach my $reader (@readers) {
    my $seg_info = $reader->get_seg_info;
    my $seg_name = $reader->get_seg_name;
    my $seg_docs = $reader->max_doc;
    print "Segment $seg_name has $seg_docs docs\n";

    # Field table: field number, field name and, when any flag accessor
    # returns true, the sorted flags abbreviated to their first letter.
    print "Fields:\n";
    foreach my $field_name ( sort $schema->all_fields ) {
        my $field_spec = $schema->fetch_fspec($field_name);
        my $field_num  = $seg_info->field_num($field_name);
        print "\t$field_num: $field_name";

        my @set_flags = grep { $field_spec->$_() } @spec_flags;
        if (@set_flags) {
            my $codes = join ',', map { substr( $_, 0, 1 ) } sort @set_flags;
            print " [$codes]";
        }
        print "\n";
    }

    # Term listing: fields for which look_up_field returns undef are skipped.
    print "Terms:\n";
    foreach my $field_name ( sort $schema->all_fields ) {
        my $lexicon = $reader->look_up_field($field_name);
        defined $lexicon or next;

        my $plist = $reader->posting_list( field => $field_name );
        while ( $lexicon->next ) {
            my $term      = $lexicon->get_term;
            my $term_text = $term->to_string;
            print "$term_text\n";

            # Position the posting list on this term, then walk its entries.
            $plist->seek($term);
            while ( $plist->next ) {
                my $doc_num = $plist->get_doc_num;

                # NOTE(review): the label says "occurrences" but the value
                # comes from get_doc_freq -- confirm this is the intended
                # per-document count.
                my $doc_freq = $plist->get_doc_freq;
                print "\t Doc $doc_num ($doc_freq occurrences)\n";
            }
        }
    }
}

# Summary line, using the top-level reader's max_doc and the reader count.
my $total_docs    = $r->max_doc;
my $segment_count = @readers;
print "Total documents: $total_docs in $segment_count segments\n";

__END__

=head1 NAME

dump_index - dump the contents of an index

=head1 SYNOPSIS

	dump_index SCHEMA_CLASS INDEX_LOCATION

=head1 DESCRIPTION

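This will dump out a KinoSearch index in human-readable form: for each
segment, its fields and their flags, followed by every term and the
documents it appears in.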

=head1 AUTHOR

Adapted from a Plucene-based version by Brian Phillips.

=head1 COPYRIGHT AND LICENCE

Copyright 2006 Brian Phillips.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut