# ***********************************************
# 
# !!!! DO NOT EDIT !!!!
# 
# This file was auto-generated by Build.PL.
# 
# ***********************************************
# 
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

=encoding utf8

=head1 NAME

Lucy::Analysis::Token - Unit of text.

=head1 SYNOPSIS

        my $token = Lucy::Analysis::Token->new(
            text         => 'blind',
            start_offset => 8,
            end_offset   => 13,
        );

        $token->set_text('mice');

=head1 DESCRIPTION

Token is the fundamental unit used by Apache Lucy’s Analyzer subclasses.
Each Token has 5 attributes: C<text>, C<start_offset>,
C<end_offset>, C<boost>, and C<pos_inc>.

The C<text> attribute is a Unicode string encoded as UTF-8.

C<start_offset> is the start point of the token text, measured in
Unicode code points from the top of the stored field;
C<end_offset> delimits the corresponding closing boundary.
C<start_offset> and C<end_offset> locate the Token
within a larger context, even if the Token’s text attribute gets modified
– by stemming, for instance.  The Token for “beating” in the text “beating
a dead horse” begins life with a start_offset of 0 and an end_offset of 7;
after stemming, the text is “beat”, but the start_offset is still 0 and the
end_offset is still 7.  This allows “beating” to be highlighted correctly
after a search matches “beat”.

C<boost> is a per-token weight.  Use this when you want to assign
more or less importance to a particular token, as you might for emboldened
text within an HTML document, for example.  (Note: The field this token
belongs to must be spec’d to use a posting of type RichPosting.)

C<pos_inc> is the POSition INCrement, measured in Tokens.  This
attribute, which defaults to 1, is an advanced tool for manipulating
phrase matching.  Ordinarily, Tokens are assigned consecutive position
numbers: 0, 1, and 2 for C<"three blind mice">.  However, if you
set the position increment for “blind” to, say, 1000, then the three tokens
will end up assigned to positions 0, 1, and 1001 – and will no longer
produce a phrase match for the query C<"three blind mice">.

=head1 CONSTRUCTORS

=head2 new

    my $token = Lucy::Analysis::Token->new(
        text         => $text,          # required
        start_offset => $start_offset,  # required
        end_offset   => $end_offset,    # required
        boost        => 1.0,            # optional
        pos_inc      => 1,              # optional
    );

=over

=item *

B<text> - A string.

=item *

B<start_offset> - Start offset into the original document in Unicode
code points.

=item *

B<end_offset> - End offset into the original document in Unicode
code points.

=item *

B<boost> - Per-token weight.

=item *

B<pos_inc> - Position increment for phrase matching.

=back

=head1 METHODS

=head2 get_text

    my $text = $token->get_text;

Get the token's text.

=head2 set_text

    $token->set_text($text);

Set the token's text.

=head2 get_start_offset

    my $int = $token->get_start_offset();

=head2 get_end_offset

    my $int = $token->get_end_offset();

=head2 get_boost

    my $float = $token->get_boost();

=head2 get_pos_inc

    my $int = $token->get_pos_inc();

=head2 get_len

    my $int = $token->get_len();

=head1 INHERITANCE

Lucy::Analysis::Token isa Clownfish::Obj.

=cut