package KinoSearch::Util::BitVector;
use strict;
use warnings;
use KinoSearch::Util::ToolSet;
use base qw( KinoSearch::Util::Class );
our %instance_vars = __PACKAGE__->init_instance_vars( capacity => 0, );
sub new {
my $class = shift;
verify_args( \%instance_vars, @_ );
my %args = ( %instance_vars, @_ );
$class = ref($class) || $class;
return _new( $class, $args{capacity} );
}
1;
__END__
__XS__
MODULE = KinoSearch PACKAGE = KinoSearch::Util::BitVector
void
_new(class, capacity)
char *class;
U32 capacity;
PREINIT:
BitVector *obj;
PPCODE:
obj = Kino_BitVec_new(capacity);
ST(0) = sv_newmortal();
sv_setref_pv(ST(0), class, (void*)obj);
XSRETURN(1);
=for comment
Return true if the bit indcated by $num has been set, false if it hasn't
(regardless of whether $num lies within the bounds of the object's capacity).
=cut
bool
get(obj, num)
BitVector *obj;
U32 num;
CODE:
RETVAL = Kino_BitVec_get(obj, num);
OUTPUT: RETVAL
=for comment
Set the bit at $num to 1.
=cut
void
set(obj, ...)
BitVector *obj;
PREINIT:
U32 i, num;
PPCODE:
for (i = 1; i < items; i++) {
num = (U32)( SvUV( ST(i) ) );
Kino_BitVec_set(obj, num);
}
=for comment
Clear the bit at $num (i.e. set it to 0).
=cut
void
clear(obj, num)
BitVector *obj;
U32 num;
PPCODE:
Kino_BitVec_clear(obj, num);
=for comment
Set all the bits bounded by $first and $last, inclusive, to 1.
=cut
void
bulk_set(obj, first, last)
BitVector *obj;
U32 first;
U32 last;
PPCODE:
Kino_BitVec_bulk_set(obj, first, last);
=for comment
Clear all the bits bounded by $first and $last, inclusive.
=cut
void
bulk_clear(obj, first, last)
BitVector *obj;
U32 first;
U32 last;
PPCODE:
Kino_BitVec_bulk_clear(obj, first, last);
=for comment
Given $num, return either $num (if it is set), the next set bit above it, or
if no such bit exists, undef (from Perl) or a sentinel (0xFFFFFFFF) from C.
=cut
SV*
next_set_bit(obj, num)
BitVector *obj;
U32 num;
CODE:
num = Kino_BitVec_next_set_bit(obj, num);
RETVAL = num == KINO_BITVEC_SENTINEL ? &PL_sv_undef : newSVuv(num);
OUTPUT: RETVAL
=for comment
Given $num, return $num (if it is clear), or the next clear bit above it.
The highest number that next_clear_bit can return is the object's capacity.
=cut
SV*
next_clear_bit(obj, num)
BitVector *obj;
U32 num;
CODE:
num = Kino_BitVec_next_clear_bit(obj, num);
RETVAL = num == KINO_BITVEC_SENTINEL ? &PL_sv_undef : newSVuv(num);
OUTPUT: RETVAL
=for comment
Modify the BitVector so that only bits which remain set are those which 1)
were already set in this BitVector, and 2) were also set in the other
BitVector.
=cut
void
logical_and(obj, other)
BitVector *obj;
BitVector *other;
PPCODE:
Kino_BitVec_logical_and(obj, other);
=for comment
Return an arrayref of the with each element the number of a set bit.
=cut
void
to_arrayref(obj)
BitVector *obj;
PREINIT:
AV *out_av;
PPCODE:
out_av = Kino_BitVec_to_array(obj);
XPUSHs(newRV_noinc( (SV*)out_av ));
XSRETURN(1);
=for comment
Setters and getters. Two quirks: set_capacity can't adjust capacity
downwards, and set_bits automatically adjusts capacity to the appropriate
multiple of 8.
=cut
SV*
_set_or_get(obj, ...)
BitVector *obj;
ALIAS:
set_capacity = 1
get_capacity = 2
set_bits = 3
get_bits = 4
PREINIT:
STRLEN len;
U32 new_capacity;
char *new_bits;
CODE:
{
/* if called as a setter, make sure the extra arg is there */
if (ix % 2 == 1 && items != 2)
croak("usage: $term_info->set_xxxxxx($val)");
switch (ix) {
case 1: new_capacity = SvUV(ST(1));
if (new_capacity < obj->capacity) {
Kino_confess("can't shrink capacity from %d to %d",
obj->capacity, obj->capacity);
}
Kino_BitVec_grow(obj, new_capacity);
/* fall through */
case 2: RETVAL = newSVuv(obj->capacity);
break;
case 3: Kino_Safefree(obj->bits);
new_bits = SvPV(ST(1), len);
obj->bits = (unsigned char*)Kino_savepvn(new_bits, len);
obj->capacity = len << 3;
/* fall through */
case 4: len = ceil(obj->capacity / 8.0);
RETVAL = newSVpv((char*)obj->bits, len);
break;
default: Kino_confess("Internal error: _set_or_get ix: %d", ix);
}
}
OUTPUT: RETVAL
void
DESTROY(obj)
BitVector *obj;
PPCODE:
Kino_BitVec_destroy(obj);
__H__
#ifndef H_KINO_BIT_VECTOR
#define H_KINO_BIT_VECTOR 1
#include "limits.h"
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "KinoSearchUtilEndianUtils.h"
#include "KinoSearchUtilCarp.h"
#include "KinoSearchUtilMemManager.h"
#define KINO_BITVEC_SENTINEL 0xFFFFFFFF
typedef struct bitvector {
U32 capacity;
unsigned char *bits;
} BitVector;
BitVector* Kino_BitVec_new(U32);
BitVector* Kino_BitVec_clone(BitVector*);
void Kino_BitVec_grow(BitVector*, U32);
void Kino_BitVec_set(BitVector*, U32);
void Kino_BitVec_clear(BitVector*, U32);
void Kino_BitVec_bulk_set(BitVector*, U32, U32);
void Kino_BitVec_bulk_clear(BitVector*, U32, U32);
bool Kino_BitVec_get(BitVector*, U32);
U32 Kino_BitVec_next_set_bit(BitVector*, U32);
U32 Kino_BitVec_next_clear_bit(BitVector*, U32);
void Kino_BitVec_logical_and(BitVector*, BitVector*);
AV* Kino_BitVec_to_array(BitVector*);
void Kino_BitVec_destroy(BitVector*);
#endif /* include guard */
__C__
#include "KinoSearchUtilBitVector.h"
static unsigned char bitmasks[] = {
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
};
BitVector*
Kino_BitVec_new(U32 capacity) {
BitVector *obj;
Kino_New(0, obj, 1, BitVector);
obj->capacity = 0;
obj->bits = NULL;
Kino_BitVec_grow(obj, capacity);
return obj;
}
BitVector*
Kino_BitVec_clone(BitVector *obj) {
BitVector *evil_twin;
U32 byte_size;
Kino_New(0, evil_twin, 1, BitVector);
byte_size = ceil(obj->capacity / 8.0);
evil_twin->bits
= (unsigned char*)Kino_savepvn((char*)obj->bits, byte_size);
evil_twin->capacity = obj->capacity;
return evil_twin;
}
void
Kino_BitVec_grow(BitVector *obj, U32 capacity) {
U32 byte_size;
U32 old_capacity;
/* derive size in bytes from size in bits */
byte_size = ceil(capacity / 8.0);
if (capacity > obj->capacity && obj->bits != NULL) {
U32 old_byte_size;
old_byte_size = ceil(obj->capacity / 8.0);
Kino_Renew(obj->bits, byte_size, unsigned char);
/* zero out all new bits, since Renew doesn't guarantee they're 0 */
old_capacity = obj->capacity;
obj->capacity = capacity;
Kino_BitVec_bulk_clear(obj, old_capacity, capacity - 1);
/* shouldn't be necessary, but Valgrind reports an error without it */
if (byte_size > old_byte_size) {
memset( (obj->bits + old_byte_size), 0x00,
(byte_size - old_byte_size) );
}
}
else if (obj->bits == NULL) {
Kino_Newz(0, obj->bits, byte_size, unsigned char);
obj->capacity = capacity;
}
}
void
Kino_BitVec_set(BitVector *obj, U32 num) {
if (num >= obj->capacity)
Kino_BitVec_grow(obj, num + 1);
obj->bits[ (num >> 3) ] |= bitmasks[num & 0x7];
}
void
Kino_BitVec_clear(BitVector *obj, U32 num) {
if (num >= obj->capacity)
Kino_BitVec_grow(obj, num + 1);
obj->bits[ (num >> 3) ] &= ~(bitmasks[num & 0x7]);
}
void
Kino_BitVec_bulk_set(BitVector *obj, U32 first, U32 last) {
unsigned char *ptr;
U32 num_bytes;
/* detect range errors */
if (first > last) {
Kino_confess("bitvec range error: %d %d %d", first, last,
obj->capacity);
}
/* grow the bits if necessary */
if (last >= obj->capacity) {
Kino_BitVec_grow(obj, last);
}
/* set partial bytes */
while (first % 8 != 0 && first <= last) {
Kino_BitVec_set(obj, first++);
}
while (last % 8 != 0 && last >= first) {
Kino_BitVec_set(obj, last--);
}
Kino_BitVec_set(obj, last);
/* mass set whole bytes */
if (last > first) {
ptr = obj->bits + (first >> 3);
num_bytes = (last - first) >> 3;
memset(ptr, 0xff, num_bytes);
}
}
void
Kino_BitVec_bulk_clear(BitVector *obj, U32 first, U32 last) {
unsigned char *ptr;
U32 num_bytes;
/* detect range errors */
if (first > last) {
Kino_confess("bitvec range error: %d %d %d", first, last,
obj->capacity);
}
/* grow the bits if necessary */
if (last >= obj->capacity) {
Kino_BitVec_grow(obj, last);
}
/* clear partial bytes */
while (first % 8 != 0 && first <= last) {
Kino_BitVec_clear(obj, first++);
}
while (last % 8 != 0 && last >= first) {
Kino_BitVec_clear(obj, last--);
}
Kino_BitVec_clear(obj, last);
/* mass clear whole bytes */
if (last > first) {
ptr = obj->bits + (first >> 3);
num_bytes = (last - first) >> 3;
memset(ptr, 0, num_bytes);
}
}
bool
Kino_BitVec_get(BitVector *obj, U32 num) {
if (num >= obj->capacity) return 0;
return (obj->bits[ (num >> 3) ] & bitmasks[num & 0x7]) != 0;
}
U32
Kino_BitVec_next_set_bit(BitVector *obj, U32 num) {
U32 outval;
unsigned char *bits_ptr;
unsigned char *end_ptr;
int i;
U32 byte_size;
if (num >= obj->capacity) {
return KINO_BITVEC_SENTINEL;
}
outval = KINO_BITVEC_SENTINEL;
bits_ptr = obj->bits + (num >> 3) ;
byte_size = ceil(obj->capacity / 8.0);
end_ptr = obj->bits + byte_size;
while (outval == KINO_BITVEC_SENTINEL) {
if (*bits_ptr != 0) {
/* check each num in represented in this byte */
outval = (bits_ptr - obj->bits) * 8;
for (i = 0; i < 8; i++) {
if (Kino_BitVec_get(obj, outval) == 1) {
if (outval < obj->capacity && outval >= num) {
return outval;
}
}
outval++;
}
/* nothing valid, so reset the sentinel */
outval = KINO_BITVEC_SENTINEL;
}
if (++bits_ptr >= end_ptr)
break;
}
/* nothing valid, so return a sentinel */
return KINO_BITVEC_SENTINEL;
}
U32
Kino_BitVec_next_clear_bit(BitVector *obj, U32 num) {
U32 outval;
unsigned char *bits_ptr;
unsigned char *end_ptr;
int i;
if (num >= obj->capacity) {
return num;
}
outval = KINO_BITVEC_SENTINEL;
bits_ptr = obj->bits + (num >> 3) ;
end_ptr = obj->bits + (obj->capacity >> 3);
while (outval == KINO_BITVEC_SENTINEL) {
if (*bits_ptr != 0xFF) {
/* check each num in represented in this byte */
outval = (bits_ptr - obj->bits) * 8;
for (i = 0; i < 8; i++) {
if (Kino_BitVec_get(obj, outval) == 0) {
if (outval < obj->capacity && outval >= num) {
return outval;
}
}
outval++;
}
/* nothing valid, so reset the sentinel */
outval = KINO_BITVEC_SENTINEL;
}
if (++bits_ptr >= end_ptr)
break;
}
/* didn't find clear bits in the set, so return 1 larger than the max */
return obj->capacity;
}
void
Kino_BitVec_logical_and(BitVector *obj, BitVector *other) {
U32 num = 0;
while (1) {
num = Kino_BitVec_next_set_bit(obj, num);
if (num == KINO_BITVEC_SENTINEL)
break;
if ( !Kino_BitVec_get(other, num) )
Kino_BitVec_clear(obj, num);
num++;
}
}
AV*
Kino_BitVec_to_array(BitVector* obj) {
U32 num = 0;
AV *out_av;
out_av = newAV();
while (1) {
num = Kino_BitVec_next_set_bit(obj, num);
if (num == KINO_BITVEC_SENTINEL)
break;
av_push( out_av, newSViv(num) );
num++;
}
return out_av;
}
void
Kino_BitVec_destroy(BitVector* obj) {
Kino_Safefree(obj->bits);
Kino_Safefree(obj);
}
__POD__
=begin devdocs
=head1 NAME
KinoSearch::Util::BitVector - a set of bits
=head1 DESCRIPTION
A vector of bits, which grows as needed. The implementation is designed to
resemble both org.apache.lucene.util.BitVector and java.util.BitSet.
Accessible from both C and Perl.
=head1 COPYRIGHT
Copyright 2005-2006 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
See L<KinoSearch|KinoSearch> version 0.08.
=end devdocs
=cut