package Set::ConsistentHash;
use strict;
use Digest::SHA1 qw(sha1);
use Carp qw(croak);
use vars qw($VERSION);
$VERSION = '0.92';

=head1 NAME

Set::ConsistentHash - library for doing consistent hashing

=head1 SYNOPSIS

  my $set = Set::ConsistentHash->new;

=head1 OVERVIEW

Description, shamelessly stolen from Wikipedia:

  Consistent hashing is a scheme that provides hash table
  functionality in a way that the addition or removal of one slot does
  not significantly change the mapping of keys to slots. In contrast,
  in most traditional hash tables, a change in the number of array
  slots causes nearly all keys to be remapped.

  Consistent hashing was introduced in 1997 as a way of distributing
  requests among a changing population of web servers. More recently,
  it and similar techniques have been employed in distributed hash
  tables.

You're encouraged to read the original paper, linked below.

=head1 TERMINOLOGY

Terminology about this stuff seems to vary.  For clarity, this module
uses the following:

B<Consistent Hash> -- The object you work with.  Contains 0 or more
"targets", each with a weight.

B<Target> -- A member of the set.  The weight (an arbitrary number),
specifies how often it occurs relative to other targets.

=head1 CLASS METHODS

=head2 new

  $set = Set::ConsistentHash->new;

Takes no options.  Creates a new consistent hashing set with no
targets.  You'll need to add them.

=cut

# creates a new consistent hashing set with no targets.  you'll need to add targets.
sub new {
    my $class = shift;
    croak("Unknown parameters") if @_;
    my $self = bless {
        weights => {},  # $target => integer $weight
        points  => {},  # 32-bit value points on 'circle' => \$target
        order   => [],  # 32-bit points, sorted
        buckets      => undef, # when requested, arrayref of 1024 buckets mapping to targets
        total_weight => undef, # when requested, total weight of all targets
        hash_func    => undef, # hash function for key lookup
    }, $class;
    return $self;
}

############################################################################

=head1 INSTANCE METHODS

=cut

############################################################################

=head2 targets

Returns (alphabetically sorted) array of all targets in set.

=cut

sub targets {
    my $self = shift;
    return sort keys %{$self->{weights}};
}

############################################################################

=head2 reset_targets

Remove all targets.

=cut

sub reset_targets {
    my $self = shift;
    $self->modify_targets(map { $_ => 0 } $self->targets);
}
*clear = \&reset_targets;

############################################################################

=head2 set_targets

    $set->set_targets(%target_to_weight);
    $set->set_targets("foo" => 5, "bar" => 10);

Removes all targets, then sets the provided ones with the weightings provided.

=cut

sub set_targets {
    my $self = shift;
    $self->reset_targets;
    $self->modify_targets(@_);
}

############################################################################

=head2 modify_targets

    $set->modify_targets(%target_to_weight);

Without removing existing targets, modifies the weighting of provided
targets.  A weight of undef or 0 removes an item from the set.

=cut

# add/modify targets.  parameters are %weights:  $target -> $weight
sub modify_targets {
    my ($self, %weights) = @_;

    # uncache stuff:
    $self->{total_weight} = undef;
    $self->{buckets}      = undef;

    while (my ($target, $weight) = each %weights) {
        if ($weight) {
            $self->{weights}{$target} = $weight;
        } else {
            delete $self->{weights}{$target};
        }
    }
    $self->_redo_circle;
}

############################################################################

=head2 set_target

    $set->set_target($target => $weight);

A wrapper around modify_targets that sounds better for modifying a single item.

=cut

*set_target = \&modify_targets;

############################################################################

=head2 total_weight

Returns sum of all current targets' weights.

=cut

#'
sub total_weight {
    my $self = shift;
    return $self->{total_weight} if defined $self->{total_weight};
    my $sum = 0;
    foreach my $val (values %{$self->{weights}}) {
        $sum += $val;
    }
    return $self->{total_weight} = $sum;
}

############################################################################

=head2 percent_weight

   $weight = $set->percent_weight($target);
   $weight = $set->percent_weight("10.0.0.2");

Returns number in range [0,100] representing percentage of weight that provided $target has.

=cut

sub percent_weight {
    my ($self, $target) = @_;
    return 0 unless $self->{weights}{$target};
    return 100 * $self->{weights}{$target} / $self->total_weight;
}

############################################################################

=head2 set_hash_func

    $set->set_hash_func(\&your_hash_func);

Sets the function with which keys will be hashed before looking up
which target they will be mapped onto.

=cut

sub set_hash_func {
    my ($self, $hash_func) = @_;
    $self->{hash_func} = $hash_func;
}

############################################################################

=head2 get_target

    $selected_target = $set->get_target(your_hash_func($your_key));

    - or -

    $set->set_hash_func(\&your_hash_func);
    $selected_target = $set->get_target($your_key);

Given a key, select the target in the set to which that key is mapped.

If you find the target (say, a server) to be dead or otherwise
unavailable, remove it from the set, and get the target again.

=cut

sub get_target {
    my ($self, $key) = @_;
    _compute_buckets($self) unless $self->{buckets};
    $key = $self->{hash_func}->($key) if $self->{hash_func};
    return $self->{buckets}->[$key % 1024];
}

=head2 buckets

    $selected_target = $set->buckets->[your_hash_func($your_key) % 1024];

Returns an arrayref of 1024 selected items from the set, in a consistent order.

This is what you want to use to actually select items quickly in your
application.

If you find the target (say, a server) to be dead, or otherwise
unavailable, remove it from the set, and look at that index in the
bucket arrayref again.

=cut

# returns arrayref of 1024 buckets.  each array element is the $target for that bucket index.
sub buckets {
    my $self = shift;
    _compute_buckets($self) unless $self->{buckets};
    return $self->{buckets};
}

############################################################################

=head1 INTERNALS

=head2 _compute_buckets

Computes and returns an array of 1024 selected items from the set,
in a consistent order.

=cut

# Computes and returns array of 1024 buckets.  Each array element is the
# $target for that bucket index.
sub _compute_buckets {
    my $self = shift;
    my @buckets = ();
    my $by = 2**22;  # 2**32 / 2**10 (1024)
    my $pt = 0;
    for my $n (0..1023) {
        $buckets[$n] = $self->target_of_point($pt);
        $pt += $by;
    }
    return $self->{buckets} = \@buckets;
}

=head2 target_of_point

   $target = $set->target_of_point($point)

Given a $point, an integer in the range [0,2**32), returns (somewhat
slowly), the next target found, clockwise from that point on the circle.

This is mostly an internal method, used to generated the 1024-element
cached bucket arrayref when needed.  You probably don't want to use this.
Instead, use the B<buckets> method, and run your hash function on your key,
generating an integer, modulous 1024, and looking up that bucket index's target.

=cut

# given a $point [0,2**32), returns the $target that's next going around the circle
sub target_of_point {
    my ($self, $pt) = @_;  # $pt is 32-bit unsigned integer

    my $order = $self->{order};
    my $circle_pt = $self->{points};

    my ($lo, $hi) = (0, scalar(@$order)-1);  # inclusive candidates

    while (1) {
        my $mid           = int(($lo + $hi) / 2);
        my $val_at_mid    = $order->[$mid];
        my $val_one_below = $mid ? $order->[$mid-1] : 0;

        # match
        return ${ $circle_pt->{$order->[$mid]} } if
            $pt <= $val_at_mid && $pt > $val_one_below;

        # wrap-around match
        return ${ $circle_pt->{$order->[0]} } if
            $lo == $hi;

        # too low, go up.
        if ($val_at_mid < $pt) {
            $lo = $mid + 1;
            $lo = $hi if $lo > $hi;
        }
        # too high
        else {
            $hi = $mid - 1;
            $hi = $lo if $hi < $lo;
        }

        next;
    }
};

############################################################################
#  Internal...
############################################################################

sub _redo_circle {
    my $self = shift;

    my $pts = $self->{points} = {};
    while (my ($target, $weight) = each %{$self->{weights}}) {
        my $num_pts = $weight * 100;
        foreach my $ptn (1..$num_pts) {
            my $key = "$target-$ptn";
            my $val = unpack("L", substr(sha1($key), 0, 4));
            $pts->{$val} = \$target;
        }
    }

    $self->{order} = [ sort { $a <=> $b } keys %$pts ];
}


=head1 REFERENCES

L<http://en.wikipedia.org/wiki/Consistent_hashing>

L<http://www8.org/w8-papers/2a-webserver/caching/paper2.html>

=head1 AUTHOR

Brad Fitzpatrick -- brad@danga.com

=head1 CONTRIBUTING

Bug, performance, doc, feature patch?  See
L<http://contributing.appspot.com/set-consistenthash-perl>

=head1 COPYRIGHT & LICENSE

Copyright 2007, Six Apart, Ltd.

You're granted permission to use this code under the same terms as Perl itself.

=head1 WARRANTY

This is free software.  It comes with no warranty of any kind.

=cut

1;