You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

133 lines
5.2 KiB

<?php
/*
Copyright (c) 2012, Da Xue
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author nor the names of its contributors may be used
to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY DA XUE ''AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL DA XUE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* https://github.com/dsx724/php-bloom-filter */
// Modified for PHP 5.2 compatibility and to support serialization.
/**
 * Fixed-size Bloom filter whose bit positions are derived from an MD5 digest.
 *
 * The filter stores $m bits ($m / 8 bytes) in a binary string. Each key is
 * hashed once with md5() in raw mode (16 bytes); the first 2 * $k bytes of the
 * digest are consumed as $k big-endian 16-bit values, and each value selects
 * one bit in the array. Because a digest only holds eight 16-bit values and a
 * 16-bit value can address at most 65536 bits, $k is limited to 8 and $m to
 * 65536.
 *
 * Syntax is deliberately kept PHP 5.2 compatible (no type declarations, long
 * array syntax), and __sleep() is provided so instances can be serialized.
 */
class wfMD5BloomFilter {
    private $n = 0; // # of entries added (bookkeeping only, never used for lookups)
    private $m; // # of bits in array
    private $k; // # of hash functions (bits set per key)
    private $k2; // # of digest bytes consumed per key (2 bytes per hash function)
    private $mask; // selects the byte-address bits (bit 3 and up) of a 16-bit hash value
    private $bit_array; // data structure: binary string of $m / 8 bytes

    /**
     * Combine two filters into $bfout, which may be one of the inputs.
     *
     * A union ORs the bit arrays and sums the entry counters; an intersection
     * ANDs the bit arrays and sets the entry counter to the absolute
     * difference of the two counters.
     *
     * @param wfMD5BloomFilter $bf1   First operand.
     * @param wfMD5BloomFilter $bf2   Second operand.
     * @param wfMD5BloomFilter $bfout Filter that receives the result.
     * @param bool             $union True for union, false for intersection.
     * @throws Exception If the operands differ in bit count or hash count.
     */
    private static function merge($bf1, $bf2, $bfout, $union = false) {
        if ($bf1->m != $bf2->m) throw new Exception('Unable to merge due to vector difference.');
        if ($bf1->k != $bf2->k) throw new Exception('Unable to merge due to hash count difference.');
        if ($union) {
            // Bitwise operators applied to two strings work byte by byte.
            $bfout->bit_array = $bf1->bit_array | $bf2->bit_array;
            $bfout->n = $bf1->n + $bf2->n;
        } else {
            $bfout->bit_array = $bf1->bit_array & $bf2->bit_array;
            $bfout->n = abs($bf1->n - $bf2->n);
        }
    }

    /**
     * Build a filter sized for roughly $n entries at false positive rate $p.
     *
     * @param int|float $n Expected number of entries; must be > 0.
     * @param float     $p Target false positive rate; must satisfy 0 < $p < 1.
     * @return wfMD5BloomFilter
     * @throws Exception If $n or $p is out of range, or if the derived size
     *                   or hash count exceeds the constructor's limits.
     */
    public static function createFromProbability($n, $p) {
        if ($p <= 0 || $p >= 1) throw new Exception('Invalid false positive rate requested.');
        if ($n <= 0) throw new Exception('Invalid capacity requested.');
        // For $p > 0.5 the formula yields 0 hash functions, which would make
        // contains() report every key as present; always use at least one.
        $k = max(1, (int)floor(log(1 / $p, 2)));
        // Approximate estimator method, rounded up to a power of two. Very
        // small capacities would fall below the 8-bit minimum, so clamp up;
        // a larger array can only lower the false positive rate.
        $m = max(8, (int)pow(2, ceil(log(-$n * log($p) / pow(log(2), 2), 2))));
        return new self($m, $k);
    }

    /**
     * Return a new filter holding the union of two compatible filters.
     *
     * @param wfMD5BloomFilter $bf1
     * @param wfMD5BloomFilter $bf2
     * @return wfMD5BloomFilter
     * @throws Exception If the filters differ in bit count or hash count.
     */
    public static function getUnion($bf1, $bf2) {
        $bf = new self($bf1->m, $bf1->k);
        self::merge($bf1, $bf2, $bf, true);
        return $bf;
    }

    /**
     * Return a new filter holding the intersection of two compatible filters.
     *
     * @param wfMD5BloomFilter $bf1
     * @param wfMD5BloomFilter $bf2
     * @return wfMD5BloomFilter
     * @throws Exception If the filters differ in bit count or hash count.
     */
    public static function getIntersection($bf1, $bf2) {
        $bf = new self($bf1->m, $bf1->k);
        self::merge($bf1, $bf2, $bf, false);
        return $bf;
    }

    /**
     * @param int $m Number of bits; a power of two between 8 and 65536.
     * @param int $k Number of hash functions (bits set per key); at most 8.
     * @throws Exception If $m or $k violates the limits above.
     */
    public function __construct($m, $k) {
        if ($m < 8) throw new Exception('The bit array length must be at least 8 bits.');
        if (($m & ($m - 1)) !== 0) throw new Exception('The bit array length must be power of 2.');
        if ($m > 65536) throw new Exception('The maximum data structure size is 8KB.');
        if ($k > 8) throw new Exception('The maximum bits to set is 8.');
        $this->m = $m;
        $this->k = $k;
        $this->k2 = $k * 2;
        // $m is a power of two, so $m - 8 keeps every address bit except the
        // low three, which pick the bit inside the addressed byte.
        $this->mask = (int)$m - 8;
        $this->bit_array = str_repeat("\0", $this->getArraySize(true));
    }

    /**
     * List the properties that make up the serialized form.
     *
     * @return array
     */
    public function __sleep() {
        return array('n', 'm', 'k', 'k2', 'mask', 'bit_array');
    }

    /**
     * Estimate the false positive rate for a given number of entries.
     *
     * @param int $n Entry count to evaluate; 0 (the default) uses the
     *               filter's current entry counter.
     * @return float
     */
    public function calculateProbability($n = 0) {
        $entries = $n ? $n : $this->n;
        return pow(1 - pow(1 - 1 / $this->m, $this->k * $entries), $this->k);
    }

    /**
     * Estimate how many entries the filter can hold at false positive rate $p.
     *
     * @param float $p Target false positive rate.
     * @return float Whole number produced by floor().
     */
    public function calculateCapacity($p) {
        return floor($this->m * log(2) / log($p, 1 - pow(1 - 1 / $this->m, $this->m * log(2))));
    }

    /**
     * @return int Value of the entry counter.
     */
    public function getElementCount() {
        return $this->n;
    }

    /**
     * @param bool $bytes True to report the size in bytes instead of bits.
     * @return int
     */
    public function getArraySize($bytes = false) {
        return $this->m >> ($bytes ? 3 : 0);
    }

    /**
     * @return int Number of hash functions.
     */
    public function getHashCount() {
        return $this->k;
    }

    /**
     * Build a human-readable, multi-line summary of the filter.
     *
     * @param float|null $p When given, a capacity line for this false
     *                      positive rate is appended.
     * @return string
     */
    public function getInfo($p = null) {
        $units = array('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y');
        $M = $this->getArraySize(true);
        $magnitude = intval(floor(log($M, 1024)));
        $unit = $units[$magnitude];
        $M /= pow(1024, $magnitude);
        return 'Allocated '.$this->getArraySize().' bits ('.$M.' '.$unit.'Bytes)'.PHP_EOL.
            'Using '.$this->getHashCount(). ' (16b) hashes'.PHP_EOL.
            'Contains '.$this->getElementCount().' elements'.PHP_EOL.
            (isset($p) ? 'Capacity of '.number_format($this->calculateCapacity($p)).' (p='.$p.')'.PHP_EOL : '');
    }

    /**
     * Add a key to the filter and increment the entry counter.
     *
     * @param string $key
     */
    public function add($key) {
        $hash = md5($key, true);
        // Consume the digest two bytes at a time: one 16-bit value per hash function.
        for ($index = 0; $index < $this->k2; $index += 2) {
            $hash_sub = (ord($hash[$index]) << 8) | ord($hash[$index + 1]);
            $word = ($hash_sub & $this->mask) >> 3;
            $this->bit_array[$word] = $this->bit_array[$word] | chr(1 << ($hash_sub & 7));
        }
        $this->n++;
    }

    /**
     * Test whether a key may have been added.
     *
     * @param string $key
     * @return bool False if any bit selected by the key is unset; true otherwise.
     */
    public function contains($key) {
        $hash = md5($key, true);
        for ($index = 0; $index < $this->k2; $index += 2) {
            $hash_sub = (ord($hash[$index]) << 8) | ord($hash[$index + 1]);
            $word = ($hash_sub & $this->mask) >> 3;
            if ((ord($this->bit_array[$word]) & (1 << ($hash_sub & 7))) === 0) return false;
        }
        return true;
    }

    /**
     * Merge another compatible filter into this one (bitwise OR).
     *
     * @param wfMD5BloomFilter $bf
     * @throws Exception If the filters differ in bit count or hash count.
     */
    public function unionWith($bf) {
        self::merge($this, $bf, $this, true);
    }

    /**
     * Reduce this filter to its intersection with another compatible filter (bitwise AND).
     *
     * @param wfMD5BloomFilter $bf
     * @throws Exception If the filters differ in bit count or hash count.
     */
    public function intersectWith($bf) {
        self::merge($this, $bf, $this, false);
    }
}