You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
133 lines
5.2 KiB
133 lines
5.2 KiB
5 years ago
|
<?php
|
||
|
/*
|
||
|
Copyright (c) 2012, Da Xue
|
||
|
All rights reserved.
|
||
|
|
||
|
Redistribution and use in source and binary forms, with or without
|
||
|
modification, are permitted provided that the following conditions are met:
|
||
|
1. Redistributions of source code must retain the above copyright
|
||
|
notice, this list of conditions and the following disclaimer.
|
||
|
2. Redistributions in binary form must reproduce the above copyright
|
||
|
notice, this list of conditions and the following disclaimer in the
|
||
|
documentation and/or other materials provided with the distribution.
|
||
|
3. The name of the author nor the names of its contributors may be used
|
||
|
to endorse or promote products derived from this software without
|
||
|
specific prior written permission.
|
||
|
|
||
|
THIS SOFTWARE IS PROVIDED BY DA XUE ''AS IS'' AND ANY
|
||
|
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||
|
DISCLAIMED. IN NO EVENT SHALL DA XUE BE LIABLE FOR ANY
|
||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||
|
*/
|
||
|
|
||
|
/* https://github.com/dsx724/php-bloom-filter */
|
||
|
|
||
|
// Modified for PHP 5.2 compatibility and to support serialization.
|
||
|
|
||
|
/**
 * Bloom filter backed by a binary string, using slices of a single MD5 digest
 * as its hash functions.
 *
 * Each key is hashed once with md5(); every hash function consumes two
 * consecutive bytes (16 bits) of the raw digest. That is why the bit array is
 * limited to 65536 bits and the number of hash functions to 8 (8 * 2 bytes =
 * the 16 bytes of an MD5 digest).
 *
 * Written without type declarations or short array syntax to stay compatible
 * with old PHP versions.
 */
class wfMD5BloomFilter {
	/**
	 * Combines the bit arrays of two filters into $bfout.
	 *
	 * $bfout may be the same object as $bf1 (this is how unionWith() and
	 * intersectWith() work in place).
	 *
	 * @param wfMD5BloomFilter $bf1   First operand.
	 * @param wfMD5BloomFilter $bf2   Second operand.
	 * @param wfMD5BloomFilter $bfout Filter receiving the result.
	 * @param bool             $union True for a bitwise OR (union), false for a bitwise AND (intersection).
	 * @throws Exception When the two operands differ in bit array length or hash count.
	 */
	private static function merge($bf1,$bf2,$bfout,$union = false){
		if ($bf1->m != $bf2->m) throw new Exception('Unable to merge due to vector difference.');
		if ($bf1->k != $bf2->k) throw new Exception('Unable to merge due to hash count difference.');
		if ($union){
			// String operands: PHP applies the operator byte by byte.
			$bfout->bit_array = $bf1->bit_array | $bf2->bit_array;
			$bfout->n = $bf1->n + $bf2->n;
		} else {
			$bfout->bit_array = $bf1->bit_array & $bf2->bit_array;
			// The element count is only an estimate; the true size of the
			// intersection cannot be recovered from the counters.
			$bfout->n = abs($bf1->n - $bf2->n);
		}
	}

	/**
	 * Creates a filter sized for $n entries at false positive rate $p.
	 *
	 * The bit array length is rounded up to a power of two. Results below the
	 * constructor's minimums (8 bits, 1 hash function) are raised to those
	 * minimums so that small capacities or loose rates still yield a usable
	 * filter; results above the constructor's maximums still throw.
	 *
	 * @param int|float $n Expected number of entries; must be > 0.
	 * @param float     $p Requested false positive rate; must satisfy 0 < $p < 1.
	 * @return wfMD5BloomFilter
	 * @throws Exception On invalid arguments or when the required size exceeds the supported limits.
	 */
	public static function createFromProbability($n, $p){
		if ($p <= 0 || $p >= 1) throw new Exception('Invalid false positive rate requested.');
		if ($n <= 0) throw new Exception('Invalid capacity requested.');
		// A rate above 0.5 would otherwise give zero hash functions, i.e. a
		// filter that reports every key as present.
		$k = max(1, floor(log(1/$p,2)));
		// Approximate estimator method, rounded up to a power of two and never
		// smaller than the 8-bit minimum the constructor enforces.
		$m = max(8, pow(2,ceil(log(-$n*log($p)/pow(log(2),2),2))));
		return new self($m,$k);
	}

	/**
	 * Returns a new filter holding the union of two filters.
	 *
	 * @param wfMD5BloomFilter $bf1
	 * @param wfMD5BloomFilter $bf2
	 * @return wfMD5BloomFilter
	 * @throws Exception When the filters differ in bit array length or hash count.
	 */
	public static function getUnion($bf1,$bf2){
		$bf = new self($bf1->m,$bf1->k);
		self::merge($bf1,$bf2,$bf,true);
		return $bf;
	}

	/**
	 * Returns a new filter holding the intersection of two filters.
	 *
	 * @param wfMD5BloomFilter $bf1
	 * @param wfMD5BloomFilter $bf2
	 * @return wfMD5BloomFilter
	 * @throws Exception When the filters differ in bit array length or hash count.
	 */
	public static function getIntersection($bf1,$bf2){
		$bf = new self($bf1->m,$bf1->k);
		self::merge($bf1,$bf2,$bf,false);
		return $bf;
	}

	private $n = 0; // # of entries
	private $m; // # of bits in array
	private $k; // # of hash functions
	private $k2; // # of digest bytes consumed per key (2 per hash function)
	private $mask; // selects the byte-address bits of a 16-bit hash slice
	private $bit_array; // data structure: binary string of $m / 8 bytes

	/**
	 * @param int $m Number of bits in the array: a power of two between 8 and 65536.
	 * @param int $k Number of hash functions: between 1 and 8.
	 * @throws Exception When either argument is outside the supported range.
	 */
	public function __construct($m, $k){
		if ($m < 8) throw new Exception('The bit array length must be at least 8 bits.');
		if (($m & ($m - 1)) !== 0) throw new Exception('The bit array length must be power of 2.');
		if ($m > 65536) throw new Exception('The maximum data structure size is 8KB.');
		if ($k < 1) throw new Exception('The minimum bits to set is 1.');
		if ($k > 8) throw new Exception('The maximum bits to set is 8.');
		// Callers such as createFromProbability() may hand in floats; store
		// integers so later shifts and string offsets operate on ints.
		$this->m = (int)$m;
		$this->k = (int)$k;
		$this->k2 = $this->k * 2;
		// $m is a power of two, so $m - 8 keeps every address bit except the
		// low three, which select the bit inside a byte.
		$this->mask = $this->m - 8;
		$this->bit_array = str_repeat("\0",$this->getArraySize(true));
	}

	/**
	 * Lists the properties to include when the filter is serialized.
	 *
	 * @return array
	 */
	public function __sleep() {
		return array('n', 'm', 'k', 'k2', 'mask', 'bit_array');
	}

	/**
	 * Estimates the false positive probability.
	 *
	 * @param int $n Number of entries to assume; 0 (the default) uses the current element count.
	 * @return float
	 */
	public function calculateProbability($n = 0){
		return pow(1-pow(1-1/$this->m,$this->k*($n ? $n : $this->n)),$this->k);
	}

	/**
	 * Estimates how many entries the filter can hold at false positive rate $p.
	 *
	 * @param float $p Target false positive rate.
	 * @return float Whole number, as returned by floor().
	 */
	public function calculateCapacity($p){
		return floor($this->m*log(2)/log($p,1-pow(1-1/$this->m,$this->m*log(2))));
	}

	/**
	 * @return int Value of the entry counter (incremented on every add(), including repeated keys).
	 */
	public function getElementCount(){
		return $this->n;
	}

	/**
	 * @param bool $bytes True to get the size in bytes, false (default) for bits.
	 * @return int
	 */
	public function getArraySize($bytes = false){
		return $this->m >> ($bytes ? 3 : 0);
	}

	/**
	 * @return int Number of hash functions.
	 */
	public function getHashCount(){
		return $this->k;
	}

	/**
	 * Builds a human-readable, multi-line summary of the filter.
	 *
	 * @param float|null $p When given, a capacity line for this false positive rate is appended.
	 * @return string
	 */
	public function getInfo($p = null){
		$units = array('','K','M','G','T','P','E','Z','Y');
		$M = $this->getArraySize(true);
		$magnitude = intval(floor(log($M,1024)));
		$unit = $units[$magnitude];
		$M /= pow(1024,$magnitude);
		return 'Allocated '.$this->getArraySize().' bits ('.$M.' '.$unit.'Bytes)'.PHP_EOL.
			'Using '.$this->getHashCount(). ' (16b) hashes'.PHP_EOL.
			'Contains '.$this->getElementCount().' elements'.PHP_EOL.
			(isset($p) ? 'Capacity of '.number_format($this->calculateCapacity($p)).' (p='.$p.')'.PHP_EOL : '');
	}

	/**
	 * Adds a key to the filter and increments the entry counter.
	 *
	 * @param string $key
	 */
	public function add($key){
		$hash = md5($key,true);
		// Each hash function reads one big-endian 16-bit slice of the digest.
		for ($index = 0; $index < $this->k2; $index += 2){
			$hash_sub = (ord($hash[$index]) << 8) | ord($hash[$index + 1]);
			$word = ($hash_sub & $this->mask) >> 3; // byte offset in the bit array
			$this->bit_array[$word] = $this->bit_array[$word] | chr(1 << ($hash_sub & 7));
		}
		$this->n++;
	}

	/**
	 * Tests whether a key may have been added.
	 *
	 * @param string $key
	 * @return bool False when at least one of the key's bits is unset; true otherwise.
	 */
	public function contains($key){
		$hash = md5($key,true);
		for ($index = 0; $index < $this->k2; $index += 2){
			$hash_sub = (ord($hash[$index]) << 8) | ord($hash[$index + 1]);
			if ((ord($this->bit_array[($hash_sub & $this->mask) >> 3]) & (1 << ($hash_sub & 7))) === 0) return false;
		}
		return true;
	}

	/**
	 * Merges another filter into this one in place (union).
	 *
	 * @param wfMD5BloomFilter $bf
	 * @throws Exception When the filters differ in bit array length or hash count.
	 */
	public function unionWith($bf){
		self::merge($this,$bf,$this,true);
	}

	/**
	 * Reduces this filter in place to its intersection with another filter.
	 *
	 * @param wfMD5BloomFilter $bf
	 * @throws Exception When the filters differ in bit array length or hash count.
	 */
	public function intersectWith($bf){
		self::merge($this,$bf,$this,false);
	}
}
|