19 #ifndef INCLUDE_BLOOM_FILTER_HPP 20 #define INCLUDE_BLOOM_FILTER_HPP 51 maximum_size(std::numeric_limits<unsigned long long int>::max()),
121 double min_m = std::numeric_limits<double>::infinity();
128 const double denominator = std::log(1.0 - std::pow(false_positive_probability, 1.0 / k));
130 const double curr_m = numerator / denominator;
145 optp.
table_size =
static_cast<unsigned long long int>(min_m);
177 projected_element_count_(0),
178 inserted_element_count_ (0),
180 desired_false_positive_probability_(0.0)
185 inserted_element_count_(0),
192 generate_unique_salt();
194 bit_table_.resize(table_size_ /
bits_per_char, static_cast<unsigned char>(0x00));
199 this->operator=(filter);
209 (bit_table_.size() == f.
bit_table_.size() ) &&
214 (salt_ == f.
salt_ ) &&
223 return !operator==(f);
251 return (0 == table_size_);
256 std::fill(bit_table_.begin(), bit_table_.end(),
static_cast<unsigned char>(0x00));
257 inserted_element_count_ = 0;
260 inline void insert(
const unsigned char* key_begin,
const std::size_t& length)
262 std::size_t bit_index = 0;
265 for (std::size_t i = 0; i < salt_.size(); ++i)
267 compute_indices(hash_ap(key_begin, length, salt_[i]), bit_index, bit);
272 ++inserted_element_count_;
275 template <
typename T>
279 insert(reinterpret_cast<const unsigned char*>(&t),
sizeof(T));
282 inline void insert(
const std::string& key)
284 insert(reinterpret_cast<const unsigned char*>(key.data()),key.size());
287 inline void insert(
const char* data,
const std::size_t& length)
289 insert(reinterpret_cast<const unsigned char*>(data),length);
292 template <
typename InputIterator>
293 inline void insert(
const InputIterator begin,
const InputIterator end)
295 InputIterator itr = begin;
303 inline virtual bool contains(
const unsigned char* key_begin,
const std::size_t length)
const 305 std::size_t bit_index = 0;
308 for (std::size_t i = 0; i < salt_.size(); ++i)
310 compute_indices(hash_ap(key_begin, length, salt_[i]), bit_index, bit);
321 template <
typename T>
324 return contains(reinterpret_cast<const unsigned char*>(&t),static_cast<std::size_t>(
sizeof(T)));
329 return contains(reinterpret_cast<const unsigned char*>(key.c_str()),key.size());
332 inline bool contains(
const char* data,
const std::size_t& length)
const 334 return contains(reinterpret_cast<const unsigned char*>(data),length);
337 template <
typename InputIterator>
338 inline InputIterator
contains_all(
const InputIterator begin,
const InputIterator end)
const 340 InputIterator itr = begin;
355 template <
typename InputIterator>
356 inline InputIterator
contains_none(
const InputIterator begin,
const InputIterator end)
const 358 InputIterator itr = begin;
373 inline virtual unsigned long long int size()
const 380 return inserted_element_count_;
392 return std::pow(1.0 - std::exp(-1.0 * salt_.size() * inserted_element_count_ / size()), 1.0 * salt_.size());
404 for (std::size_t i = 0; i < bit_table_.size(); ++i)
422 for (std::size_t i = 0; i < bit_table_.size(); ++i)
440 for (std::size_t i = 0; i < bit_table_.size(); ++i)
449 inline const cell_type*
table()
const 451 return bit_table_.data();
461 inline virtual void compute_indices(
const bloom_type& hash, std::size_t& bit_index, std::size_t& bit)
const 463 bit_index = hash % table_size_;
475 const unsigned int predef_salt_count = 128;
477 static const bloom_type predef_salt[predef_salt_count] =
479 0xAAAAAAAA, 0x55555555, 0x33333333, 0xCCCCCCCC,
480 0x66666666, 0x99999999, 0xB5B5B5B5, 0x4B4B4B4B,
481 0xAA55AA55, 0x55335533, 0x33CC33CC, 0xCC66CC66,
482 0x66996699, 0x99B599B5, 0xB54BB54B, 0x4BAA4BAA,
483 0xAA33AA33, 0x55CC55CC, 0x33663366, 0xCC99CC99,
484 0x66B566B5, 0x994B994B, 0xB5AAB5AA, 0xAAAAAA33,
485 0x555555CC, 0x33333366, 0xCCCCCC99, 0x666666B5,
486 0x9999994B, 0xB5B5B5AA, 0xFFFFFFFF, 0xFFFF0000,
487 0xB823D5EB, 0xC1191CDF, 0xF623AEB3, 0xDB58499F,
488 0xC8D42E70, 0xB173F616, 0xA91A5967, 0xDA427D63,
489 0xB1E8A2EA, 0xF6C0D155, 0x4909FEA3, 0xA68CC6A7,
490 0xC395E782, 0xA26057EB, 0x0CD5DA28, 0x467C5492,
491 0xF15E6982, 0x61C6FAD3, 0x9615E352, 0x6E9E355A,
492 0x689B563E, 0x0C9831A8, 0x6753C18B, 0xA622689B,
493 0x8CA63C47, 0x42CC2884, 0x8E89919B, 0x6EDBD7D3,
494 0x15B6796C, 0x1D6FDFE4, 0x63FF9092, 0xE7401432,
495 0xEFFE9412, 0xAEAEDF79, 0x9F245A31, 0x83C136FC,
496 0xC3DA4A8C, 0xA5112C8C, 0x5271F491, 0x9A948DAB,
497 0xCEE59A8D, 0xB5F525AB, 0x59D13217, 0x24E7C331,
498 0x697C2103, 0x84B0A460, 0x86156DA9, 0xAEF2AC68,
499 0x23243DA5, 0x3F649643, 0x5FA495A8, 0x67710DF8,
500 0x9A6C499E, 0xDCFB0227, 0x46A43433, 0x1832B07A,
501 0xC46AFF3C, 0xB9C8FFF0, 0xC9500467, 0x34431BDF,
502 0xB652432B, 0xE367F12B, 0x427F4C1B, 0x224C006E,
503 0x2E7E5A89, 0x96F99AA5, 0x0BEB452A, 0x2FD87C39,
504 0x74B2E1FB, 0x222EFD24, 0xF357F60C, 0x440FCB1E,
505 0x8BBE030F, 0x6704DC29, 0x1144D12F, 0x948B1355,
506 0x6D8FD7E9, 0x1C11A014, 0xADD1592F, 0xFB3C712E,
507 0xFC77642F, 0xF9C4CE8C, 0x31312FB9, 0x08B0DD79,
508 0x318FA6E7, 0xC040D23D, 0xC0589AA7, 0x0CA5C075,
509 0xF874B172, 0x0CF914D5, 0x784D3280, 0x4E8CFEBC,
510 0xC569F575, 0xCDB2A091, 0x2CC016B4, 0x5C5F4421
513 if (salt_count_ <= predef_salt_count)
515 std::copy(predef_salt,
516 predef_salt + salt_count_,
517 std::back_inserter(salt_));
519 for (std::size_t i = 0; i < salt_.size(); ++i)
527 salt_[i] = salt_[i] * salt_[(i + 3) % salt_.size()] +
static_cast<bloom_type
>(random_seed_);
532 std::copy(predef_salt, predef_salt + predef_salt_count, std::back_inserter(salt_));
534 srand(static_cast<unsigned int>(random_seed_));
536 while (salt_.size() < salt_count_)
538 bloom_type current_salt =
static_cast<bloom_type
>(rand()) * static_cast<bloom_type>(rand());
540 if (0 == current_salt)
543 if (salt_.end() == std::find(salt_.begin(), salt_.end(), current_salt))
545 salt_.push_back(current_salt);
551 inline bloom_type
hash_ap(
const unsigned char* begin, std::size_t remaining_length, bloom_type hash)
const 553 const unsigned char* itr = begin;
554 unsigned int loop = 0;
556 while (remaining_length >= 8)
558 const unsigned int& i1 = *(
reinterpret_cast<const unsigned int*
>(itr)); itr +=
sizeof(
unsigned int);
559 const unsigned int& i2 = *(
reinterpret_cast<const unsigned int*
>(itr)); itr +=
sizeof(
unsigned int);
561 hash ^= (hash << 7) ^ i1 * (hash >> 3) ^
562 (~((hash << 11) + (i2 ^ (hash >> 5))));
564 remaining_length -= 8;
567 if (remaining_length)
569 if (remaining_length >= 4)
571 const unsigned int& i = *(
reinterpret_cast<const unsigned int*
>(itr));
574 hash ^= (hash << 7) ^ i * (hash >> 3);
576 hash ^= (~((hash << 11) + (i ^ (hash >> 5))));
580 remaining_length -= 4;
582 itr +=
sizeof(
unsigned int);
585 if (remaining_length >= 2)
587 const unsigned short& i = *(
reinterpret_cast<const unsigned short*
>(itr));
590 hash ^= (hash << 7) ^ i * (hash >> 3);
592 hash ^= (~((hash << 11) + (i ^ (hash >> 5))));
596 remaining_length -= 2;
598 itr +=
sizeof(
unsigned short);
601 if (remaining_length)
603 hash += ((*itr) ^ (hash * 0xA5A5A5A5)) + loop;
648 size_list.push_back(table_size_);
651 inline unsigned long long int size()
const 653 return size_list.back();
659 (percentage < 0.0) ||
660 (percentage >= 100.0)
666 unsigned long long int original_table_size = size_list.back();
667 unsigned long long int new_table_size =
static_cast<unsigned long long int>((size_list.back() * (1.0 - (percentage / 100.0))));
673 (new_table_size >= original_table_size)
679 desired_false_positive_probability_ = effective_fpp();
681 const unsigned long long int new_tbl_raw_size = new_table_size /
bits_per_char;
685 std::copy(bit_table_.begin(), bit_table_.begin() + new_tbl_raw_size, tmp.begin());
687 typedef table_type::iterator itr_t;
689 itr_t itr = bit_table_.begin() + (new_table_size /
bits_per_char);
690 itr_t end = bit_table_.begin() + (original_table_size /
bits_per_char);
691 itr_t itr_tmp = tmp.begin();
695 *(itr_tmp++) |= (*itr++);
698 std::swap(bit_table_, tmp);
700 size_list.push_back(new_table_size);
707 inline void compute_indices(
const bloom_type& hash, std::size_t& bit_index, std::size_t& bit)
const 711 for (std::size_t i = 0; i < size_list.size(); ++i)
713 bit_index %= size_list[i];
719 std::vector<unsigned long long int> size_list;
bloom_filter(const bloom_filter &filter)
virtual bool compute_optimal_parameters()
unsigned long long int random_seed
void insert(const unsigned char *key_begin, const std::size_t &length)
static const unsigned char bit_mask[bits_per_char]
InputIterator contains_all(const InputIterator begin, const InputIterator end) const
unsigned long long int table_size
static const std::size_t bits_per_char
unsigned long long int minimum_size
unsigned long long int element_count() const
bloom_filter operator&(const bloom_filter &a, const bloom_filter &b)
unsigned int maximum_number_of_hashes
bool contains(const std::string &key) const
double false_positive_probability
bloom_filter operator|(const bloom_filter &a, const bloom_filter &b)
std::vector< unsigned char > table_type
bool contains(const char *data, const std::size_t &length) const
const cell_type * table() const
std::vector< unsigned char > bit_table_
unsigned long long int table_size_
bool contains(const T &t) const
virtual unsigned long long int size() const
unsigned int minimum_number_of_hashes
unsigned long long int inserted_element_count_
unsigned int number_of_hashes
bloom_type hash_ap(const unsigned char *begin, std::size_t remaining_length, bloom_type hash) const
double effective_fpp() const
void generate_unique_salt()
virtual ~bloom_parameters()
void insert(const std::string &key)
unsigned long long int random_seed_
unsigned long long int maximum_size
bool compress(const double &percentage)
InputIterator contains_none(const InputIterator begin, const InputIterator end) const
optimal_parameters_t optimal_parameters
std::vector< bloom_type > salt_
virtual void compute_indices(const bloom_type &hash, std::size_t &bit_index, std::size_t &bit) const
void insert(const InputIterator begin, const InputIterator end)
double desired_false_positive_probability_
void insert(const char *data, const std::size_t &length)
bloom_filter operator^(const bloom_filter &a, const bloom_filter &b)
unsigned long long int projected_element_count_
unsigned long long int size() const
virtual bool contains(const unsigned char *key_begin, const std::size_t length) const
unsigned long long int projected_element_count
compressible_bloom_filter(const bloom_parameters &p)
bloom_filter(const bloom_parameters &p)