C++ Bloom Filter Library  release
Functions
bloom_filter_example03.cpp File Reference
#include <iostream>
#include <cstddef>
#include <cstdio>
#include <fstream>
#include <iterator>
#include <algorithm>
#include <vector>
#include <deque>
#include <set>
#include <string>
#include "bloom_filter.hpp"
Include dependency graph for bloom_filter_example03.cpp:

Go to the source code of this file.

Functions

bool load_word_list (int argc, char *argv[], std::vector< std::string > &word_list)
 
template<class T , class Allocator , template< class, class > class Container>
bool read_file (const std::string &file_name, Container< T, Allocator > &c)
 
void generate_outliers (const std::vector< std::string > &word_list, std::deque< std::string > &outliers)
 
void purify_outliers (const std::vector< std::string > &word_list, std::deque< std::string > &outliers)
 
int main (int argc, char *argv[])
 
std::string reverse (std::string str)
 

Function Documentation

◆ generate_outliers()

void generate_outliers ( const std::vector< std::string > &  word_list,
std::deque< std::string > &  outliers 
)

Definition at line 201 of file bloom_filter_example03.cpp.

References purify_outliers(), and reverse().

Referenced by main().

202 {
203  std::cout << "Generating outliers..... ";
204 
205  for (std::vector<std::string>::const_iterator it = word_list.begin(); it != word_list.end(); ++it)
206  {
207  if ((*it) != reverse((*it)))
208  {
209  outliers.push_back((*it) + reverse((*it)));
210  outliers.push_back((*it) + (*it));
211  outliers.push_back(reverse((*it)) + (*it) + reverse((*it)));
212  }
213 
214  std::string ns = *it;
215 
216  for (unsigned int i = 0; i < ns.size(); ++i)
217  {
218  if (1 == (i & 0x00)) ns[i] = ~ns[i];
219  }
220 
221  outliers.push_back(ns);
222  }
223 
224  static const std::string rand_str[] =
225  {
226  "oD5l", "pccW", "5yHt", "ndaN", "OaJh", "tWPc", "Cr9C", "a9zE",
227  "H1wL", "yo1V", "16D7", "f2WR", "0MVQ", "PkKn", "PlVa", "MvzL",
228  "9Csl", "JQTv", "IveD", "FDVS", "Q7HE", "QgcF", "Q9Vo", "V8zJ",
229  "EJWT", "GuLC", "rM3d", "PJF4", "HXPW", "qKx3", "ztRP", "t4KP",
230  "m1zV", "fn12", "B1QP", "Jr4I", "Mf8M", "4jBd", "anGR", "Pipt",
231  "QHon", "GNlc", "UeXM", "mVM5", "ABI8", "RhB3", "5h2s", "hOYo",
232  "gaId", "DX40", "THMu", "EwlP", "n9Mz", "oC1S", "BfMl", "uCZ1",
233  "G2bA", "MOH9", "zZ0O", "PKDO", "3nRU", "Z6ie", "4cso", "LnQO",
234  "MJTtT","td3rC","A5JNR","1yL5B","rQnJk","jNKYF","CD0XD","pFLSG",
235  "fxO1a","CAjBE","ORk4e","0LERI","R7d0x","Qqd7v","6Kih5","9tTCB",
236  "yCg9U","D2Tv7","XpNHn","6zeFQ","BT2cs","WGhKW","zTv6B","TTPFk",
237  "XjNVX","pg9yW","4pKiZ","mQUhL","xrXzR","kVRm5","NSyC4","olXm9",
238  "UWkYy","8Ys6r","yd4Fl","5L4mB","nP3nH","f0DFb","glnQa","DlXQa",
239  "cQdH6","eBmIN","fDj6F","ezLow","C15vu","I2Z2j","BQgzg","eVBid",
240  "hn5TO","WZyQN","xXgsE","sL6nK","8DKD8","jcrbp","AcRak","h8N5o",
241  "LViwC","ThEKf","O7fd5","oN0Id","OM1m0","4OLiR","VIa8N","bJZFG",
242  "9j3rL","SzW0N","7m7pY","mY9bg","k1p3e","3OFm1","r45se","VYwz3",
243  "pDjXt","ZcqcJ","npPHx","hA3bw","w7lSO","jEmZL","1x3AZ","FN47G",
244  "kThNf","aC4fq","rzDwi","CYRNG","gCeuG","wCVqO","d1R60","bEauW",
245  "KeUwW","lIKhO","RfPv3","dK5wE","1X7qu","tRwEn","1c03P","GwHCl",
246  "CsJaO","zl4j1","e0aEc","Uskgi","rgTGR","jyR4g","Tt6l4","lRoaw",
247  "94ult","qZwBX","eYW8S","Qf6UH","AbV56","N1hJq","JIaVe","8LHEx",
248  "DeNbS","30I0a","hm6qw","3jcaO","4WkuA","mQ219","Gb81C","yx4HM",
249  "Chlqfi9S1y", "BMwUgVFu2X", "ZmpEGOVrVe", "13ggJxrPkC", "fcJJpyMGjm", "9T00Dv4ZAb",
250  "p3YRcP7M2o", "sR0qNUXCHv", "gCxWZbJ6rb", "R4YtzRXXUl", "vwyYz5j6pY", "XPWUvLXhJ7",
251  "7PwfnVVb7U", "1f34Q6hOYz", "1EM2abZY61", "0a6Ivi4S0a", "Teq2LrQs2T", "dWXLCgWHc8",
252  "LawMv7ujn4", "N8VFgbZQx5", "tfvHHxoDgi", "ImwYgXA2tf", "KkIES9NqZO", "ajcz0qjjda",
253  "6Vz28vlGs9", "VMCc5W8cCt", "BiQB8BRJ98", "43CpOJSMpA", "jfBJdqwXcU", "ecHR9EO2ib",
254  "LH7CcXyCZ7", "JntqGSgSpa", "0MbTMpZPFW", "5FJSdiCXzR", "5gda2AhA2x", "lrDFc1lnXk",
255  "zrEwECHvjs", "B0JldDxFa1", "6DYal4QxKa", "Hsqx6kP2S4", "zZwnALSuFh", "Shh4ISZcKW",
256  "P9VDaNSk7Z", "mEI2PLSCO6", "WyTyrQORtu", "IvJyMMRgh3", "Q6pgJq8Nkv", "dhOgR3tDAD",
257  "Y9h6bVgbxO", "wA15tiOPTm", "8TaIKf1zCO", "z75dzabHBs", "AS6OPnwoJI", "2DSZka9Auj",
258  "QLzUjV2CWs", "KZSN2SVhia", "7ttYKWF2ue", "1Zxfu7B2ST", "RnkpmwjsCi", "YpcSIzaqx5",
259  "RDEwFD9gmX", "Nlx3V4Cjw4", "9ZdvITOj8M", "httUPWMNXO", "Ypv9PjxGwa", "LlwyNolNnH",
260  "6xpJOht47a", "tbmz4WIdcG", "OwzuVDlb7D", "PBQKJxo8DQ", "uVnMQn7hK6", "rlnZINuDUa",
261  "2feyyYukPa", "teOlpKuDBn", "LxBSWh0dL1", "Onyb7r4Jp0", "bZxXE6xOXg", "d9NSvNTunQ",
262  "ONerLBic32", "8mar4rKmFk", "5cCN9uwaCg", "ElVrYOHHMv", "YF6Og8DX40", "OgiCwpCQ5a",
263  "K6nSRZVxdR", "gqyXXXoVFW", "ulyRYizcBP", "khUx31K5UR", "qZFRzVthju", "pQBh0vnB20",
264  "dk8NIN7ajy", "XP7ed1OjZx", "IRYNwA5iFR", "hiSEBhTukC", "Ns4jJ3jzGo", "dYoCSxjIvM",
265  "HzGLbl5i1g", "baizENd4ko", "6rCqGBO8t1", "QWGfC8UaA7", "JFhRfxQe4K", "8R4W6IWANz",
266  "2TnWf1w7JH", "0z69e0wcoG", "8SN1mRHCY7", "oFGCYHHwGX", "G8xqnBgxjO", "6B3SAOayHt",
267  "XRW3ZSG1gw", "WcIjTxMxOM", "wNqCAIaTb2", "gO4em4HW8H", "TgGFSMEtbG", "WiwmbEw3QA",
268  "D2xshYUgpu", "xRUZCQVzBs", "nCnUmMgIjE", "p4Ewt1yCJr", "MeOjDcaMY5", "1XelMeXiiI"
269  };
270  static const std::size_t rand_str_size = sizeof(rand_str) / sizeof(std::string);
271 
272  for (unsigned int i = 0; i < rand_str_size; ++i)
273  {
274  std::string s0 = rand_str[i];
275  std::string s1 = rand_str[(i + 1) % rand_str_size];
276  std::string s2 = rand_str[(i + 2) % rand_str_size];
277  std::string s3 = rand_str[(i + 3) % rand_str_size];
278  std::string s4 = rand_str[(i + 4) % rand_str_size];
279  std::string s5 = rand_str[(i + 5) % rand_str_size];
280  std::string s6 = rand_str[(i + 6) % rand_str_size];
281 
282  outliers.push_back(s0);
283  outliers.push_back(s0 + s1);
284  outliers.push_back(s0 + s2 + s4);
285  outliers.push_back(s0 + s1 + s3);
286  outliers.push_back(s0 + s1 + s2 + s3 + s4 + s5);
287  outliers.push_back(s0 + s1 + s2 + s3 + s4 + s5 + s6);
288 
289  outliers.push_back(reverse(s0));
290  outliers.push_back(reverse(s0 + s1));
291  outliers.push_back(reverse(s0 + s2 + s4));
292  outliers.push_back(reverse(s0 + s1 + s3));
293  outliers.push_back(reverse(s0 + s1 + s2 + s3 + s4 + s5));
294  outliers.push_back(reverse(s0 + s1 + s2 + s3 + s4 + s5 + s6));
295  }
296 
297  std::sort(outliers.begin(),outliers.end());
298 
299  purify_outliers(word_list,outliers);
300 
301  std::cout << "Complete." << std::endl;
302 }
std::string reverse(std::string str)
void purify_outliers(const std::vector< std::string > &word_list, std::deque< std::string > &outliers)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ load_word_list()

bool load_word_list ( int  argc,
char *  argv[],
std::vector< std::string > &  word_list 
)

Definition at line 127 of file bloom_filter_example03.cpp.

References read_file().

Referenced by main().

128 {
129  // Note: The word-lists can be obtained from:
130  // https://github.com/ArashPartow/bloom
131  static const std::string wl_list[] =
132  { "word-list.txt",
133  "word-list-large.txt",
134  "word-list-extra-large.txt",
135  "random-list.txt"
136  };
137 
138  std::size_t index = 0;
139 
140  if (2 == argc)
141  {
142  index = ::atoi(argv[1]);
143 
144  const std::size_t wl_list_size = sizeof(wl_list) / sizeof(std::string);
145 
146  if (index >= wl_list_size)
147  {
148  std::cout << "Invalid world list index: " << index << std::endl;
149  return false;
150  }
151  }
152 
153  std::cout << "Loading list " << wl_list[index] << ".....";
154 
155  if (!read_file(wl_list[index],word_list))
156  {
157  return false;
158  }
159 
160  if (word_list.empty())
161  {
162  std::cout << "No word list - Either none requested, or desired word list could not be loaded." << std::endl;
163  return false;
164  }
165  else
166  std::cout << " Complete." << std::endl;
167 
168  return true;
169 }
bool read_file(const std::string &file_name, Container< T, Allocator > &c)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 56 of file bloom_filter_example03.cpp.

References compressible_bloom_filter::compress(), bloom_parameters::compute_optimal_parameters(), bloom_filter::contains(), bloom_filter::contains_all(), bloom_filter::effective_fpp(), bloom_parameters::false_positive_probability, generate_outliers(), bloom_filter::insert(), load_word_list(), bloom_parameters::maximum_number_of_hashes, bloom_parameters::projected_element_count, bloom_parameters::random_seed, and compressible_bloom_filter::size().

57 {
58  std::vector<std::string> word_list;
59  std::deque<std::string> outliers;
60 
61  if (!load_word_list(argc,argv,word_list))
62  {
63  return 1;
64  }
65 
66  generate_outliers(word_list,outliers);
67 
68  unsigned int random_seed = 0xA57EC3B2;
69 
70  const double desired_probability_of_false_positive = 1.0 / word_list.size();
71 
72  bloom_parameters parameters;
73  parameters.projected_element_count = word_list.size();
74  parameters.false_positive_probability = desired_probability_of_false_positive;
75  parameters.random_seed = random_seed++;
76  parameters.maximum_number_of_hashes = 7;
77 
78  if (!parameters)
79  {
80  std::cout << "Error - Invalid set of bloom filter parameters!" << std::endl;
81  return 1;
82  }
83 
84  parameters.compute_optimal_parameters();
85 
86  compressible_bloom_filter filter(parameters);
87 
88  filter.insert(word_list.begin(),word_list.end());
89 
90  std::cout << "Filter Size\tEFPP \tOFPP \tDiff" << std::endl;
91 
92  while (filter.size() > 1)
93  {
94  std::vector<std::string>::iterator it = filter.contains_all(word_list.begin(),word_list.end());
95 
96  if (word_list.end() != it)
97  {
98  std::cout << "ERROR: key not found in bloom filter! =>" << (*it) << std::endl;
99  return 1;
100  }
101 
102  std::size_t total_false_positive = 0;
103 
104  for (std::deque<std::string>::iterator itr = outliers.begin(); itr != outliers.end(); ++itr)
105  {
106  if (filter.contains(*itr)) ++total_false_positive;
107  }
108 
109  double pfp = total_false_positive / (1.0 * outliers.size());
110 
111  printf("%11llu\t%8.7f\t%8.7f\t%8.6f\n",
112  static_cast<unsigned long long>(filter.size()),
113  filter.effective_fpp(),
114  pfp,
115  100.0 * (pfp / filter.effective_fpp()));
116 
117  if (!filter.compress(5.0))
118  {
119  std::cout << "Filter cannot be compressed any further." << std::endl;
120  break;
121  }
122  }
123 
124  return 0;
125 }
virtual bool compute_optimal_parameters()
unsigned long long int random_seed
unsigned int maximum_number_of_hashes
bool load_word_list(int argc, char *argv[], std::vector< std::string > &word_list)
double false_positive_probability
void generate_outliers(const std::vector< std::string > &word_list, std::deque< std::string > &outliers)
unsigned long long int projected_element_count
Here is the call graph for this function:

◆ purify_outliers()

void purify_outliers ( const std::vector< std::string > &  word_list,
std::deque< std::string > &  outliers 
)

Definition at line 304 of file bloom_filter_example03.cpp.

Referenced by generate_outliers().

305 {
306  std::set<std::string> set1;
307  std::set<std::string> set2;
308 
309  std::copy(word_list.begin(), word_list.end(), std::inserter(set1,set1.begin()));
310  std::copy(outliers .begin(), outliers .end(), std::inserter(set2,set2.begin()));
311 
312  std::deque<std::string> intersect_list;
313 
314  std::set_intersection(set1.begin(),set1.end(),
315  set2.begin(),set2.end(),
316  std::back_inserter(intersect_list));
317 
318  std::sort(intersect_list.begin(),intersect_list.end());
319 
320  if (!intersect_list.empty())
321  {
322  std::deque<std::string> new_outliers;
323 
324  for (std::deque<std::string>::iterator it = outliers.begin(); it != outliers.end(); ++it)
325  {
326  if (!std::binary_search(intersect_list.begin(),intersect_list.end(),*it))
327  {
328  new_outliers.push_back(*it);
329  }
330  }
331 
332  outliers.swap(new_outliers);
333  }
334 }
Here is the caller graph for this function:

◆ read_file()

template<class T , class Allocator , template< class, class > class Container>
bool read_file ( const std::string &  file_name,
Container< T, Allocator > &  c 
)

Definition at line 174 of file bloom_filter_example03.cpp.

Referenced by load_word_list().

175 {
176  std::ifstream stream(file_name.c_str());
177 
178  if (!stream)
179  {
180  std::cout << "Error: Failed to open file '" << file_name << "'" << std::endl;
181  return false;
182  }
183 
184  std::string buffer;
185 
186  while (std::getline(stream,buffer))
187  {
188  c.push_back(buffer);
189  }
190 
191  return true;
192 }
Here is the caller graph for this function:

◆ reverse()

std::string reverse ( std::string  str)

Definition at line 194 of file bloom_filter_example03.cpp.

Referenced by generate_outliers().

195 {
196  // Not the most efficient way of doing this.
197  std::reverse(str.begin(),str.end());
198  return str;
199 }
std::string reverse(std::string str)
Here is the caller graph for this function: