46 bool load_word_list(
int argc,
char* argv[], std::vector<std::string>& word_list);
50 template <
class,
class>
class Container>
51 bool read_file(
const std::string& file_name, Container<T, Allocator>& c);
53 void generate_outliers(
const std::vector<std::string>& word_list, std::deque<std::string>& outliers);
54 void purify_outliers(
const std::vector<std::string>& word_list,std::deque<std::string>& outliers);
56 int main(
int argc,
char* argv[])
58 std::vector<std::string> word_list;
59 std::deque<std::string> outliers;
68 unsigned int random_seed = 0xA57EC3B2;
70 const double desired_probability_of_false_positive = 1.0 / word_list.size();
80 std::cout <<
"Error - Invalid set of bloom filter parameters!" << std::endl;
88 filter.
insert(word_list.begin(),word_list.end());
90 std::cout <<
"Filter Size\tEFPP \tOFPP \tDiff" << std::endl;
92 while (filter.
size() > 1)
94 std::vector<std::string>::iterator it = filter.
contains_all(word_list.begin(),word_list.end());
96 if (word_list.end() != it)
98 std::cout <<
"ERROR: key not found in bloom filter! =>" << (*it) << std::endl;
102 std::size_t total_false_positive = 0;
104 for (std::deque<std::string>::iterator itr = outliers.begin(); itr != outliers.end(); ++itr)
106 if (filter.
contains(*itr)) ++total_false_positive;
109 double pfp = total_false_positive / (1.0 * outliers.size());
111 printf(
"%11llu\t%8.7f\t%8.7f\t%8.6f\n",
112 static_cast<unsigned long long>(filter.
size()),
119 std::cout <<
"Filter cannot be compressed any further." << std::endl;
131 static const std::string wl_list[] =
133 "word-list-large.txt",
134 "word-list-extra-large.txt",
138 std::size_t index = 0;
142 index = ::atoi(argv[1]);
144 const std::size_t wl_list_size =
sizeof(wl_list) /
sizeof(std::string);
146 if (index >= wl_list_size)
148 std::cout <<
"Invalid world list index: " << index << std::endl;
153 std::cout <<
"Loading list " << wl_list[index] <<
".....";
155 if (!
read_file(wl_list[index],word_list))
160 if (word_list.empty())
162 std::cout <<
"No word list - Either none requested, or desired word list could not be loaded." << std::endl;
166 std::cout <<
" Complete." << std::endl;
173 template <
class,
class>
class Container>
174 bool read_file(
const std::string& file_name, Container<T, Allocator>& c)
176 std::ifstream stream(file_name.c_str());
180 std::cout <<
"Error: Failed to open file '" << file_name <<
"'" << std::endl;
186 while (std::getline(stream,buffer))
201 void generate_outliers(
const std::vector<std::string>& word_list, std::deque<std::string>& outliers)
203 std::cout <<
"Generating outliers..... ";
205 for (std::vector<std::string>::const_iterator it = word_list.begin(); it != word_list.end(); ++it)
209 outliers.push_back((*it) +
reverse((*it)));
210 outliers.push_back((*it) + (*it));
214 std::string ns = *it;
216 for (
unsigned int i = 0; i < ns.size(); ++i)
218 if (1 == (i & 0x00)) ns[i] = ~ns[i];
221 outliers.push_back(ns);
224 static const std::string rand_str[] =
226 "oD5l",
"pccW",
"5yHt",
"ndaN",
"OaJh",
"tWPc",
"Cr9C",
"a9zE",
227 "H1wL",
"yo1V",
"16D7",
"f2WR",
"0MVQ",
"PkKn",
"PlVa",
"MvzL",
228 "9Csl",
"JQTv",
"IveD",
"FDVS",
"Q7HE",
"QgcF",
"Q9Vo",
"V8zJ",
229 "EJWT",
"GuLC",
"rM3d",
"PJF4",
"HXPW",
"qKx3",
"ztRP",
"t4KP",
230 "m1zV",
"fn12",
"B1QP",
"Jr4I",
"Mf8M",
"4jBd",
"anGR",
"Pipt",
231 "QHon",
"GNlc",
"UeXM",
"mVM5",
"ABI8",
"RhB3",
"5h2s",
"hOYo",
232 "gaId",
"DX40",
"THMu",
"EwlP",
"n9Mz",
"oC1S",
"BfMl",
"uCZ1",
233 "G2bA",
"MOH9",
"zZ0O",
"PKDO",
"3nRU",
"Z6ie",
"4cso",
"LnQO",
234 "MJTtT",
"td3rC",
"A5JNR",
"1yL5B",
"rQnJk",
"jNKYF",
"CD0XD",
"pFLSG",
235 "fxO1a",
"CAjBE",
"ORk4e",
"0LERI",
"R7d0x",
"Qqd7v",
"6Kih5",
"9tTCB",
236 "yCg9U",
"D2Tv7",
"XpNHn",
"6zeFQ",
"BT2cs",
"WGhKW",
"zTv6B",
"TTPFk",
237 "XjNVX",
"pg9yW",
"4pKiZ",
"mQUhL",
"xrXzR",
"kVRm5",
"NSyC4",
"olXm9",
238 "UWkYy",
"8Ys6r",
"yd4Fl",
"5L4mB",
"nP3nH",
"f0DFb",
"glnQa",
"DlXQa",
239 "cQdH6",
"eBmIN",
"fDj6F",
"ezLow",
"C15vu",
"I2Z2j",
"BQgzg",
"eVBid",
240 "hn5TO",
"WZyQN",
"xXgsE",
"sL6nK",
"8DKD8",
"jcrbp",
"AcRak",
"h8N5o",
241 "LViwC",
"ThEKf",
"O7fd5",
"oN0Id",
"OM1m0",
"4OLiR",
"VIa8N",
"bJZFG",
242 "9j3rL",
"SzW0N",
"7m7pY",
"mY9bg",
"k1p3e",
"3OFm1",
"r45se",
"VYwz3",
243 "pDjXt",
"ZcqcJ",
"npPHx",
"hA3bw",
"w7lSO",
"jEmZL",
"1x3AZ",
"FN47G",
244 "kThNf",
"aC4fq",
"rzDwi",
"CYRNG",
"gCeuG",
"wCVqO",
"d1R60",
"bEauW",
245 "KeUwW",
"lIKhO",
"RfPv3",
"dK5wE",
"1X7qu",
"tRwEn",
"1c03P",
"GwHCl",
246 "CsJaO",
"zl4j1",
"e0aEc",
"Uskgi",
"rgTGR",
"jyR4g",
"Tt6l4",
"lRoaw",
247 "94ult",
"qZwBX",
"eYW8S",
"Qf6UH",
"AbV56",
"N1hJq",
"JIaVe",
"8LHEx",
248 "DeNbS",
"30I0a",
"hm6qw",
"3jcaO",
"4WkuA",
"mQ219",
"Gb81C",
"yx4HM",
249 "Chlqfi9S1y",
"BMwUgVFu2X",
"ZmpEGOVrVe",
"13ggJxrPkC",
"fcJJpyMGjm",
"9T00Dv4ZAb",
250 "p3YRcP7M2o",
"sR0qNUXCHv",
"gCxWZbJ6rb",
"R4YtzRXXUl",
"vwyYz5j6pY",
"XPWUvLXhJ7",
251 "7PwfnVVb7U",
"1f34Q6hOYz",
"1EM2abZY61",
"0a6Ivi4S0a",
"Teq2LrQs2T",
"dWXLCgWHc8",
252 "LawMv7ujn4",
"N8VFgbZQx5",
"tfvHHxoDgi",
"ImwYgXA2tf",
"KkIES9NqZO",
"ajcz0qjjda",
253 "6Vz28vlGs9",
"VMCc5W8cCt",
"BiQB8BRJ98",
"43CpOJSMpA",
"jfBJdqwXcU",
"ecHR9EO2ib",
254 "LH7CcXyCZ7",
"JntqGSgSpa",
"0MbTMpZPFW",
"5FJSdiCXzR",
"5gda2AhA2x",
"lrDFc1lnXk",
255 "zrEwECHvjs",
"B0JldDxFa1",
"6DYal4QxKa",
"Hsqx6kP2S4",
"zZwnALSuFh",
"Shh4ISZcKW",
256 "P9VDaNSk7Z",
"mEI2PLSCO6",
"WyTyrQORtu",
"IvJyMMRgh3",
"Q6pgJq8Nkv",
"dhOgR3tDAD",
257 "Y9h6bVgbxO",
"wA15tiOPTm",
"8TaIKf1zCO",
"z75dzabHBs",
"AS6OPnwoJI",
"2DSZka9Auj",
258 "QLzUjV2CWs",
"KZSN2SVhia",
"7ttYKWF2ue",
"1Zxfu7B2ST",
"RnkpmwjsCi",
"YpcSIzaqx5",
259 "RDEwFD9gmX",
"Nlx3V4Cjw4",
"9ZdvITOj8M",
"httUPWMNXO",
"Ypv9PjxGwa",
"LlwyNolNnH",
260 "6xpJOht47a",
"tbmz4WIdcG",
"OwzuVDlb7D",
"PBQKJxo8DQ",
"uVnMQn7hK6",
"rlnZINuDUa",
261 "2feyyYukPa",
"teOlpKuDBn",
"LxBSWh0dL1",
"Onyb7r4Jp0",
"bZxXE6xOXg",
"d9NSvNTunQ",
262 "ONerLBic32",
"8mar4rKmFk",
"5cCN9uwaCg",
"ElVrYOHHMv",
"YF6Og8DX40",
"OgiCwpCQ5a",
263 "K6nSRZVxdR",
"gqyXXXoVFW",
"ulyRYizcBP",
"khUx31K5UR",
"qZFRzVthju",
"pQBh0vnB20",
264 "dk8NIN7ajy",
"XP7ed1OjZx",
"IRYNwA5iFR",
"hiSEBhTukC",
"Ns4jJ3jzGo",
"dYoCSxjIvM",
265 "HzGLbl5i1g",
"baizENd4ko",
"6rCqGBO8t1",
"QWGfC8UaA7",
"JFhRfxQe4K",
"8R4W6IWANz",
266 "2TnWf1w7JH",
"0z69e0wcoG",
"8SN1mRHCY7",
"oFGCYHHwGX",
"G8xqnBgxjO",
"6B3SAOayHt",
267 "XRW3ZSG1gw",
"WcIjTxMxOM",
"wNqCAIaTb2",
"gO4em4HW8H",
"TgGFSMEtbG",
"WiwmbEw3QA",
268 "D2xshYUgpu",
"xRUZCQVzBs",
"nCnUmMgIjE",
"p4Ewt1yCJr",
"MeOjDcaMY5",
"1XelMeXiiI" 270 static const std::size_t rand_str_size =
sizeof(rand_str) /
sizeof(std::string);
272 for (
unsigned int i = 0; i < rand_str_size; ++i)
274 std::string s0 = rand_str[i];
275 std::string s1 = rand_str[(i + 1) % rand_str_size];
276 std::string s2 = rand_str[(i + 2) % rand_str_size];
277 std::string s3 = rand_str[(i + 3) % rand_str_size];
278 std::string s4 = rand_str[(i + 4) % rand_str_size];
279 std::string s5 = rand_str[(i + 5) % rand_str_size];
280 std::string s6 = rand_str[(i + 6) % rand_str_size];
282 outliers.push_back(s0);
283 outliers.push_back(s0 + s1);
284 outliers.push_back(s0 + s2 + s4);
285 outliers.push_back(s0 + s1 + s3);
286 outliers.push_back(s0 + s1 + s2 + s3 + s4 + s5);
287 outliers.push_back(s0 + s1 + s2 + s3 + s4 + s5 + s6);
289 outliers.push_back(
reverse(s0));
290 outliers.push_back(
reverse(s0 + s1));
291 outliers.push_back(
reverse(s0 + s2 + s4));
292 outliers.push_back(
reverse(s0 + s1 + s3));
293 outliers.push_back(
reverse(s0 + s1 + s2 + s3 + s4 + s5));
294 outliers.push_back(
reverse(s0 + s1 + s2 + s3 + s4 + s5 + s6));
297 std::sort(outliers.begin(),outliers.end());
301 std::cout <<
"Complete." << std::endl;
304 void purify_outliers(
const std::vector<std::string>& word_list, std::deque<std::string>& outliers)
306 std::set<std::string> set1;
307 std::set<std::string> set2;
309 std::copy(word_list.begin(), word_list.end(), std::inserter(set1,set1.begin()));
310 std::copy(outliers .begin(), outliers .end(), std::inserter(set2,set2.begin()));
312 std::deque<std::string> intersect_list;
314 std::set_intersection(set1.begin(),set1.end(),
315 set2.begin(),set2.end(),
316 std::back_inserter(intersect_list));
318 std::sort(intersect_list.begin(),intersect_list.end());
320 if (!intersect_list.empty())
322 std::deque<std::string> new_outliers;
324 for (std::deque<std::string>::iterator it = outliers.begin(); it != outliers.end(); ++it)
326 if (!std::binary_search(intersect_list.begin(),intersect_list.end(),*it))
328 new_outliers.push_back(*it);
332 outliers.swap(new_outliers);
virtual bool compute_optimal_parameters()
unsigned long long int random_seed
bool read_file(const std::string &file_name, Container< T, Allocator > &c)
void insert(const unsigned char *key_begin, const std::size_t &length)
InputIterator contains_all(const InputIterator begin, const InputIterator end) const
unsigned int maximum_number_of_hashes
bool load_word_list(int argc, char *argv[], std::vector< std::string > &word_list)
double false_positive_probability
double effective_fpp() const
void generate_outliers(const std::vector< std::string > &word_list, std::deque< std::string > &outliers)
bool compress(const double &percentage)
std::string reverse(std::string str)
unsigned long long int size() const
void purify_outliers(const std::vector< std::string > &word_list, std::deque< std::string > &outliers)
virtual bool contains(const unsigned char *key_begin, const std::size_t length) const
unsigned long long int projected_element_count
int main(int argc, char *argv[])