A Discrete-Event Network Simulator
API
Loading...
Searching...
No Matches
hash-example.cc
Go to the documentation of this file.
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License version 2 as
4 * published by the Free Software Foundation;
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
14 */
15
16#include "ns3/core-module.h"
17#include "ns3/hash.h"
18
19#include <algorithm> // find
20#include <climits> // CHAR_BIT
21#include <fstream>
22#include <iomanip>
23#include <iostream>
24#include <map>
25#include <vector>
26
27/**
28 * \file
29 * \ingroup core-examples
30 * \ingroup hash
31 * Example usage of ns3::Hash.
32 *
33 * This example reads words from a list of files, creates a dictionary
34 * mapping words to hash values, reports collisions, and measures the
35 * execution time of the hash function implementations.
36 *
37 * See \ref hash
38 *
39 * Example Output:
40 * \verbatim
41
42./ns3 run "hasher-example --time \
43 --dict=/usr/share/dict/web2 \
44 --dict=/usr/share/dict/web2a \
45 --dict=/usr/share/dict/propernames \
46 --dict=/usr/share/dict/connectives"
47
48'build' finished successfully (3.028s)
49
50Hasher
51Hashing the dictionaries
52Dictionary file: /usr/share/dict/web2
53Dictionary file: /usr/share/dict/web2a
54Dictionary file: /usr/share/dict/propernames
55Dictionary file: /usr/share/dict/connectives
56
57Number of words or phrases: 312094
58Expected number of collisions: (32-bit table) 11.3389
59Expected number of collisions: (64-bit table) 2.6401e-09
60
61FNV1a (32-bit version): 13 collisions:
62a75b0ae7 elephantlike interventralia
63091c4808 diversionary propenseness
64172be6ba bairnishness sora
65e6cb5099 purifier spongoblastic
664a841078 ameliorable unsmotherable
676ed21de2 brand-newness peripherial
6822acb19b Petrarchism dewy-pinioned
695723634a grain gold hyphenation
70f58026c1 seven-channeled turritella
71946fc6ec multiradiate sister block
7288625851 brachtmema ule tree
73dc28b5ea Un-lutheran gutturotetany
749255bf44 re-sorter working stress
75
76FNV1a (64-bit version): 0 collisions:
77
78Murmur3 (32-bit version): 11 collisions:
795ea83eee impalace metahewettite
80e06fbdde constancy oligosynthetic
812a713795 hypermonosyllable presatisfaction
82c8bf0ef9 Hadromerina starky
83d9c04b3d Accipiter syllable
84c0da8f81 seriation trigonon
8517612b26 daemon unerring
86c2349ad7 air spring iron
871d91386f nine-pounder semicrescentic
88fe17b1a5 cone speaker oblong-wedgeshaped
89faa12798 saw bearing wilting point
90
91Murmur3 (64-bit version): 0 collisions:
92
93Hash timing Phrases Reps Ticks ns/hash
94FNV1a (32-bit version) 312094 100 3140531 100.628
95FNV1a (64-bit version) 312094 100 3145240 100.779
96Murmur3 (32-bit version) 312094 100 4152139 133.041
97Murmur3 (64-bit version) 312094 100 4191464 134.301
98
99 \endverbatim
100 */
101
102namespace ns3
103{
104
106
107namespace Hash
108{
109
110/**
111 * \ingroup hash
112 * Namespace for hasher-example.
113 */
114namespace Example
115{
116
117/**
118 * Keep track of collisions
119 */
121{
122 public:
123 std::string m_name; /**< Name of this hash. */
124 Hasher m_hash; /**< The hash. */
125
126 /** The size of hash function being tested. */
127 enum Bits
128 {
129 Bits32, /**< Use 32-bit hash function. */
130 Bits64 /**< Use 64-bit hash function. */
131 };
132
133 /**
134 * Constructor.
135 *
136 * \param [in] name Hash function name.
137 * \param [in] hash Hash function.
138 * \param [in] bits Which hash length to use.
139 */
140 Collider(const std::string name, Hasher hash, const Bits bits)
141 : m_name(name),
142 m_hash(hash),
143 m_bits(bits)
144 {
145 }
146
147 /**
148 * Add a string to the Collider.
149 *
150 * \param [in] phrase The string to add.
151 * \return \c true If this was a new string.
152 */
153 bool Add(const std::string phrase)
154 {
155 uint64_t h = GetHash(phrase);
156
157 // Check for collisions
158 if (m_dict.count(h) > 0)
159 {
160 // we've seen this hash before, why?
161 if (phrase == m_dict[h])
162 {
163 // duplicate phrase
164 return false;
165 }
166
167 // new phrase generating a real collision
168 // alphabetize
169 if (m_dict[h] < phrase)
170 {
171 m_coll.emplace_back(h, phrase);
172 }
173 else
174 {
175 m_coll.emplace_back(h, m_dict[h]);
176 m_dict[h] = phrase;
177 }
178 }
179 else
180 {
181 // Insert new hash
182 m_dict.insert(std::make_pair(h, phrase));
183 }
184 return true;
185 } // Add ()
186
187 /**
188 * \return The hash name, including the length.
189 */
190 std::string GetName() const
191 {
192 std::string name = m_name;
193
194 switch (m_bits)
195 {
196 case Bits32:
197 name += " (32-bit version)";
198 break;
199 case Bits64:
200 name += " (64-bit version)";
201 break;
202 default:
203 name += " (unknown!?!)";
204 }
205 return name;
206 }
207
208 /** Print the collisions found. */
209 void Report() const
210 {
211 std::cout << std::endl;
212
213 std::cout << GetName() << ": " << m_coll.size() << " collisions:" << std::endl;
214 for (const auto& collision : m_coll)
215 {
216 uint64_t h = collision.first;
217
218 std::cout << std::setfill('0') << std::hex << std::setw(8) << h << std::dec
219 << std::setfill(' ') << " " << std::setw(20) << std::left
220 << m_dict.find(h)->second << collision.second << std::right << std::endl;
221 }
222 } // Report ()
223
224 private:
225 /**
226 * Get the appropriate hash value.
227 *
228 * \param [in] phrase The string to hash.
229 * \return The hash value, using the number of bits set in the constructor.
230 */
231 uint64_t GetHash(const std::string phrase)
232 {
233 m_hash.clear();
234 uint64_t h = 0;
235
236 if (m_bits == Bits32)
237 {
238 h = m_hash.GetHash32(phrase);
239 }
240 else
241 {
242 h = m_hash.GetHash64(phrase);
243 }
244 return h;
245 }
246
247 /** Hash function. */
249
250 /** Hashed dictionary of first instance of each hash. */
251 typedef std::map<uint64_t, std::string> hashdict_t;
252
253 /** The dictionary map, indexed by hash. */
255
256 /** Collision map of subsequent instances. */
257 typedef std::vector<std::pair<uint64_t, std::string>> collision_t;
258
259 /** The list of collisions. */
261
262}; // class Collider
263
264/**
265 * Word list and hashers to test.
266 */
268{
269 public:
270 /** Constructor. */
272 : m_nphrases(0)
273 {
274 m_words.reserve(320000);
275 }
276
277 /**
278 * Add a Collider containing a hash function.
279 *
280 * \param [in] c The Collider to add.
281 */
282 void Add(Collider c)
283 {
284 m_hashes.push_back(c);
285 }
286
287 /**
288 * Add a string to the dictionary.
289 *
290 * \param [in] phrase The string to add.
291 */
292 void Add(const std::string phrase)
293 {
294 if (phrase.empty())
295 {
296 return;
297 }
298
299 bool newPhrases = false;
300 for (auto& collider : m_hashes)
301 {
302 newPhrases |= collider.Add(phrase);
303 }
304
305 if (newPhrases)
306 {
307 ++m_nphrases;
308 m_words.push_back(phrase);
309 }
310 } // Add ()
311
312 /**
313 * Report the expected number of collisions.
314 *
315 * See, e.g.,
316 * http://www.math.dartmouth.edu/archive/m19w03/public_html/Section6-5.pdf
317 *
318 * \f[
319 * E(collisions) = n - k + k (1 - 1/k)^n
320 * \f]
321 *
322 * where <i>n</i> is the number of entries in the table, and
323 * <i>k</i> is the number of buckets.
324 *
325 * This form is numerically unstable for low collision rates.
326 * Expanding for large \f$ k \f$ we get
327 *
328 * \f{eqnarray*}{
329 * E(c) &=& \frac{1}{k} \binom{n}{2}
330 * - \frac{1}{{{k^2}}} \binom{n}{3}
331 * + \frac{1}{{{k^3}}} \binom{n}{4}
332 * - \ldots \\
333 * &=& \frac{1}{k} \binom{n}{2}
334 * \left[ {1 - \frac{{n - 2}}{{3k}}
335 * + \frac{{\left( {n - 2} \right)
336 * \left( {n - 3} \right)}}{{12{k^2}}}
337 * - \ldots } \right] \\
338 * &=& \frac{1}{k} \binom{n}{2}
339 * \left\{ {1 - \frac{{n - 2}}{{3k}}
340 * \left[ {1 + \frac{{n - 3}}{{4k}}
341 * - \ldots }
342 * \right]}
343 * \right\}
344 * \f}
345 *
346 * For simplicity, we'll use the first two terms
347 * of the second form.
348 */
350 {
351 // Expected number of collisions
352 //
353 // Number of buckets = k = 2^bits
354 long double k32 = 0xFFFFFFFF;
355 auto k64 = static_cast<long double>(0xFFFFFFFFFFFFFFFFULL);
356
357 long double n = m_nphrases;
358 long double Ec32 = n * (n - 1) / (2 * k32) * (1 - (n - 2) / (3 * k32));
359 long double Ec64 = n * (n - 1) / (2 * k64) * (1 - (n - 2) / (3 * k64));
360
361 // Output collisions
362 std::cout << "" << std::endl;
363 std::cout << "Number of words or phrases: " << n << std::endl;
364 std::cout << "Expected number of collisions: (32-bit table) " << Ec32 << std::endl;
365 std::cout << "Expected number of collisions: (64-bit table) " << Ec64 << std::endl;
366 } // ReportExpectedCollisions
367
368 /** Print the collisions for each Collider. */
369 void Report() const
370 {
372
373 for (const auto& collider : m_hashes)
374 {
375 collider.Report();
376 }
377 } // Report ()
378
379 /**
380 * Time and report the execution of one hash across the entire Dictionary.
381 *
382 * \param [in] collider The hash Collider to use.
383 */
384 void TimeOne(const Collider& collider)
385 {
386 // Hashing speed
387 uint32_t reps = 100;
388 Hasher h = collider.m_hash;
389 int start = clock();
390 for (const auto& word : m_words)
391 {
392 for (uint32_t i = 0; i < reps; ++i)
393 {
394 h.clear().GetHash32(word);
395 }
396 }
397 int stop = clock();
398 double delta = stop - start;
399 double per = 1e9 * delta / (m_nphrases * reps * CLOCKS_PER_SEC);
400
401 std::cout << std::left << std::setw(32) << collider.GetName() << std::right << std::setw(10)
402 << m_nphrases << std::setw(10) << reps << std::setw(10) << stop - start
403 << std::setw(12) << per << std::endl;
404
405 } // TimeOne ()
406
407 /** Report the execution time of each hash across the entire Dictionary. */
408 void Time()
409 {
410 std::cout << "" << std::endl;
411 std::cout << std::left << std::setw(32) << "Hash timing" << std::right << std::setw(10)
412 << "Phrases" << std::setw(10) << "Reps" << std::setw(10) << "Ticks"
413 << std::setw(12) << "ns/hash" << std::endl;
414
415 for (const auto& collider : m_hashes)
416 {
417 TimeOne(collider);
418 }
419 } // Time ()
420
421 private:
422 unsigned long m_nphrases; /**< Number of strings hashed. */
423 std::vector<Collider> m_hashes; /**< List of hash Colliders. */
424 std::vector<std::string> m_words; /**< List of unique words. */
425
426}; // class Dictionary
427
428/**
429 * Source word list files.
430 */
432{
433 public:
434 /**
435 * CommandLine callback function to add a file argument to the list.
436 *
437 * \param [in] file The word file to add.
438 * \return \c true If the file is new to the list.
439 */
440 bool Add(const std::string& file)
441 {
442 if (std::find(m_files.begin(), m_files.end(), file) == m_files.end())
443 {
444 m_files.push_back(file);
445 }
446
447 return true;
448 }
449
450 /** \return The default dictionary path. */
451 static std::string GetDefault()
452 {
453 return "/usr/share/dict/words";
454 }
455
456 /**
457 * Add phrases from the files into the dict.
458 *
459 * \param [in,out] dict The Dictionary to add words to.
460 */
462 {
463 if (m_files.empty())
464 {
465 Add(GetDefault());
466 }
467
468 std::cout << "Hashing the dictionar" << (m_files.size() == 1 ? "y" : "ies") << std::endl;
469
470 for (const auto& dictFile : m_files)
471 {
472 std::cout << "Dictionary file: " << dictFile << std::endl;
473
474 // Find collisions
475
476 // Open the file
477 std::ifstream dictStream;
478 dictStream.open(dictFile);
479 if (!dictStream.is_open())
480 {
481 std::cerr << "Failed to open dictionary file."
482 << "'" << dictFile << "'" << std::endl;
483 continue;
484 }
485
486 while (dictStream.good())
487 {
488 std::string phrase;
489 getline(dictStream, phrase);
490 dict.Add(phrase);
491 } // while
492
493 dictStream.close();
494
495 } // for m_files
496
497 } // ReadInto
498
499 private:
500 std::vector<std::string> m_files; /**< List of word files to use. */
501
502}; // class DictFiles
503
504} // namespace Example
505
506} // namespace Hash
507
508} // namespace ns3
509
510using namespace ns3;
511using namespace ns3::Hash::Example;
512
513int
514main(int argc, char* argv[])
515{
516 std::cout << std::endl;
517 std::cout << "Hasher" << std::endl;
518
519 bool timing = false;
520 DictFiles files;
521
522 CommandLine cmd(__FILE__);
523 cmd.Usage("Find hash collisions in the dictionary.");
524 cmd.AddValue("dict",
525 "Dictionary file to hash",
528
529 cmd.AddValue("time", "Run timing test", timing);
530 cmd.Parse(argc, argv);
531
532 Dictionary dict;
533 dict.Add(Collider("FNV1a", Hasher(Create<Hash::Function::Fnv1a>()), Collider::Bits32));
534 dict.Add(Collider("FNV1a", Hasher(Create<Hash::Function::Fnv1a>()), Collider::Bits64));
535
536 dict.Add(Collider("Murmur3", Hasher(Create<Hash::Function::Murmur3>()), Collider::Bits32));
537 dict.Add(Collider("Murmur3", Hasher(Create<Hash::Function::Murmur3>()), Collider::Bits64));
538
539 files.ReadInto(dict);
540
541 dict.Report();
542
543 if (timing)
544 {
545 dict.Time();
546 } // if (timing)
547
548 return 0;
549} // main
Parse command-line arguments.
Definition: command-line.h:232
Keep track of collisions.
Collider(const std::string name, Hasher hash, const Bits bits)
Constructor.
collision_t m_coll
The list of collisions.
std::map< uint64_t, std::string > hashdict_t
Hashed dictionary of first instance of each hash.
uint64_t GetHash(const std::string phrase)
Get the appropriate hash value.
std::string m_name
Name of this hash.
std::vector< std::pair< uint64_t, std::string > > collision_t
Collision map of subsequent instances.
void Report() const
Print the collisions found.
Bits
The size of hash function being tested.
@ Bits64
Use 64-bit hash function.
@ Bits32
Use 32-bit hash function.
bool Add(const std::string phrase)
Add a string to the Collider.
std::string GetName() const
hashdict_t m_dict
The dictionary map, indexed by hash.
Bits m_bits
Hash function.
Source word list files.
std::vector< std::string > m_files
List of word files to use.
bool Add(const std::string &file)
CommandLine callback function to add a file argument to the list.
static std::string GetDefault()
void ReadInto(Dictionary &dict)
Add phrases from the files into the dict.
Word list and hashers to test.
void ReportExpectedCollisions() const
Report the expected number of collisions.
void Add(Collider c)
Add a Collider containing a hash function.
std::vector< std::string > m_words
List of unique words.
std::vector< Collider > m_hashes
List of hash Colliders.
void Report() const
Print the collisions for each Collider.
void TimeOne(const Collider &collider)
Time and report the execution of one hash across the entire Dictionary.
unsigned long m_nphrases
Number of strings hashed.
void Add(const std::string phrase)
Add a string to the dictionary.
void Time()
Report the execution time of each hash across the entire Dictionary.
Generic Hash function interface.
Definition: hash.h:87
uint32_t GetHash32(const char *buffer, const std::size_t size)
Compute 32-bit hash of a byte buffer.
Definition: hash.h:236
uint64_t GetHash64(const char *buffer, const std::size_t size)
Compute 64-bit hash of a byte buffer.
Definition: hash.h:243
Hasher & clear()
Restore initial state.
Definition: hash.cc:56
#define NS_LOG_COMPONENT_DEFINE(name)
Define a Log component with a specific name.
Definition: log.h:202
Namespace for hasher-example.
Every class exported by the ns3 library is enclosed in the ns3 namespace.
Callback< R, Args... > MakeCallback(R(T::*memPtr)(Args...), OBJ objPtr)
Build Callbacks for class method members which take varying numbers of arguments and potentially retu...
Definition: callback.h:706