A Discrete-Event Network Simulator
API
Loading...
Searching...
No Matches
hash-example.cc
Go to the documentation of this file.
1/*
2 * SPDX-License-Identifier: GPL-2.0-only
3 */
4
5#include "ns3/core-module.h"
6#include "ns3/hash.h"
7
8#include <algorithm> // find
9#include <fstream>
10#include <iomanip>
11#include <iostream>
12#include <map>
13#include <vector>
14
15/**
16 * @file
17 * @ingroup core-examples
18 * @ingroup hash
19 * Example usage of ns3::Hash.
20 *
21 * This example reads words from a list of files, creates a dictionary
22 * mapping words to hash values, reports collisions, and measures the
23 * execution time of the hash function implementations.
24 *
25 * See \ref hash
26 *
27 * Example Output:
28 * @verbatim
29
30./ns3 run "hasher-example --time \
31 --dict=/usr/share/dict/web2 \
32 --dict=/usr/share/dict/web2a \
33 --dict=/usr/share/dict/propernames \
34 --dict=/usr/share/dict/connectives"
35
36'build' finished successfully (3.028s)
37
38Hasher
39Hashing the dictionaries
40Dictionary file: /usr/share/dict/web2
41Dictionary file: /usr/share/dict/web2a
42Dictionary file: /usr/share/dict/propernames
43Dictionary file: /usr/share/dict/connectives
44
45Number of words or phrases: 312094
46Expected number of collisions: (32-bit table) 11.3389
47Expected number of collisions: (64-bit table) 2.6401e-09
48
49FNV1a (32-bit version): 13 collisions:
50a75b0ae7 elephantlike interventralia
51091c4808 diversionary propenseness
52172be6ba bairnishness sora
53e6cb5099 purifier spongoblastic
544a841078 ameliorable unsmotherable
556ed21de2 brand-newness peripherial
5622acb19b Petrarchism dewy-pinioned
575723634a grain gold hyphenation
58f58026c1 seven-channeled turritella
59946fc6ec multiradiate sister block
6088625851 brachtmema ule tree
61dc28b5ea Un-lutheran gutturotetany
629255bf44 re-sorter working stress
63
64FNV1a (64-bit version): 0 collisions:
65
66Murmur3 (32-bit version): 11 collisions:
675ea83eee impalace metahewettite
68e06fbdde constancy oligosynthetic
692a713795 hypermonosyllable presatisfaction
70c8bf0ef9 Hadromerina starky
71d9c04b3d Accipiter syllable
72c0da8f81 seriation trigonon
7317612b26 daemon unerring
74c2349ad7 air spring iron
751d91386f nine-pounder semicrescentic
76fe17b1a5 cone speaker oblong-wedgeshaped
77faa12798 saw bearing wilting point
78
79Murmur3 (64-bit version): 0 collisions:
80
81Hash timing Phrases Reps Ticks ns/hash
82FNV1a (32-bit version) 312094 100 3140531 100.628
83FNV1a (64-bit version) 312094 100 3145240 100.779
84Murmur3 (32-bit version) 312094 100 4152139 133.041
85Murmur3 (64-bit version) 312094 100 4191464 134.301
86
87 \endverbatim
88 */
89
90namespace ns3
91{
92
94
95namespace Hash
96{
97
98/**
99 * @ingroup hash
100 * Namespace for hasher-example.
101 */
102namespace Example
103{
104
105/**
106 * Keep track of collisions
107 */
109{
110 public:
111 std::string m_name; /**< Name of this hash. */
112 Hasher m_hash; /**< The hash. */
113
114 /** The size of hash function being tested. */
115 enum Bits
116 {
117 Bits32, /**< Use 32-bit hash function. */
118 Bits64 /**< Use 64-bit hash function. */
119 };
120
121 /**
122 * Constructor.
123 *
124 * @param [in] name Hash function name.
125 * @param [in] hash Hash function.
126 * @param [in] bits Which hash length to use.
127 */
128 Collider(const std::string name, Hasher hash, const Bits bits)
129 : m_name(name),
130 m_hash(hash),
131 m_bits(bits)
132 {
133 }
134
135 /**
136 * Add a string to the Collider.
137 *
138 * @param [in] phrase The string to add.
139 * @return \c true If this was a new string.
140 */
141 bool Add(const std::string phrase)
142 {
143 uint64_t h = GetHash(phrase);
144
145 // Check for collisions
146 if (m_dict.count(h) > 0)
147 {
148 // we've seen this hash before, why?
149 if (phrase == m_dict[h])
150 {
151 // duplicate phrase
152 return false;
153 }
154
155 // new phrase generating a real collision
156 // alphabetize
157 if (m_dict[h] < phrase)
158 {
159 m_coll.emplace_back(h, phrase);
160 }
161 else
162 {
163 m_coll.emplace_back(h, m_dict[h]);
164 m_dict[h] = phrase;
165 }
166 }
167 else
168 {
169 // Insert new hash
170 m_dict.insert(std::make_pair(h, phrase));
171 }
172 return true;
173 } // Add ()
174
175 /**
176 * @return The hash name, including the length.
177 */
178 std::string GetName() const
179 {
180 std::string name = m_name;
181
182 switch (m_bits)
183 {
184 case Bits32:
185 name += " (32-bit version)";
186 break;
187 case Bits64:
188 name += " (64-bit version)";
189 break;
190 default:
191 name += " (unknown!?!)";
192 }
193 return name;
194 }
195
196 /** Print the collisions found. */
197 void Report() const
198 {
199 std::cout << std::endl;
200
201 std::cout << GetName() << ": " << m_coll.size() << " collisions:" << std::endl;
202 for (const auto& collision : m_coll)
203 {
204 uint64_t h = collision.first;
205
206 std::cout << std::setfill('0') << std::hex << std::setw(8) << h << std::dec
207 << std::setfill(' ') << " " << std::setw(20) << std::left
208 << m_dict.find(h)->second << collision.second << std::right << std::endl;
209 }
210 } // Report ()
211
212 private:
213 /**
214 * Get the appropriate hash value.
215 *
216 * @param [in] phrase The string to hash.
217 * @return The hash value, using the number of bits set in the constructor.
218 */
219 uint64_t GetHash(const std::string phrase)
220 {
221 m_hash.clear();
222 uint64_t h = 0;
223
224 if (m_bits == Bits32)
225 {
226 h = m_hash.GetHash32(phrase);
227 }
228 else
229 {
230 h = m_hash.GetHash64(phrase);
231 }
232 return h;
233 }
234
235 /** Hash function. */
237
238 /** Hashed dictionary of first instance of each hash. */
239 typedef std::map<uint64_t, std::string> hashdict_t;
240
241 /** The dictionary map, indexed by hash. */
243
244 /** Collision map of subsequent instances. */
245 typedef std::vector<std::pair<uint64_t, std::string>> collision_t;
246
247 /** The list of collisions. */
249
250 // end of class Collider
251};
252
253/**
254 * Word list and hashers to test.
255 */
257{
258 public:
259 /** Constructor. */
261 : m_nphrases(0)
262 {
263 m_words.reserve(320000);
264 }
265
266 /**
267 * Add a Collider containing a hash function.
268 *
269 * @param [in] c The Collider to add.
270 */
271 void Add(Collider c)
272 {
273 m_hashes.push_back(c);
274 }
275
276 /**
277 * Add a string to the dictionary.
278 *
279 * @param [in] phrase The string to add.
280 */
281 void Add(const std::string phrase)
282 {
283 if (phrase.empty())
284 {
285 return;
286 }
287
288 bool newPhrases = false;
289 for (auto& collider : m_hashes)
290 {
291 newPhrases |= collider.Add(phrase);
292 }
293
294 if (newPhrases)
295 {
296 ++m_nphrases;
297 m_words.push_back(phrase);
298 }
299 } // Add ()
300
301 /**
302 * Report the expected number of collisions.
303 *
304 * See, e.g.,
305 * http://www.math.dartmouth.edu/archive/m19w03/public_html/Section6-5.pdf
306 *
307 * \f[
308 * E(collisions) = n - k + k (1 - 1/k)^n
309 * \f]
310 *
311 * where <i>n</i> is the number of entries in the table, and
312 * <i>k</i> is the number of buckets.
313 *
314 * This form is numerically unstable for low collision rates.
315 * Expanding for large \f$ k \f$ we get
316 *
317 * \f{eqnarray*}{
318 * E(c) &=& \frac{1}{k} \binom{n}{2}
319 * - \frac{1}{{{k^2}}} \binom{n}{3}
320 * + \frac{1}{{{k^3}}} \binom{n}{4}
321 * - \ldots \\
322 * &=& \frac{1}{k} \binom{n}{2}
323 * \left[ {1 - \frac{{n - 2}}{{3k}}
324 * + \frac{{\left( {n - 2} \right)
325 * \left( {n - 3} \right)}}{{12{k^2}}}
326 * - \ldots } \right] \\
327 * &=& \frac{1}{k} \binom{n}{2}
328 * \left\{ {1 - \frac{{n - 2}}{{3k}}
329 * \left[ {1 + \frac{{n - 3}}{{4k}}
330 * - \ldots }
331 * \right]}
332 * \right\}
333 * \f}
334 *
335 * For simplicity, we'll use the first two terms
336 * of the second form.
337 */
339 {
340 // Expected number of collisions
341 //
342 // Number of buckets = k = 2^bits
343 long double k32 = 0xFFFFFFFF;
344 auto k64 = static_cast<long double>(0xFFFFFFFFFFFFFFFFULL);
345
346 long double n = m_nphrases;
347 long double Ec32 = n * (n - 1) / (2 * k32) * (1 - (n - 2) / (3 * k32));
348 long double Ec64 = n * (n - 1) / (2 * k64) * (1 - (n - 2) / (3 * k64));
349
350 // Output collisions
351 std::cout << "" << std::endl;
352 std::cout << "Number of words or phrases: " << n << std::endl;
353 std::cout << "Expected number of collisions: (32-bit table) " << Ec32 << std::endl;
354 std::cout << "Expected number of collisions: (64-bit table) " << Ec64 << std::endl;
355 } // ReportExpectedCollisions
356
357 /** Print the collisions for each Collider. */
358 void Report() const
359 {
361
362 for (const auto& collider : m_hashes)
363 {
364 collider.Report();
365 }
366 } // Report ()
367
368 /**
369 * Time and report the execution of one hash across the entire Dictionary.
370 *
371 * @param [in] collider The hash Collider to use.
372 */
373 void TimeOne(const Collider& collider)
374 {
375 // Hashing speed
376 uint32_t reps = 100;
377 Hasher h = collider.m_hash;
378 int start = clock();
379 for (const auto& word : m_words)
380 {
381 for (uint32_t i = 0; i < reps; ++i)
382 {
383 h.clear().GetHash32(word);
384 }
385 }
386 int stop = clock();
387 double delta = stop - start;
388 double per = 1e9 * delta / (m_nphrases * reps * CLOCKS_PER_SEC);
389
390 std::cout << std::left << std::setw(32) << collider.GetName() << std::right << std::setw(10)
391 << m_nphrases << std::setw(10) << reps << std::setw(10) << stop - start
392 << std::setw(12) << per << std::endl;
393
394 } // TimeOne ()
395
396 /** Report the execution time of each hash across the entire Dictionary. */
397 void Time()
398 {
399 std::cout << "" << std::endl;
400 std::cout << std::left << std::setw(32) << "Hash timing" << std::right << std::setw(10)
401 << "Phrases" << std::setw(10) << "Reps" << std::setw(10) << "Ticks"
402 << std::setw(12) << "ns/hash" << std::endl;
403
404 for (const auto& collider : m_hashes)
405 {
406 TimeOne(collider);
407 }
408 } // Time ()
409
410 private:
411 unsigned long m_nphrases; /**< Number of strings hashed. */
412 std::vector<Collider> m_hashes; /**< List of hash Colliders. */
413 std::vector<std::string> m_words; /**< List of unique words. */
414
415 // end of class Dictionary
416};
417
418/**
419 * Source word list files.
420 */
422{
423 public:
424 /**
425 * CommandLine callback function to add a file argument to the list.
426 *
427 * @param [in] file The word file to add.
428 * @return \c true If the file is new to the list.
429 */
430 bool Add(const std::string& file)
431 {
432 if (std::find(m_files.begin(), m_files.end(), file) == m_files.end())
433 {
434 m_files.push_back(file);
435 }
436
437 return true;
438 }
439
440 /** @return The default dictionary path. */
441 static std::string GetDefault()
442 {
443 return "/usr/share/dict/words";
444 }
445
446 /**
447 * Add phrases from the files into the dict.
448 *
449 * @param [in,out] dict The Dictionary to add words to.
450 */
452 {
453 if (m_files.empty())
454 {
455 Add(GetDefault());
456 }
457
458 std::cout << "Hashing the dictionar" << (m_files.size() == 1 ? "y" : "ies") << std::endl;
459
460 for (const auto& dictFile : m_files)
461 {
462 std::cout << "Dictionary file: " << dictFile << std::endl;
463
464 // Find collisions
465
466 // Open the file
467 std::ifstream dictStream;
468 dictStream.open(dictFile);
469 if (!dictStream.is_open())
470 {
471 std::cerr << "Failed to open dictionary file.'" << dictFile << "'" << std::endl;
472 continue;
473 }
474
475 while (dictStream.good())
476 {
477 std::string phrase;
478 getline(dictStream, phrase);
479 dict.Add(phrase);
480 }
481
482 dictStream.close();
483 }
484 } // ReadInto
485
486 private:
487 std::vector<std::string> m_files; /**< List of word files to use. */
488
489 // end of class DictFiles
490};
491
492} // namespace Example
493
494} // namespace Hash
495
496} // namespace ns3
497
498using namespace ns3;
499using namespace ns3::Hash::Example;
500
501int
502main(int argc, char* argv[])
503{
504 std::cout << std::endl;
505 std::cout << "Hasher" << std::endl;
506
507 bool timing = false;
508 DictFiles files;
509
510 CommandLine cmd(__FILE__);
511 cmd.Usage("Find hash collisions in the dictionary.");
512 cmd.AddValue("dict",
513 "Dictionary file to hash",
516
517 cmd.AddValue("time", "Run timing test", timing);
518 cmd.Parse(argc, argv);
519
520 Dictionary dict;
523
526
527 files.ReadInto(dict);
528
529 dict.Report();
530
531 if (timing)
532 {
533 dict.Time();
534 }
535
536 return 0;
537}
Parse command-line arguments.
Keep track of collisions.
Collider(const std::string name, Hasher hash, const Bits bits)
Constructor.
collision_t m_coll
The list of collisions.
std::map< uint64_t, std::string > hashdict_t
Hashed dictionary of first instance of each hash.
uint64_t GetHash(const std::string phrase)
Get the appropriate hash value.
std::string m_name
Name of this hash.
std::vector< std::pair< uint64_t, std::string > > collision_t
Collision map of subsequent instances.
void Report() const
Print the collisions found.
Bits
The size of hash function being tested.
@ Bits64
Use 64-bit hash function.
@ Bits32
Use 32-bit hash function.
bool Add(const std::string phrase)
Add a string to the Collider.
std::string GetName() const
hashdict_t m_dict
The dictionary map, indexed by hash.
Source word list files.
std::vector< std::string > m_files
List of word files to use.
bool Add(const std::string &file)
CommandLine callback function to add a file argument to the list.
static std::string GetDefault()
void ReadInto(Dictionary &dict)
Add phrases from the files into the dict.
Word list and hashers to test.
void ReportExpectedCollisions() const
Report the expected number of collisions.
void Add(Collider c)
Add a Collider containing a hash function.
std::vector< std::string > m_words
List of unique words.
std::vector< Collider > m_hashes
List of hash Colliders.
void Report() const
Print the collisions for each Collider.
void TimeOne(const Collider &collider)
Time and report the execution of one hash across the entire Dictionary.
unsigned long m_nphrases
Number of strings hashed.
void Add(const std::string phrase)
Add a string to the dictionary.
void Time()
Report the execution time of each hash across the entire Dictionary.
Generic Hash function interface.
Definition hash.h:76
uint32_t GetHash32(const char *buffer, const std::size_t size)
Compute 32-bit hash of a byte buffer.
Definition hash.h:227
Hasher & clear()
Restore initial state.
Definition hash.cc:45
Callback< R, Args... > MakeCallback(R(T::*memPtr)(Args...), OBJ objPtr)
Build Callbacks for class method members which take varying numbers of arguments and potentially retu...
Definition callback.h:690
#define NS_LOG_COMPONENT_DEFINE(name)
Define a Log component with a specific name.
Definition log.h:194
Ptr< T > Create(Ts &&... args)
Create class instances by constructors with varying numbers of arguments and return them by Ptr.
Definition ptr.h:454
Namespace for hasher-example.
Hash function implementations.
Every class exported by the ns3 library is enclosed in the ns3 namespace.